feat: show CoderAI broker telemetry

parent 9bf96583
...@@ -1527,7 +1527,7 @@ class RequestHandler: ...@@ -1527,7 +1527,7 @@ class RequestHandler:
enhanced_models = [] enhanced_models = []
current_time = int(time_module.time()) current_time = int(time_module.time())
for model in models: for model in models:
model_dict = model.dict() model_dict = model.model_dump() if hasattr(model, 'model_dump') else model.dict()
model_name = model_dict.get('id', '') model_name = model_dict.get('id', '')
# Add OpenAI-compatible required fields # Add OpenAI-compatible required fields
...@@ -1627,7 +1627,7 @@ class RequestHandler: ...@@ -1627,7 +1627,7 @@ class RequestHandler:
enhanced_models = [] enhanced_models = []
current_time = int(time_module.time()) current_time = int(time_module.time())
for model in models: for model in models:
model_dict = model.dict() model_dict = model.model_dump() if hasattr(model, 'model_dump') else model.dict()
model_name = model_dict.get('id', '') model_name = model_dict.get('id', '')
# Add OpenAI-compatible required fields # Add OpenAI-compatible required fields
......
...@@ -844,7 +844,8 @@ class BaseProviderHandler: ...@@ -844,7 +844,8 @@ class BaseProviderHandler:
# Initialize adaptive rate limiter # Initialize adaptive rate limiter
adaptive_config = None adaptive_config = None
if config.aisbf and config.aisbf.adaptive_rate_limiting: if config.aisbf and config.aisbf.adaptive_rate_limiting:
adaptive_config = config.aisbf.adaptive_rate_limiting.dict() adaptive_obj = config.aisbf.adaptive_rate_limiting
adaptive_config = adaptive_obj.model_dump() if hasattr(adaptive_obj, 'model_dump') else adaptive_obj.dict()
self.adaptive_limiter = get_adaptive_rate_limiter(provider_id, adaptive_config, user_id) self.adaptive_limiter = get_adaptive_rate_limiter(provider_id, adaptive_config, user_id)
# Load rate-limit disabled state from cache (persists across restarts) # Load rate-limit disabled state from cache (persists across restarts)
self._load_disabled_until_from_cache() self._load_disabled_until_from_cache()
......
...@@ -607,6 +607,8 @@ The resulting session snapshot tracks: ...@@ -607,6 +607,8 @@ The resulting session snapshot tracks:
- average total tokens - average total tokens
- success rate - success rate
CoderAI should prefer sending exact `latency_ms` and `tokens_per_second` whenever it can measure them internally. AISBF estimation is only a fallback for implementations that cannot yet provide exact metrics.
### Error ### Error
```json ```json
......
...@@ -175,6 +175,32 @@ function formatBrokerTimestamp(ts) { ...@@ -175,6 +175,32 @@ function formatBrokerTimestamp(ts) {
return date.toLocaleString(); return date.toLocaleString();
} }
/**
 * Format a VRAM amount expressed in megabytes for display.
 *
 * @param {*} value - Amount in MB; numbers and numeric strings are accepted.
 * @returns {string} 'Unknown' for null/undefined/'' or non-finite input,
 *   otherwise '<n> MB' below 1024 MB or '<n.n> GB' from 1024 MB upward.
 */
function formatVramMb(value) {
    if (value == null || value === '') return 'Unknown';
    const mb = Number(value);
    if (!Number.isFinite(mb)) return 'Unknown';
    // Decide the unit from the rounded figure so 1023.5-1023.99 MB is shown
    // as "1.0 GB" instead of the contradictory "1024 MB".
    const wholeMb = Math.round(mb);
    if (wholeMb >= 1024) return `${(mb / 1024).toFixed(1)} GB`;
    return `${wholeMb} MB`;
}
/**
 * Render a performance metric with a fixed number of decimals and an
 * optional unit suffix.
 *
 * @param {*} value - Metric value; numbers and numeric strings are accepted.
 * @param {number} [digits=1] - Decimal places passed to toFixed().
 * @param {string} [suffix=''] - Text appended directly after the number.
 * @returns {string} The formatted metric, or 'Unknown' when the value is
 *   null/undefined/'' or does not convert to a finite number.
 */
function formatPerfNumber(value, digits = 1, suffix = '') {
    const isMissing = value === null || value === undefined || value === '';
    if (isMissing) return 'Unknown';

    const parsed = Number(value);
    if (Number.isFinite(parsed)) {
        const fixed = parsed.toFixed(digits);
        return `${fixed}${suffix}`;
    }
    return 'Unknown';
}
/**
 * Build a one-line, human-readable summary of the GPUs listed in broker
 * session metadata.
 *
 * @param {object|null|undefined} metadata - Object whose `gpus` property is
 *   read; anything other than an array there is treated as no GPUs.
 * @returns {string} 'None reported' when there are no GPU entries, otherwise
 *   '<name> (<free> free / <total> total)' per entry joined with '; '.
 */
function renderGpuSummary(metadata) {
    const gpuList = metadata?.gpus;
    if (!Array.isArray(gpuList) || gpuList.length === 0) {
        return 'None reported';
    }

    const parts = [];
    gpuList.forEach((entry, position) => {
        // Fall back to the reported index, then the array position, for the label.
        const label = entry?.name || `GPU ${entry?.index ?? position}`;
        const freeText = formatVramMb(entry?.available_vram_mb);
        const totalText = formatVramMb(entry?.total_vram_mb);
        parts.push(`${label} (${freeText} free / ${totalText} total)`);
    });
    return parts.join('; ');
}
async function apiCall(method, url, body) { async function apiCall(method, url, body) {
const opts = { method, headers: { 'Content-Type': 'application/json' } }; const opts = { method, headers: { 'Content-Type': 'application/json' } };
if (body !== undefined) opts.body = JSON.stringify(body); if (body !== undefined) opts.body = JSON.stringify(body);
...@@ -760,6 +786,8 @@ function renderProviderDetails(key) { ...@@ -760,6 +786,8 @@ function renderProviderDetails(key) {
const brokerSession = coderaiConfig.broker_session || {}; const brokerSession = coderaiConfig.broker_session || {};
const brokerConnected = !!brokerSession.connected; const brokerConnected = !!brokerSession.connected;
const ownerLabel = brokerSession.owner_user_id == null ? 'Global admin' : `User #${brokerSession.owner_user_id}`; const ownerLabel = brokerSession.owner_user_id == null ? 'Global admin' : `User #${brokerSession.owner_user_id}`;
const brokerMetadata = brokerSession.metadata || {};
const performance = brokerSession.performance || {};
// Build authentication fields based on provider type // Build authentication fields based on provider type
let authFieldsHtml = ''; let authFieldsHtml = '';
...@@ -1068,6 +1096,14 @@ function renderProviderDetails(key) { ...@@ -1068,6 +1096,14 @@ function renderProviderDetails(key) {
<div><strong>Connected At:</strong> ${escHtmlAttr(formatBrokerTimestamp(brokerSession.connected_at))}</div> <div><strong>Connected At:</strong> ${escHtmlAttr(formatBrokerTimestamp(brokerSession.connected_at))}</div>
<div><strong>Remote Endpoint:</strong> ${escHtmlAttr(brokerSession.endpoint || 'Unknown')}</div> <div><strong>Remote Endpoint:</strong> ${escHtmlAttr(brokerSession.endpoint || 'Unknown')}</div>
<div><strong>Transport:</strong> ${escHtmlAttr(brokerSession.transport || 'broker')}</div> <div><strong>Transport:</strong> ${escHtmlAttr(brokerSession.transport || 'broker')}</div>
<div><strong>GPU Count:</strong> ${escHtmlAttr(String(brokerMetadata.gpu_count ?? (Array.isArray(brokerMetadata.gpus) ? brokerMetadata.gpus.length : 0) || '0'))}</div>
<div><strong>VRAM:</strong> ${escHtmlAttr(formatVramMb(brokerMetadata.available_vram_mb))} free / ${escHtmlAttr(formatVramMb(brokerMetadata.total_vram_mb))} total</div>
<div><strong>GPUs:</strong> ${escHtmlAttr(renderGpuSummary(brokerMetadata))}</div>
<div><strong>Avg Latency:</strong> ${escHtmlAttr(formatPerfNumber(performance.avg_latency_ms, 1, ' ms'))}</div>
<div><strong>Avg Throughput:</strong> ${escHtmlAttr(formatPerfNumber(performance.avg_tokens_per_second, 2, ' tok/s'))}</div>
<div><strong>Avg Tokens:</strong> ${escHtmlAttr(formatPerfNumber(performance.avg_total_tokens, 1))}</div>
<div><strong>Success Rate:</strong> ${escHtmlAttr(formatPerfNumber((performance.success_rate ?? 0) * 100, 1, '%'))}</div>
<div><strong>Samples:</strong> ${escHtmlAttr(String(performance.sample_count ?? 0))} / ${escHtmlAttr(String(performance.window_size ?? 100))}</div>
<div><strong>Studio Endpoints:</strong> ${escHtmlAttr((brokerSession.studio_endpoints || []).join(', ') || 'None advertised')}</div> <div><strong>Studio Endpoints:</strong> ${escHtmlAttr((brokerSession.studio_endpoints || []).join(', ') || 'None advertised')}</div>
</div> </div>
</div> </div>
......
...@@ -173,6 +173,32 @@ window.AISBF_PROVIDERS_PAGE = { serverUrl: window.location.origin + BASE_PATH }; ...@@ -173,6 +173,32 @@ window.AISBF_PROVIDERS_PAGE = { serverUrl: window.location.origin + BASE_PATH };
// Global cache settings cache (pun intended) // Global cache settings cache (pun intended)
let cacheSettings = []; let cacheSettings = [];
/**
 * Format a VRAM amount expressed in megabytes for display.
 *
 * @param {*} value - Amount in MB; numbers and numeric strings are accepted.
 * @returns {string} 'Unknown' for null/undefined/'' or non-finite input,
 *   otherwise '<n> MB' below 1024 MB or '<n.n> GB' from 1024 MB upward.
 */
function formatVramMb(value) {
    if (value == null || value === '') return 'Unknown';
    const mb = Number(value);
    if (!Number.isFinite(mb)) return 'Unknown';
    // Decide the unit from the rounded figure so 1023.5-1023.99 MB is shown
    // as "1.0 GB" instead of the contradictory "1024 MB".
    const wholeMb = Math.round(mb);
    if (wholeMb >= 1024) return `${(mb / 1024).toFixed(1)} GB`;
    return `${wholeMb} MB`;
}
/**
 * Render a performance metric with a fixed number of decimals and an
 * optional unit suffix.
 *
 * @param {*} value - Metric value; numbers and numeric strings are accepted.
 * @param {number} [digits=1] - Decimal places passed to toFixed().
 * @param {string} [suffix=''] - Text appended directly after the number.
 * @returns {string} The formatted metric, or 'Unknown' when the value is
 *   null/undefined/'' or does not convert to a finite number.
 */
function formatPerfNumber(value, digits = 1, suffix = '') {
    const isMissing = value === null || value === undefined || value === '';
    if (isMissing) return 'Unknown';

    const parsed = Number(value);
    if (Number.isFinite(parsed)) {
        const fixed = parsed.toFixed(digits);
        return `${fixed}${suffix}`;
    }
    return 'Unknown';
}
/**
 * Build a one-line, human-readable summary of the GPUs listed in broker
 * session metadata.
 *
 * @param {object|null|undefined} metadata - Object whose `gpus` property is
 *   read; anything other than an array there is treated as no GPUs.
 * @returns {string} 'None reported' when there are no GPU entries, otherwise
 *   '<name> (<free> free / <total> total)' per entry joined with '; '.
 */
function renderGpuSummary(metadata) {
    const gpuList = metadata?.gpus;
    if (!Array.isArray(gpuList) || gpuList.length === 0) {
        return 'None reported';
    }

    const parts = [];
    gpuList.forEach((entry, position) => {
        // Fall back to the reported index, then the array position, for the label.
        const label = entry?.name || `GPU ${entry?.index ?? position}`;
        const freeText = formatVramMb(entry?.available_vram_mb);
        const totalText = formatVramMb(entry?.total_vram_mb);
        parts.push(`${label} (${freeText} free / ${totalText} total)`);
    });
    return parts.join('; ');
}
async function setCacheSetting(provider_id, model_name, enabled) { async function setCacheSetting(provider_id, model_name, enabled) {
try { try {
const response = await fetch('{{ url_for(request, "/dashboard/api/cache-settings") }}', { const response = await fetch('{{ url_for(request, "/dashboard/api/cache-settings") }}', {
...@@ -805,6 +831,8 @@ function renderProviderDetails(key) { ...@@ -805,6 +831,8 @@ function renderProviderDetails(key) {
const brokerSession = coderaiConfig.broker_session || {}; const brokerSession = coderaiConfig.broker_session || {};
const brokerConnected = !!brokerSession.connected; const brokerConnected = !!brokerSession.connected;
const ownerLabel = brokerSession.owner_user_id == null ? 'Global admin' : `User #${brokerSession.owner_user_id}`; const ownerLabel = brokerSession.owner_user_id == null ? 'Global admin' : `User #${brokerSession.owner_user_id}`;
const brokerMetadata = brokerSession.metadata || {};
const performance = brokerSession.performance || {};
// Build authentication fields based on provider type // Build authentication fields based on provider type
let authFieldsHtml = ''; let authFieldsHtml = '';
...@@ -1077,6 +1105,14 @@ function renderProviderDetails(key) { ...@@ -1077,6 +1105,14 @@ function renderProviderDetails(key) {
<div><strong>Connected At:</strong> ${escHtmlAttr(formatBrokerTimestamp(brokerSession.connected_at))}</div> <div><strong>Connected At:</strong> ${escHtmlAttr(formatBrokerTimestamp(brokerSession.connected_at))}</div>
<div><strong>Remote Endpoint:</strong> ${escHtmlAttr(brokerSession.endpoint || 'Unknown')}</div> <div><strong>Remote Endpoint:</strong> ${escHtmlAttr(brokerSession.endpoint || 'Unknown')}</div>
<div><strong>Transport:</strong> ${escHtmlAttr(brokerSession.transport || 'broker')}</div> <div><strong>Transport:</strong> ${escHtmlAttr(brokerSession.transport || 'broker')}</div>
<div><strong>GPU Count:</strong> ${escHtmlAttr(String(brokerMetadata.gpu_count ?? (Array.isArray(brokerMetadata.gpus) ? brokerMetadata.gpus.length : 0) || '0'))}</div>
<div><strong>VRAM:</strong> ${escHtmlAttr(formatVramMb(brokerMetadata.available_vram_mb))} free / ${escHtmlAttr(formatVramMb(brokerMetadata.total_vram_mb))} total</div>
<div><strong>GPUs:</strong> ${escHtmlAttr(renderGpuSummary(brokerMetadata))}</div>
<div><strong>Avg Latency:</strong> ${escHtmlAttr(formatPerfNumber(performance.avg_latency_ms, 1, ' ms'))}</div>
<div><strong>Avg Throughput:</strong> ${escHtmlAttr(formatPerfNumber(performance.avg_tokens_per_second, 2, ' tok/s'))}</div>
<div><strong>Avg Tokens:</strong> ${escHtmlAttr(formatPerfNumber(performance.avg_total_tokens, 1))}</div>
<div><strong>Success Rate:</strong> ${escHtmlAttr(formatPerfNumber((performance.success_rate ?? 0) * 100, 1, '%'))}</div>
<div><strong>Samples:</strong> ${escHtmlAttr(String(performance.sample_count ?? 0))} / ${escHtmlAttr(String(performance.window_size ?? 100))}</div>
<div><strong>Studio Endpoints:</strong> ${escHtmlAttr((brokerSession.studio_endpoints || []).join(', ') || 'None advertised')}</div> <div><strong>Studio Endpoints:</strong> ${escHtmlAttr((brokerSession.studio_endpoints || []).join(', ') || 'None advertised')}</div>
</div> </div>
</div> </div>
......
...@@ -248,9 +248,26 @@ def test_dashboard_providers_page_includes_broker_status_for_coderai(monkeypatch ...@@ -248,9 +248,26 @@ def test_dashboard_providers_page_includes_broker_status_for_coderai(monkeypatch
StubWebSocket(), StubWebSocket(),
"coderai", "coderai",
"workstation-01", "workstation-01",
metadata={"owner_user_id": None, "endpoint": "ws://nat-client", "transport": "websocket", "studio_endpoints": ["v1/images/generate"]}, metadata={
"owner_user_id": None,
"endpoint": "ws://nat-client",
"transport": "websocket",
"studio_endpoints": ["v1/images/generate"],
"gpus": [{"name": "RTX 4090", "total_vram_mb": 24576, "available_vram_mb": 20480}],
"gpu_count": 1,
"total_vram_mb": 24576,
"available_vram_mb": 20480,
},
capabilities={"studio": {"enabled": True}}, capabilities={"studio": {"enabled": True}},
) )
session = await broker.get_session("coderai", "workstation-01")
session.recent_requests.append({
"latency_ms": 842.0,
"tokens_per_second": 54.6,
"total_tokens": 460,
"success": True,
"recorded_at": 0,
})
asyncio.run(_clear_broker_sessions()) asyncio.run(_clear_broker_sessions())
asyncio.run(scenario()) asyncio.run(scenario())
...@@ -259,7 +276,8 @@ def test_dashboard_providers_page_includes_broker_status_for_coderai(monkeypatch ...@@ -259,7 +276,8 @@ def test_dashboard_providers_page_includes_broker_status_for_coderai(monkeypatch
assert response.status_code == 200 assert response.status_code == 200
assert "Broker Session Status" in response.text assert "Broker Session Status" in response.text
assert "workstation-01" in response.text assert "workstation-01" in response.text
assert "workstation-01" in response.text assert "RTX 4090" in response.text
assert "54.6 tok/s" in response.text
finally: finally:
asyncio.run(_clear_broker_sessions()) asyncio.run(_clear_broker_sessions())
if original_provider is None: if original_provider is None:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment