admin: show per-engine loaded models (tasks hover + cross-engine model page)

- Tasks page: the engine box "N models" count now has a hover tooltip
  listing every loaded model key on that engine (dotted underline + help
  cursor); "no models loaded" when empty.
- Models page: models loaded on a non-primary engine were shown as idle.
  /admin/api/model-loaded-status is served by the primary engine and only
  reported its own pool. Added a front-proxy interceptor that proxies to
  the primary then unions in every other engine's loaded_models, mirroring
  the existing /admin/api/engines and /admin/api/status aggregation.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
parent bce43398
......@@ -312,13 +312,15 @@ async function loadEngines(){
const vram = e.vram ? `${(e.vram.used ?? 0).toFixed ? e.vram.used.toFixed(1) : e.vram.used}/${e.vram.total} GB` : '';
const cool = e.cooling ? ` <span class="badge badge-warn" style="font-size:9px">❄ cooling</span>` : '';
const prim = e.primary ? ` <span class="badge badge-user" style="font-size:9px">primary</span>` : '';
const models = (e.loaded_models||[]).length;
const loaded = e.loaded_models||[];
const models = loaded.length;
const mtip = models ? esc(loaded.join('\n')) : 'no models loaded';
return `<div class="sys-tile">
<div class="sys-head">
<span class="sys-name">${esc(e.name)} <span class="dim" style="text-transform:none">(${esc(e.backend)})</span>${prim}${cool}</span>
<span style="width:9px;height:9px;border-radius:50%;background:${dot};display:inline-block" title="${state}"></span>
</div>
<div class="sys-sub"><span>${esc(state)}${vram?' · '+esc(vram):''}</span><span>${models} model${models!==1?'s':''}</span></div>
<div class="sys-sub"><span>${esc(state)}${vram?' · '+esc(vram):''}</span><span title="${mtip}" style="cursor:${models?'help':'default'};${models?'text-decoration:underline dotted':''}">${models} model${models!==1?'s':''}</span></div>
<div style="margin-top:.5rem;text-align:right">
<button class="btn btn-ghost" style="font-size:11px;padding:.15rem .5rem;color:var(--error,#e55)"
onclick="restartEngine(${e.id}, '${esc(e.name)}')" title="Kill and respawn this engine">↻ Restart</button>
......
......@@ -326,6 +326,34 @@ class FrontProxy:
"loaded_models": sorted(e.loaded_models), "pid": pid})
return out
async def model_loaded_status(self, request: Request):
    """Serve /admin/api/model-loaded-status aggregated across all engines.

    The request is forwarded to the primary engine, whose answer only covers
    its own pool. The models loaded on every *other* registered engine are
    then unioned into the ``loaded`` list. Otherwise the models page shows
    models loaded on a secondary engine (e.g. the radeon engine) as idle.

    Args:
        request: Incoming request. Its path, query parameters and filtered
            headers are forwarded to the primary engine.

    Returns:
        * ``JSONResponse`` with the primary's payload and a sorted, merged
          ``loaded`` list when the primary answers 200 with a JSON object.
        * ``JSONResponse`` with the primary's payload untouched when it is
          valid JSON but not an object.
        * The primary's response passed through (status, filtered headers,
          body) when it answers with a non-200 status.
        * An empty status payload when there is no primary engine, or when
          the request to the primary or its JSON decoding fails.
    """
    empty_status = {"loaded": [], "instances": {}, "configured_max": {}}

    prim = self.registry.primary()
    if prim is None:
        return JSONResponse(empty_status)

    try:
        headers = self._filter_headers(request.headers, _DROP_REQ)
        # NOTE(review): self._short is presumably the short-timeout HTTP
        # client used for admin calls -- confirm against __init__.
        r = await self._short.get(prim.url + request.url.path, headers=headers,
                                  params=request.query_params)
        if r.status_code != 200:
            # Let the caller see the primary's own error instead of masking it.
            return Response(content=r.content, status_code=r.status_code,
                            headers=dict(self._filter_headers(r.headers, _DROP_RESP)),
                            media_type=r.headers.get("content-type"))
        data = r.json()
    except Exception:
        # Deliberate best-effort: an unreachable primary or an undecodable
        # body degrades to "nothing loaded" rather than a 500 on the admin page.
        return JSONResponse(empty_status)

    if isinstance(data, dict):
        loaded = set(data.get("loaded") or [])
        for e in self.registry.all():
            # The primary's models are already in its own answer.
            if e.id == prim.id:
                continue
            # update() accepts any iterable (set, list, tuple); `or ()` tolerates
            # an engine whose loaded_models has not been populated yet.
            loaded.update(e.loaded_models or ())
        data["loaded"] = sorted(loaded)
    return JSONResponse(data)
def _cooling_engines(self) -> list:
"""Which engines are in thermal cooldown right now (for the Tasks banner)."""
out = []
......@@ -585,6 +613,10 @@ def build_app(config, config_dir=None) -> FastAPI:
async def _status(request: Request):
return await front.status(request)
@app.get("/admin/api/model-loaded-status", include_in_schema=False)
async def _model_loaded_status(request: Request):
    """Route GET /admin/api/model-loaded-status to the front proxy's handler."""
    response = await front.model_loaded_status(request)
    return response
@app.get("/admin/api/tasks", include_in_schema=False)
async def _tasks(request: Request):
    """Route GET /admin/api/tasks to the front proxy's ``poll`` handler."""
    response = await front.poll(request)
    return response
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment