Better multi-instance visibility

parent aa34ff9a
...@@ -1225,9 +1225,26 @@ async def api_model_disable(request: Request, username: str = Depends(require_ad ...@@ -1225,9 +1225,26 @@ async def api_model_disable(request: Request, username: str = Depends(require_ad
@router.get("/admin/api/model-loaded-status")
async def api_model_loaded_status(username: str = Depends(require_admin)):
    """Return loaded model keys with per-model instance pool info.

    The response is a dict with three keys:

    * ``loaded`` -- list of the keys of ``multi_model_manager.models``.
    * ``instances`` -- for every entry of ``multi_model_manager.model_pools``,
      ``{"loaded": pool.count, "max": pool.max_instances}``.
    * ``configured_max`` -- model ``path`` (or ``id``) mapped to its configured
      ``max_instances``, only for config entries whose value is greater than 1.
    """
    from codai.models.manager import multi_model_manager

    loaded = list(multi_model_manager.models.keys())
    instance_pools = {
        key: {"loaded": pool.count, "max": pool.max_instances}
        for key, pool in multi_model_manager.model_pools.items()
    }

    configured_max = {}
    if config_manager:
        categories = (
            "text_models", "image_models", "audio_models", "vision_models",
            "tts_models", "gguf_models", "video_models", "audio_gen_models",
            "embedding_models",
        )
        for cat in categories:
            # `or []` also covers a category that is present but set to null.
            for m in config_manager.models_data.get(cat) or []:
                if not isinstance(m, dict):
                    continue
                path = m.get("path") or m.get("id") or ""
                if not path:
                    continue
                try:
                    max_inst = int(m.get("max_instances") or 1)
                except (TypeError, ValueError):
                    # A malformed max_instances in the config must not break
                    # the status endpoint; treat the entry as single-instance.
                    continue
                if max_inst > 1:
                    configured_max[path] = max_inst

    return {"loaded": loaded, "instances": instance_pools, "configured_max": configured_max}
@router.post("/admin/api/model-load") @router.post("/admin/api/model-load")
......
...@@ -175,6 +175,7 @@ td code{font-family:var(--mono);font-size:11.5px;background:var(--raised);paddin ...@@ -175,6 +175,7 @@ td code{font-family:var(--mono);font-size:11.5px;background:var(--raised);paddin
.badge-admin{background:var(--accent-s);color:#A5B4FC;border:1px solid rgba(99,102,241,.2)} .badge-admin{background:var(--accent-s);color:#A5B4FC;border:1px solid rgba(99,102,241,.2)}
.badge-user{background:var(--raised);color:var(--text-3);border:1px solid var(--border)} .badge-user{background:var(--raised);color:var(--text-3);border:1px solid var(--border)}
.badge-ok{background:rgba(52,211,153,.08);color:var(--green);border:1px solid rgba(52,211,153,.2)} .badge-ok{background:rgba(52,211,153,.08);color:var(--green);border:1px solid rgba(52,211,153,.2)}
.badge-warn{background:rgba(251,191,36,.08);color:#f59e0b;border:1px solid rgba(251,191,36,.2)}
.badge-danger{background:rgba(248,113,113,.08);color:var(--red);border:1px solid rgba(248,113,113,.2)} .badge-danger{background:rgba(248,113,113,.08);color:var(--red);border:1px solid rgba(248,113,113,.2)}
/* ── Modals ──────────────────────────────────────────────────────── */ /* ── Modals ──────────────────────────────────────────────────────── */
......
...@@ -468,6 +468,7 @@ ...@@ -468,6 +468,7 @@
</label> </label>
<input type="number" id="cfg-max-instances" class="form-input" min="1" step="1" value="1" oninput="_updatePreloadAllVisibility()"> <input type="number" id="cfg-max-instances" class="form-input" min="1" step="1" value="1" oninput="_updatePreloadAllVisibility()">
<span style="font-size:11px;color:var(--text-3)">Parallel copies in VRAM</span> <span style="font-size:11px;color:var(--text-3)">Parallel copies in VRAM</span>
<span id="cfg-instances-status" style="font-size:11px;color:var(--accent);display:none;margin-top:.2rem"></span>
</div> </div>
</div> </div>
<div id="cfg-preload-all-row" style="display:none;margin-top:.5rem"> <div id="cfg-preload-all-row" style="display:none;margin-top:.5rem">
...@@ -1213,12 +1214,13 @@ async function loadCachedModels(){ ...@@ -1213,12 +1214,13 @@ async function loadCachedModels(){
capabilities:m.capabilities||[]}); capabilities:m.capabilities||[]});
const loaded = _loadedKeys.has(m.id) || [..._loadedKeys].some(k=>k.endsWith(':'+m.id)||k===m.id); const loaded = _loadedKeys.has(m.id) || [..._loadedKeys].some(k=>k.endsWith(':'+m.id)||k===m.id);
const capBadges = fmtCapabilities(m.capabilities||[]); const capBadges = fmtCapabilities(m.capabilities||[]);
const instBadgeHf = m.in_config ? _instanceBadge([m.id], (m.settings||{}).max_instances||1) : '';
return `<tr style="border-top:1px solid var(--border)"> return `<tr style="border-top:1px solid var(--border)">
<td style="padding:.4rem .25rem;font-family:monospace;font-size:12px;max-width:260px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="${esc(m.id)}">${esc(m.id)}</td> <td style="padding:.4rem .25rem;font-family:monospace;font-size:12px;max-width:260px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="${esc(m.id)}">${esc(m.id)}</td>
<td style="text-align:right;padding:.4rem .25rem;white-space:nowrap;color:var(--text-2)">${fmtGB(m.size_gb)}</td> <td style="text-align:right;padding:.4rem .25rem;white-space:nowrap;color:var(--text-2)">${fmtGB(m.size_gb)}</td>
<td style="text-align:right;padding:.4rem .25rem;color:var(--text-2)">${m.file_count}</td> <td style="text-align:right;padding:.4rem .25rem;color:var(--text-2)">${m.file_count}</td>
<td style="padding:.4rem .25rem;font-size:11px">${capBadges||'<span class="muted small">—</span>'}</td> <td style="padding:.4rem .25rem;font-size:11px">${capBadges||'<span class="muted small">—</span>'}</td>
<td style="text-align:center;padding:.4rem .25rem">${m.in_config?'<span class="badge badge-ok">enabled</span>':'<span class="muted small">—</span>'}</td> <td style="text-align:center;padding:.4rem .25rem">${m.in_config?`<span class="badge badge-ok">enabled</span>${instBadgeHf?'<br>'+instBadgeHf:''}`:' <span class="muted small">—</span>'}</td>
<td style="padding:.4rem .25rem;text-align:right;white-space:nowrap"> <td style="padding:.4rem .25rem;text-align:right;white-space:nowrap">
${m.in_config?(loaded ${m.in_config?(loaded
?`<button class="btn btn-ghost btn-sm" onclick="unloadModel(${idx})">Unload</button>` ?`<button class="btn btn-ghost btn-sm" onclick="unloadModel(${idx})">Unload</button>`
...@@ -1266,6 +1268,7 @@ async function loadCachedModels(){ ...@@ -1266,6 +1268,7 @@ async function loadCachedModels(){
[..._loadedKeys].some(k=>k.endsWith(':'+primary.path)||k.endsWith(':'+primary.filename)); [..._loadedKeys].some(k=>k.endsWith(':'+primary.path)||k.endsWith(':'+primary.filename));
const capBadges = fmtCapabilities(primary.capabilities||[]); const capBadges = fmtCapabilities(primary.capabilities||[]);
const in_config = files.some(f=>f.in_config); const in_config = files.some(f=>f.in_config);
const instBadgeGguf = in_config ? _instanceBadge([primary.path, primary.filename], (primary.settings||{}).max_instances||1) : '';
// Quant badges // Quant badges
const quantBadges = files.length > 1 const quantBadges = files.length > 1
? files.map(f=>{ ? files.map(f=>{
...@@ -1278,7 +1281,7 @@ async function loadCachedModels(){ ...@@ -1278,7 +1281,7 @@ async function loadCachedModels(){
<td style="padding:.4rem .25rem;font-family:monospace;font-size:11px;max-width:280px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="${esc(primary.filename)}">${esc(base)}<br>${quantBadges}</td> <td style="padding:.4rem .25rem;font-family:monospace;font-size:11px;max-width:280px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="${esc(primary.filename)}">${esc(base)}<br>${quantBadges}</td>
<td style="text-align:right;padding:.4rem .25rem;white-space:nowrap;color:var(--text-2)">${fmtGB(primary.size_gb)}</td> <td style="text-align:right;padding:.4rem .25rem;white-space:nowrap;color:var(--text-2)">${fmtGB(primary.size_gb)}</td>
<td style="padding:.4rem .25rem;font-size:11px">${capBadges||'<span class="muted small">—</span>'}</td> <td style="padding:.4rem .25rem;font-size:11px">${capBadges||'<span class="muted small">—</span>'}</td>
<td style="text-align:center;padding:.4rem .25rem">${in_config?'<span class="badge badge-ok">enabled</span>':'<span class="muted small">—</span>'}</td> <td style="text-align:center;padding:.4rem .25rem">${in_config?`<span class="badge badge-ok">enabled</span>${instBadgeGguf?'<br>'+instBadgeGguf:''}`:' <span class="muted small">—</span>'}</td>
<td style="padding:.4rem .25rem;text-align:right;white-space:nowrap"> <td style="padding:.4rem .25rem;text-align:right;white-space:nowrap">
${in_config?(loaded ${in_config?(loaded
?`<button class="btn btn-ghost btn-sm" onclick="unloadModel(${idx})">Unload</button>` ?`<button class="btn btn-ghost btn-sm" onclick="unloadModel(${idx})">Unload</button>`
...@@ -1326,14 +1329,42 @@ function getWhisperServerModelPath(){ ...@@ -1326,14 +1329,42 @@ function getWhisperServerModelPath(){
} }
let _loadedKeys = new Set(); let _loadedKeys = new Set();
let _instanceInfo = {}; // loaded_key -> {loaded: N, max: N}
/**
 * Refresh the cached loaded-model state from the admin API.
 *
 * On an OK response, `_loadedKeys` is replaced with the response's `loaded`
 * list and `_instanceInfo` with its `instances` map
 * (loaded_key -> {loaded: N, max: N}). On a non-OK response or any failure
 * the previously cached state is left untouched.
 */
async function refreshLoadedStatus(){
  try{
    const r = await fetch('/admin/api/model-loaded-status');
    if(!r.ok) return;
    const d = await r.json();
    _loadedKeys = new Set(d.loaded||[]);
    _instanceInfo = d.instances || {};
  }catch(err){
    // Best-effort refresh: keep the stale state, but leave a trace instead of
    // swallowing the failure silently.
    console.warn('refreshLoadedStatus failed', err);
  }
}
/**
 * Resolve candidate model paths to the key used in `_instanceInfo`.
 *
 * Candidates are tried in order. For each one, an exact key match wins;
 * otherwise the first key ending in ':' + candidate is returned.
 *
 * @param {Array<string>} paths  Candidate paths/ids; missing or empty entries are skipped.
 * @returns {string|null} The matching `_instanceInfo` key, or null when none matches.
 */
function _findLoadedKey(paths){
  const keys = Object.keys(_instanceInfo);
  for(const p of paths || []){
    // Callers may pass undefined fields (e.g. a missing filename); without this
    // guard they would be matched as the literal "undefined" / ":undefined".
    if(p == null || p === '') continue;
    // Own-property check so inherited names such as "constructor" never match.
    if(Object.prototype.hasOwnProperty.call(_instanceInfo, p)) return p;
    const suffix = ':'+p;
    const hit = keys.find(k => k.endsWith(suffix));
    if(hit !== undefined) return hit;
  }
  return null;
}
/**
 * Build the HTML badge describing how many instances of a model are loaded.
 *
 * @param {Array<string>} lookupPaths  Candidate paths passed to `_findLoadedKey`.
 * @param {number} maxCfg  Configured `max_instances` for the model.
 * @returns {string} '' when the model is not configured for more than one
 *   instance; a neutral "×N inst." badge when nothing is loaded; otherwise an
 *   "L/N inst." badge (badge-ok when the pool is full, badge-warn when partial).
 */
function _instanceBadge(lookupPaths, maxCfg){
  // Coerce to numbers up front: the values are interpolated into HTML and
  // compared numerically, so a non-numeric config value must not leak through.
  const cfgMax = Number(maxCfg);
  if(!(cfgMax > 1)) return '';
  const instKey = _findLoadedKey(lookupPaths);
  const info = instKey ? _instanceInfo[instKey] : null;
  const loadedCount = info ? (Number(info.loaded) || 0) : 0;
  // Prefer the live pool size; fall back to the configured value when the
  // pool entry is absent or does not report a usable max.
  const maxCount = info ? (Number(info.max) || cfgMax) : cfgMax;
  if(loadedCount === 0){
    return `<span class="badge badge-user" style="font-size:10px" title="${maxCount} instances configured">×${maxCount} inst.</span>`;
  }
  const cls = loadedCount >= maxCount ? 'badge-ok' : 'badge-warn';
  return `<span class="badge ${cls}" style="font-size:10px" title="${loadedCount} of ${maxCount} instances loaded">${loadedCount}/${maxCount} inst.</span>`;
}
async function refreshLocal(){ async function refreshLocal(){
await refreshLoadedStatus(); await refreshLoadedStatus();
loadCacheStats(); loadCacheStats();
...@@ -1576,6 +1607,18 @@ function openCfgModal(idx){ ...@@ -1576,6 +1607,18 @@ function openCfgModal(idx){
document.getElementById('cfg-max-instances').value = s.max_instances != null ? s.max_instances : 1; document.getElementById('cfg-max-instances').value = s.max_instances != null ? s.max_instances : 1;
document.getElementById('cfg-preload-all-instances').checked = !!s.preload_all_instances; document.getElementById('cfg-preload-all-instances').checked = !!s.preload_all_instances;
_updatePreloadAllVisibility(); _updatePreloadAllVisibility();
const instStatusEl = document.getElementById('cfg-instances-status');
if(instStatusEl){
const instKey = _findLoadedKey([m.path]);
const info = instKey ? _instanceInfo[instKey] : null;
if(info && info.max > 1){
instStatusEl.textContent = `Currently loaded: ${info.loaded}/${info.max} instances`;
instStatusEl.style.display = 'block';
} else {
instStatusEl.textContent = '';
instStatusEl.style.display = 'none';
}
}
document.getElementById('cfg-max-gpu').value = s.max_gpu_percent != null ? s.max_gpu_percent : ''; document.getElementById('cfg-max-gpu').value = s.max_gpu_percent != null ? s.max_gpu_percent : '';
document.getElementById('cfg-ram-gb').value = s.manual_ram_gb != null ? s.manual_ram_gb : ''; document.getElementById('cfg-ram-gb').value = s.manual_ram_gb != null ? s.manual_ram_gb : '';
document.getElementById('cfg-4bit').checked = !!s.load_in_4bit; document.getElementById('cfg-4bit').checked = !!s.load_in_4bit;
......
...@@ -334,7 +334,7 @@ def main(): ...@@ -334,7 +334,7 @@ def main():
def _model_cfg(m, mtype):
    """Return the config kwargs for one model entry.

    Non-dict entries yield an empty dict. For dict entries the result of
    ``build_kwargs_from_config(m, mtype)`` is extended with the passthrough
    keys that are present on the entry itself.
    """
    if not isinstance(m, dict):
        return {}
    cfg = build_kwargs_from_config(m, mtype)
    passthrough = ("load_mode", "used_vram_gb", "alias", "max_instances")
    for key in passthrough:
        if key not in m:
            continue
        cfg[key] = m[key]
    return cfg
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment