Fix multimodel

parent f524118d
......@@ -338,17 +338,27 @@ async def api_status(username: str = Depends(require_auth)):
except Exception:
pass
# Enabled (configured) models
# Enabled (configured) models + aliases
enabled_models = []
enabled_aliases: dict = {} # alias -> [model_id, ...]
try:
if config_manager:
md = config_manager.models_data
for cat in ("text_models", "image_models", "audio_models", "vision_models", "tts_models",
"video_models", "audio_gen_models", "embedding_models"):
for m in md.get(cat, []):
mid = (m.get("path") or m.get("id") or m) if isinstance(m, dict) else m
if isinstance(m, dict):
mid = m.get("path") or m.get("id") or ""
alias = (m.get("alias") or "").strip()
else:
mid = m
alias = ""
if mid and mid not in enabled_models:
enabled_models.append(mid)
if alias:
enabled_aliases.setdefault(alias, [])
if mid and mid not in enabled_aliases[alias]:
enabled_aliases[alias].append(mid)
except Exception:
pass
......@@ -399,6 +409,7 @@ async def api_status(username: str = Depends(require_auth)):
"models_loaded": len(loaded_keys),
"loaded_models": loaded_keys,
"enabled_models": enabled_models,
"enabled_aliases": enabled_aliases,
"vram": vram,
"cuda": is_cuda,
"requests": {
......@@ -1356,13 +1367,13 @@ async def api_model_configure(request: Request, username: str = Depends(require_
gpu_device = int(data.get("gpu_device", 0))
if gpu_device < 0:
raise HTTPException(status_code=400, detail="gpu_device must be >= 0")
for existing in config_manager.models_data.get("audio_models", []):
if (
isinstance(existing, dict)
and existing.get("backend") == "whisper-server"
and existing.get("id") == model_id
):
raise HTTPException(status_code=409, detail=f"whisper-server model '{model_id}' already exists")
# Remove existing entry with same id (update semantics)
audio_list = config_manager.models_data.get("audio_models", [])
config_manager.models_data["audio_models"] = [
m for m in audio_list
if not (isinstance(m, dict) and m.get("id") == model_id)
]
alias = (data.get("alias") or "").strip() or None
entry = {
"id": model_id,
"backend": "whisper-server",
......@@ -1374,11 +1385,16 @@ async def api_model_configure(request: Request, username: str = Depends(require_
"model_type": "audio_models",
"model_types": ["audio_models"],
}
if alias:
entry["alias"] = alias
if data.get("used_vram_gb") is not None:
entry["used_vram_gb"] = data["used_vram_gb"]
config_manager.models_data.setdefault("audio_models", []).append(entry)
config_manager.save_models()
return {"success": True, "model_id": model_id, "model_path": model_path, "server_path": server_path}
result = {"success": True, "model_id": model_id, "model_path": model_path, "server_path": server_path}
if alias:
result["alias"] = alias
return result
path = data.get("path") or data.get("model_id", "")
valid = {"text_models", "image_models", "audio_models", "tts_models", "vision_models", "video_models",
"audio_gen_models", "embedding_models"}
......
......@@ -79,11 +79,19 @@ async function poll() {
const loaded = d.loaded_models || [];
const enabled = d.enabled_models || [];
const aliases = d.enabled_aliases || {};
const loadedSet = new Set(loaded);
const notLoaded = enabled.filter(m => !loadedSet.has(m));
let html = '';
if(loaded.length) html += loaded.map(m => `<span class="badge badge-admin" style="margin:.125rem" title="Loaded">● ${m}</span>`).join('');
if(notLoaded.length) html += notLoaded.map(m => `<span class="badge" style="margin:.125rem;opacity:.55" title="Enabled, not loaded">○ ${m}</span>`).join('');
const aliasEntries = Object.entries(aliases);
if(aliasEntries.length) {
html += aliasEntries.map(([alias, ids]) => {
const tip = ids.length > 1 ? `Round-robin alias → ${ids.join(', ')}` : `Alias → ${ids[0] || ''}`;
return `<span class="badge" style="margin:.125rem;background:var(--bg);border:1px solid var(--border);color:var(--text-2)" title="${tip}">⇄ ${alias}</span>`;
}).join('');
}
document.getElementById('active-models').innerHTML = html || '<span class="muted small">No models loaded</span>';
if (d.vram) {
......
......@@ -118,10 +118,10 @@
</div>
<div style="display:grid;grid-template-columns:repeat(2,minmax(0,1fr));gap:.75rem;margin-top:.75rem">
<input id="ws-used-vram" class="form-input" type="number" min="0" step="0.1" placeholder="Used VRAM (optional)">
<div></div>
<input id="ws-alias" class="form-input" placeholder="Alias (optional, e.g. whisper)">
</div>
<div class="form-actions" style="margin-top:.75rem">
<button class="btn btn-primary" onclick="addWhisperServerModel()">Add model</button>
<button class="btn btn-primary" id="ws-submit-btn" onclick="addWhisperServerModel()">Add model</button>
</div>
</div>
</div>
......@@ -329,6 +329,69 @@
</div>
</div>
<!-- Whisper-server edit modal -->
<div id="ws-edit-modal" class="modal">
<div class="modal-box" style="max-width:520px">
<div class="modal-head">
<span class="modal-title">Edit whisper-server model</span>
<button class="modal-close" onclick="closeModal('ws-edit-modal')">×</button>
</div>
<div class="modal-body">
<input type="hidden" id="wse-model-id">
<div class="form-row">
<label class="form-label">Model ID</label>
<div id="wse-id-label" style="font-size:12px;font-family:monospace;color:var(--text-2);padding:.3rem 0"></div>
</div>
<div class="form-row">
<label class="form-label">Alias <span class="muted small">(optional — shared by multiple instances for round-robin)</span></label>
<input id="wse-alias" class="form-input" placeholder="e.g. whisper">
</div>
<div class="form-row">
<label class="form-label">Server path</label>
<input id="wse-server-path" class="form-input" placeholder="/usr/local/bin/whisper-server">
</div>
<div class="form-row">
<label class="form-label">Model source</label>
<select id="wse-model-source" class="form-input" onchange="toggleWseModelSource()">
<option value="cached-gguf">Downloaded GGUF</option>
<option value="manual-path">Manual path</option>
</select>
</div>
<div class="form-row" id="wse-gguf-row">
<label class="form-label">Downloaded GGUF</label>
<select id="wse-gguf-select" class="form-input">
<option value="">Select downloaded GGUF</option>
</select>
</div>
<div class="form-row" id="wse-path-row" style="display:none">
<label class="form-label">Model path</label>
<input id="wse-model-path" class="form-input" placeholder="/path/to/ggml-model.bin">
</div>
<div style="display:grid;grid-template-columns:1fr 1fr 1fr;gap:.75rem">
<div class="form-row" style="margin:0">
<label class="form-label">Port</label>
<input id="wse-port" class="form-input" type="number" min="1" max="65535">
</div>
<div class="form-row" style="margin:0">
<label class="form-label">GPU device</label>
<input id="wse-gpu-device" class="form-input" type="number" min="0">
</div>
<div class="form-row" style="margin:0">
<label class="form-label">Load mode</label>
<select id="wse-load-mode" class="form-input">
<option value="on-request">On request</option>
<option value="load">Load</option>
</select>
</div>
</div>
<div class="form-actions" style="margin-top:1rem">
<button class="btn btn-primary" onclick="saveWhisperServerEdit()">Save</button>
<button class="btn btn-ghost" onclick="closeModal('ws-edit-modal')">Cancel</button>
</div>
</div>
</div>
</div>
<!-- Model configuration modal -->
<div id="cfg-modal" class="modal">
<div class="modal-box" style="max-width:600px;max-height:92vh;overflow-y:auto">
......@@ -1037,20 +1100,21 @@ function _renderWhisperServerRows(models){
size_gb:0,
defaultType:'audio_models',
settings:{
backend:m.backend || 'whisper-server',
load_mode:m.load_mode || 'on-request',
model_type:'audio_models',
model_path:m.model_path || '',
port:m.port,
gpu_device:m.gpu_device,
backend: m.backend || 'whisper-server',
load_mode: m.load_mode || 'on-request',
model_type: 'audio_models',
model_path: m.model_path || '',
server_path: m.server_path || '',
alias: m.alias || '',
port: m.port ?? 8744,
gpu_device: m.gpu_device ?? 0,
},
in_config:true,
capabilities:m.capabilities || ['speech_to_text']
});
const loaded = _loadedKeys.has(`audio:${m.id}`) || _loadedKeys.has(m.id);
return `<tr style="border-top:1px solid var(--border)">
<td style="padding:.4rem .25rem;font-family:monospace;font-size:12px">${esc(m.id)}</td>
<td style="padding:.4rem .25rem"><span class="badge badge-ok">${esc(m.backend || 'whisper-server')}</span></td>
<td style="padding:.4rem .25rem;font-family:monospace;font-size:12px">${esc(m.id)}${m.alias?`<br><span style="color:var(--text-2);font-size:10px">alias: ${esc(m.alias)}</span>`:''}</td>
<td style="padding:.4rem .25rem;font-size:11px;color:var(--text-2);max-width:160px;overflow:hidden;text-overflow:ellipsis;display:-webkit-box;-webkit-line-clamp:2;-webkit-box-orient:vertical;line-height:1.25;max-height:2.5em" title="${esc(m.model_path || "—")}">${esc(m.model_path || "—")}</td>
<td style="padding:.4rem .25rem;font-size:11px;color:var(--text-2)">${m.port ?? '—'} / GPU ${m.gpu_device ?? 0}</td>
<td style="padding:.4rem .25rem;font-size:11px;color:var(--text-2)">${esc(m.load_mode || 'on-request')}</td>
......@@ -1059,7 +1123,7 @@ function _renderWhisperServerRows(models){
${loaded
?`<button class="btn btn-ghost btn-sm" onclick="unloadModel(${idx})">Unload</button>`
:`<button class="btn btn-primary btn-sm" onclick="loadModel(${idx})">Load now</button>`}
<button class="btn btn-secondary btn-sm" onclick="openCfgModal(${idx})">Configure</button>
<button class="btn btn-secondary btn-sm" onclick="openCfgModal(${idx})">Edit</button>
<button class="btn btn-ghost btn-sm" onclick="disableModel(${idx})">Remove</button>
</td>
</tr>`;
......@@ -1068,8 +1132,7 @@ function _renderWhisperServerRows(models){
'<div class="card-title">Configured whisper-server models</div>'+
'<table style="width:100%;border-collapse:collapse;font-size:13px">'+
'<thead><tr style="color:var(--text-2);font-size:10px;text-transform:uppercase;letter-spacing:.05em">'+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model</th>'+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Backend</th>'+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model / Alias</th>'+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model path</th>'+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Port / GPU</th>'+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Load mode</th>'+
......@@ -1234,7 +1297,6 @@ async function loadCachedModels(){
'<th style="text-align:center;padding:.3rem .25rem;font-weight:700">Config</th>'+
'<th></th></tr></thead><tbody>'+rows.join('')+'</tbody></table>';
}
ggufEl.insertAdjacentHTML('afterend', _renderWhisperServerRows(whisperModels));
// Remove any previously rendered whisper-server card before inserting the new one
document.querySelectorAll('#ws-rendered-card').forEach(el=>el.remove());
const wsHtml = _renderWhisperServerRows(whisperModels);
......@@ -1363,8 +1425,81 @@ function onCfgQuantChange(){
document.getElementById('cfg-id-label').textContent = m.label;
}
/**
 * Show the edit-modal row that matches the selected model source
 * ('cached-gguf' or 'manual-path') and hide the other one.
 */
function toggleWseModelSource() {
  const chosen = document.getElementById('wse-model-source').value;
  // Map each source value to the row that should be visible for it.
  const rowIdBySource = [
    ['cached-gguf', 'wse-gguf-row'],
    ['manual-path', 'wse-path-row'],
  ];
  for (const [kind, rowId] of rowIdBySource) {
    document.getElementById(rowId).style.display = kind === chosen ? '' : 'none';
  }
}
/**
 * Return the model path currently entered in the edit modal: the selected
 * GGUF option when the source is 'cached-gguf', otherwise the trimmed
 * contents of the manual path input.
 */
function _getWseModelPath() {
  if (document.getElementById('wse-model-source').value === 'cached-gguf') {
    return document.getElementById('wse-gguf-select').value;
  }
  return document.getElementById('wse-model-path').value.trim();
}
/**
 * Fill the whisper-server edit modal from a local-model record and open it.
 *
 * @param {object} m  Local model record; reads `m.path` and the optional
 *                    `m.settings` (alias, server_path, port, gpu_device,
 *                    load_mode, model_path).
 */
function _openWhisperServerEdit(m) {
  const s = m.settings || {};
  const byId = id => document.getElementById(id);

  byId('wse-model-id').value = m.path;
  byId('wse-id-label').textContent = m.path;
  byId('wse-alias').value = s.alias || '';
  byId('wse-server-path').value = s.server_path || defaultWhisperServerPath();
  byId('wse-port').value = s.port ?? 8744;
  byId('wse-gpu-device').value = s.gpu_device ?? 0;
  byId('wse-load-mode').value = s.load_mode || 'on-request';

  // Populate the GGUF select with current options from the add-form's list
  const srcSelect = byId('ws-gguf-select');
  const dstSelect = byId('wse-gguf-select');
  dstSelect.innerHTML = srcSelect ? srcSelect.innerHTML : '<option value="">Select downloaded GGUF</option>';

  const modelPath = s.model_path || '';
  const inGguf = modelPath && _ggufFiles.some(f => f.path === modelPath);
  byId('wse-model-source').value = inGguf ? 'cached-gguf' : 'manual-path';
  // The modal's inputs keep their values between openings, so always
  // overwrite the manual-path field. Otherwise a path typed for a
  // previously edited model would reappear (and could be saved) when the
  // user switches this model's source to "Manual path".
  byId('wse-model-path').value = modelPath;
  if (inGguf) {
    dstSelect.value = modelPath;
  }

  toggleWseModelSource();
  openModal('ws-edit-modal');
}
/**
 * Submit the whisper-server edit modal to `/admin/api/model-configure`.
 *
 * Alerts and returns early when no model path is set. On success the modal
 * is closed and the local model list is refreshed; on failure the error
 * message is shown in an alert.
 */
async function saveWhisperServerEdit() {
  const byId = id => document.getElementById(id);
  const model_id = byId('wse-model-id').value;
  const model_source = byId('wse-model-source').value;
  const model_path = _getWseModelPath();
  if (!model_path) { alert('Model path is required'); return; }
  // NOTE(review): this payload carries no used_vram_gb. If the endpoint
  // replaces the stored entry wholesale, a value set when the model was
  // added would be dropped by an edit — confirm against the server handler.
  const payload = {
    model_id,
    backend: 'whisper-server',
    model_source,
    server_path: byId('wse-server-path').value.trim(),
    model_path,
    port: parseInt(byId('wse-port').value, 10) || 8744,
    gpu_device: parseInt(byId('wse-gpu-device').value, 10) || 0,
    load_mode: byId('wse-load-mode').value,
    alias: byId('wse-alias').value.trim() || null,
  };
  try {
    const r = await fetch('/admin/api/model-configure', {
      method: 'POST', headers: {'Content-Type': 'application/json'},
      body: JSON.stringify(payload)
    });
    if (!r.ok) {
      // An error response may not have a JSON body (e.g. a plain-text 500);
      // fall back to the generic message instead of surfacing a parse error.
      const err = await r.json().catch(() => null);
      throw new Error(err?.detail || 'Failed to save');
    }
    closeModal('ws-edit-modal');
    refreshLocal();
  } catch (e) { alert('Error: ' + e.message); }
}
function openCfgModal(idx){
const m = _localModels[idx];
if (m.cacheType === 'whisper-server') {
_openWhisperServerEdit(m);
return;
}
const s = m.settings || {};
document.getElementById('cfg-modal-title').textContent = m.in_config ? 'Configure model' : 'Add to CoderAI';
document.getElementById('cfg-id-label').textContent = m.label;
......@@ -1453,9 +1588,6 @@ function openCfgModal(idx){
document.getElementById('cfg-parser').value = s.parser || 'auto';
document.getElementById('cfg-tools').checked = !!s.tools_closer_prompt;
document.getElementById('cfg-grammar').checked = !!s.grammar_guided;
if (m.cacheType === 'whisper-server') {
document.getElementById('cfg-backend').value = 'cpu';
}
openModal('cfg-modal');
}
......@@ -1533,6 +1665,7 @@ async function addWhisperServerModel(){
gpu_device: parseInt(document.getElementById('ws-gpu-device').value, 10) || 0,
load_mode: document.getElementById('ws-load-mode').value,
used_vram_gb: Number.isNaN(usedVram) ? null : usedVram,
alias: document.getElementById('ws-alias').value.trim() || null,
};
try{
const r = await fetch('/admin/api/model-configure', {
......@@ -1551,6 +1684,8 @@ async function addWhisperServerModel(){
document.getElementById('ws-gpu-device').value = '0';
document.getElementById('ws-load-mode').value = 'on-request';
document.getElementById('ws-used-vram').value = '';
document.getElementById('ws-alias').value = '';
document.getElementById('ws-submit-btn').textContent = 'Add model';
toggleWhisperModelSource();
refreshLocal();
}catch(e){ alert('Error: '+e.message); }
......
......@@ -134,8 +134,12 @@ async def create_transcription(
if len(file_content) > _MAX_AUDIO_BYTES:
raise HTTPException(status_code=413, detail="Audio file too large (max 100 MB)")
# Check if the requested model maps to a configured whisper-server instance first
whisper_server = multi_model_manager.whisper_servers.get(model)
# Check if the requested model maps to a configured whisper-server instance first.
# Try alias round-robin resolution before direct ID lookup.
whisper_server = (
multi_model_manager.resolve_whisper_alias(model)
or multi_model_manager.whisper_servers.get(model)
)
if whisper_server is not None:
multi_model_manager.request_model(requested_model=model, model_type="audio")
if not whisper_server.is_running():
......
......@@ -373,12 +373,14 @@ def main():
continue
if isinstance(m, dict) and m.get("backend") == "whisper-server":
cfg = _model_cfg(m, "audio")
alias = (m.get("alias") or "").strip() or None
cfg.update({
"backend": "whisper-server",
"server_path": m.get("server_path", ""),
"model_path": m.get("model_path") or None,
"port": int(m.get("port", 8744)),
"gpu_device": int(m.get("gpu_device", 0)),
"alias": alias,
})
multi_model_manager.register_whisper_server(
model_id=mid,
......@@ -387,6 +389,7 @@ def main():
port=int(m.get("port", 8744)),
gpu_device=int(m.get("gpu_device", 0)),
config=cfg,
alias=alias,
)
else:
multi_model_manager.set_audio_model(mid, config=_model_cfg(m, "audio"))
......
......@@ -499,6 +499,8 @@ class MultiModelManager:
self.model_aliases: Dict[str, str] = {}
self.whisper_server: Optional[WhisperServerManager] = None # legacy single-instance compat
self.whisper_servers: Dict[str, WhisperServerManager] = {} # id -> manager
self.whisper_aliases: Dict[str, List[str]] = {} # alias -> [model_id, ...]
self._whisper_alias_counters: Dict[str, int] = {} # alias -> next round-robin index
self.model_backend_types: Dict[str, str] = {}
self.tool_breaker = FuzzyToolBreaker(threshold=3) # Circuit breaker for repetitive tool calls
self._load_lock = threading.Lock() # Prevents duplicate on-demand model loads
......@@ -761,7 +763,8 @@ class MultiModelManager:
print(f"Audio model '{model_name}' cached as: {resolved_model}")
def register_whisper_server(self, model_id: str, server_path: str, model_path: str = None,
port: int = 8744, gpu_device: int = 0, config: Dict = None):
port: int = 8744, gpu_device: int = 0, config: Dict = None,
alias: str = None):
"""Register a whisper-server instance as an audio model."""
wsm = WhisperServerManager(server_path=server_path, port=port)
wsm._model_path = model_path
......@@ -776,8 +779,25 @@ class MultiModelManager:
if model_id not in self.audio_models:
self.audio_models.append(model_id)
self.config[f"audio:{model_id}"] = cfg
print(f"Registered whisper-server audio model: {model_id} (server: {server_path})")
# Register alias for round-robin routing
if alias:
wsm._alias = alias
ids = self.whisper_aliases.setdefault(alias, [])
if model_id not in ids:
ids.append(model_id)
self._whisper_alias_counters.setdefault(alias, 0)
print(f"Registered whisper-server audio model: {model_id} (server: {server_path})"
+ (f" alias={alias}" if alias else ""))
return wsm
def resolve_whisper_alias(self, name: str) -> Optional[WhisperServerManager]:
    """Pick the next whisper-server registered under the alias *name*.

    The ids in ``self.whisper_aliases[name]`` are handed out in rotation,
    using the per-alias cursor kept in ``self._whisper_alias_counters``.

    Returns ``None`` when *name* is not a known alias, when the alias has
    no ids, or when the selected id has no entry in
    ``self.whisper_servers``.
    """
    members = self.whisper_aliases.get(name)
    if members:
        # Wrap the stored cursor so it always lands on a valid position.
        position = self._whisper_alias_counters.get(name, 0) % len(members)
        # Advance the cursor for the following call.
        self._whisper_alias_counters[name] = position + 1
        return self.whisper_servers.get(members[position])
    return None
def set_tts_model(self, model_name: str, config: Dict = None):
"""Set the text-to-speech model and download/cache it if needed."""
......@@ -2033,6 +2053,8 @@ class MultiModelManager:
capabilities=caps.to_list(),
backend=meta.get("backend"),
model_path=meta.get("model_path"),
server_path=meta.get("server_path"),
alias=meta.get("alias"),
port=meta.get("port"),
gpu_device=meta.get("gpu_device"),
load_mode=meta.get("load_mode"),
......@@ -2051,13 +2073,19 @@ class MultiModelManager:
if isinstance(m, str):
mid = m
else:
mid = m.get("alias") or m.get("path") or m.get("id") or ""
raw = m.get("path") or m.get("id") or ""
if raw and raw != mid:
_add(raw, mtype, m)
short = raw.split("/")[-1] if "/" in raw else raw
if short != raw:
_add(short, mtype, m)
alias = m.get("alias") or ""
# whisper-server aliases are round-robin group keys shared across
# multiple instances — don't expose the alias as a separate model
if m.get("backend") == "whisper-server":
mid = raw
else:
mid = alias or raw
if raw and raw != mid:
_add(raw, mtype, m)
short = raw.split("/")[-1] if "/" in raw else raw
if short != raw:
_add(short, mtype, m)
if mid:
_add(mid, mtype, m if isinstance(m, dict) else None)
short = mid.split("/")[-1] if "/" in mid else mid
......
......@@ -123,6 +123,8 @@ class ModelInfo(BaseModel):
capabilities: Optional[List[str]] = None # list of capability strings
backend: Optional[str] = None
model_path: Optional[str] = None
server_path: Optional[str] = None
alias: Optional[str] = None
port: Optional[int] = None
gpu_device: Optional[int] = None
load_mode: Optional[str] = None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment