Fix multimodel

aa34ff9a · Stefy Lanza (nextime / spora ) · f524118d · aa34ff9a · aa34ff9a · aa34ff9a
Commit aa34ff9a authored May 07, 2026 by Stefy Lanza (nextime / spora )
7 changed files
--- a/codai/admin/routes.py
+++ b/codai/admin/routes.py
@@ -338,17 +338,27 @@ async def api_status(username: str = Depends(require_auth)):
    except Exception:
        pass

-    # Enabled (configured) models
+    # Enabled (configured) models + aliases
    enabled_models = []
+    enabled_aliases: dict = {}  # alias -> [model_id, ...]
    try:
        if config_manager:
            md = config_manager.models_data
            for cat in ("text_models", "image_models", "audio_models", "vision_models", "tts_models",
                        "video_models", "audio_gen_models", "embedding_models"):
                for m in md.get(cat, []):
-                    mid = (m.get("path") or m.get("id") or m) if isinstance(m, dict) else m
+                    if isinstance(m, dict):
+                        mid = m.get("path") or m.get("id") or ""
+                        alias = (m.get("alias") or "").strip()
+                    else:
+                        mid = m
+                        alias = ""
                    if mid and mid not in enabled_models:
                        enabled_models.append(mid)
+                    if alias:
+                        enabled_aliases.setdefault(alias, [])
+                        if mid and mid not in enabled_aliases[alias]:
+                            enabled_aliases[alias].append(mid)
    except Exception:
        pass

@@ -399,6 +409,7 @@ async def api_status(username: str = Depends(require_auth)):
        "models_loaded": len(loaded_keys),
        "loaded_models": loaded_keys,
        "enabled_models": enabled_models,
+        "enabled_aliases": enabled_aliases,
        "vram": vram,
        "cuda": is_cuda,
        "requests": {
@@ -1356,13 +1367,13 @@ async def api_model_configure(request: Request, username: str = Depends(require_
        gpu_device = int(data.get("gpu_device", 0))
        if gpu_device < 0:
            raise HTTPException(status_code=400, detail="gpu_device must be >= 0")
-        for existing in config_manager.models_data.get("audio_models", []):
-            if (
-                isinstance(existing, dict)
-                and existing.get("backend") == "whisper-server"
-                and existing.get("id") == model_id
-            ):
-                raise HTTPException(status_code=409, detail=f"whisper-server model '{model_id}' already exists")
+        # Remove existing entry with same id (update semantics)
+        audio_list = config_manager.models_data.get("audio_models", [])
+        config_manager.models_data["audio_models"] = [
+            m for m in audio_list
+            if not (isinstance(m, dict) and m.get("id") == model_id)
+        ]
+        alias = (data.get("alias") or "").strip() or None
        entry = {
            "id": model_id,
            "backend": "whisper-server",
@@ -1374,11 +1385,16 @@ async def api_model_configure(request: Request, username: str = Depends(require_
            "model_type": "audio_models",
            "model_types": ["audio_models"],
        }
+        if alias:
+            entry["alias"] = alias
        if data.get("used_vram_gb") is not None:
            entry["used_vram_gb"] = data["used_vram_gb"]
        config_manager.models_data.setdefault("audio_models", []).append(entry)
        config_manager.save_models()
-        return {"success": True, "model_id": model_id, "model_path": model_path, "server_path": server_path}
+        result = {"success": True, "model_id": model_id, "model_path": model_path, "server_path": server_path}
+        if alias:
+            result["alias"] = alias
+        return result
    path = data.get("path") or data.get("model_id", "")
    valid = {"text_models", "image_models", "audio_models", "tts_models", "vision_models", "video_models",
             "audio_gen_models", "embedding_models"}

--- a/codai/admin/templates/dashboard.html
+++ b/codai/admin/templates/dashboard.html
@@ -79,11 +79,19 @@ async function poll() {

    const loaded = d.loaded_models || [];
    const enabled = d.enabled_models || [];
+    const aliases = d.enabled_aliases || {};
    const loadedSet = new Set(loaded);
    const notLoaded = enabled.filter(m => !loadedSet.has(m));
    let html = '';
    if(loaded.length) html += loaded.map(m => `<span class="badge badge-admin" style="margin:.125rem" title="Loaded">● ${m}</span>`).join('');
    if(notLoaded.length) html += notLoaded.map(m => `<span class="badge" style="margin:.125rem;opacity:.55" title="Enabled, not loaded">○ ${m}</span>`).join('');
+    const aliasEntries = Object.entries(aliases);
+    if(aliasEntries.length) {
+      html += aliasEntries.map(([alias, ids]) => {
+        const tip = ids.length > 1 ? `Round-robin alias → ${ids.join(', ')}` : `Alias → ${ids[0] || ''}`;
+        return `<span class="badge" style="margin:.125rem;background:var(--bg);border:1px solid var(--border);color:var(--text-2)" title="${tip}">⇄ ${alias}</span>`;
+      }).join('');
+    }
    document.getElementById('active-models').innerHTML = html || '<span class="muted small">No models loaded</span>';

    if (d.vram) {

--- a/codai/admin/templates/models.html
+++ b/codai/admin/templates/models.html
@@ -118,10 +118,10 @@
    </div>
    <div style="display:grid;grid-template-columns:repeat(2,minmax(0,1fr));gap:.75rem;margin-top:.75rem">
      <input id="ws-used-vram" class="form-input" type="number" min="0" step="0.1" placeholder="Used VRAM (optional)">
-      <div></div>
+      <input id="ws-alias" class="form-input" placeholder="Alias (optional, e.g. whisper)">
    </div>
    <div class="form-actions" style="margin-top:.75rem">
-      <button class="btn btn-primary" onclick="addWhisperServerModel()">Add model</button>
+      <button class="btn btn-primary" id="ws-submit-btn" onclick="addWhisperServerModel()">Add model</button>
    </div>
  </div>
 </div>
@@ -329,6 +329,69 @@
  </div>
 </div>

+<!-- Whisper-server edit modal: fields are filled by _openWhisperServerEdit() and submitted by saveWhisperServerEdit() -->
+<div id="ws-edit-modal" class="modal">
+  <div class="modal-box" style="max-width:520px">
+    <div class="modal-head">
+      <span class="modal-title">Edit whisper-server model</span>
+      <button class="modal-close" onclick="closeModal('ws-edit-modal')">×</button>
+    </div>
+    <div class="modal-body">
+      <input type="hidden" id="wse-model-id"> <!-- carries the model id that is sent back as model_id on save -->
+      <div class="form-row">
+        <label class="form-label">Model ID</label>
+        <div id="wse-id-label" style="font-size:12px;font-family:monospace;color:var(--text-2);padding:.3rem 0"></div>
+      </div>
+      <div class="form-row">
+        <label class="form-label">Alias <span class="muted small">(optional — shared by multiple instances for round-robin)</span></label>
+        <input id="wse-alias" class="form-input" placeholder="e.g. whisper"> <!-- an empty value is sent as null -->
+      </div>
+      <div class="form-row">
+        <label class="form-label">Server path</label>
+        <input id="wse-server-path" class="form-input" placeholder="/usr/local/bin/whisper-server">
+      </div>
+      <div class="form-row">
+        <label class="form-label">Model source</label>
+        <select id="wse-model-source" class="form-input" onchange="toggleWseModelSource()"> <!-- decides which of the two rows below is visible -->
+          <option value="cached-gguf">Downloaded GGUF</option>
+          <option value="manual-path">Manual path</option>
+        </select>
+      </div>
+      <div class="form-row" id="wse-gguf-row">
+        <label class="form-label">Downloaded GGUF</label>
+        <select id="wse-gguf-select" class="form-input"> <!-- options are copied from #ws-gguf-select each time the modal opens -->
+          <option value="">Select downloaded GGUF</option>
+        </select>
+      </div>
+      <div class="form-row" id="wse-path-row" style="display:none">
+        <label class="form-label">Model path</label>
+        <input id="wse-model-path" class="form-input" placeholder="/path/to/ggml-model.bin">
+      </div>
+      <div style="display:grid;grid-template-columns:1fr 1fr 1fr;gap:.75rem">
+        <div class="form-row" style="margin:0">
+          <label class="form-label">Port</label>
+          <input id="wse-port" class="form-input" type="number" min="1" max="65535"> <!-- save falls back to 8744 when this is empty or 0 -->
+        </div>
+        <div class="form-row" style="margin:0">
+          <label class="form-label">GPU device</label>
+          <input id="wse-gpu-device" class="form-input" type="number" min="0"> <!-- save falls back to 0 when this is empty -->
+        </div>
+        <div class="form-row" style="margin:0">
+          <label class="form-label">Load mode</label>
+          <select id="wse-load-mode" class="form-input">
+            <option value="on-request">On request</option>
+            <option value="load">Load</option>
+          </select>
+        </div>
+      </div>
+      <div class="form-actions" style="margin-top:1rem">
+        <button class="btn btn-primary" onclick="saveWhisperServerEdit()">Save</button>
+        <button class="btn btn-ghost" onclick="closeModal('ws-edit-modal')">Cancel</button>
+      </div>
+    </div>
+  </div>
+</div>
+
 <!-- Model configuration modal -->
 <div id="cfg-modal" class="modal">
  <div class="modal-box" style="max-width:600px;max-height:92vh;overflow-y:auto">
@@ -1037,20 +1100,21 @@ function _renderWhisperServerRows(models){
      size_gb:0,
      defaultType:'audio_models',
      settings:{
-        backend:m.backend || 'whisper-server',
-        load_mode:m.load_mode || 'on-request',
-        model_type:'audio_models',
-        model_path:m.model_path || '',
-        port:m.port,
-        gpu_device:m.gpu_device,
+        backend: m.backend || 'whisper-server',
+        load_mode: m.load_mode || 'on-request',
+        model_type: 'audio_models',
+        model_path: m.model_path || '',
+        server_path: m.server_path || '',
+        alias: m.alias || '',
+        port: m.port ?? 8744,
+        gpu_device: m.gpu_device ?? 0,
      },
      in_config:true,
      capabilities:m.capabilities || ['speech_to_text']
    });
    const loaded = _loadedKeys.has(`audio:${m.id}`) || _loadedKeys.has(m.id);
    return `<tr style="border-top:1px solid var(--border)">
-      <td style="padding:.4rem .25rem;font-family:monospace;font-size:12px">${esc(m.id)}</td>
-      <td style="padding:.4rem .25rem"><span class="badge badge-ok">${esc(m.backend || 'whisper-server')}</span></td>
+      <td style="padding:.4rem .25rem;font-family:monospace;font-size:12px">${esc(m.id)}${m.alias?`<br><span style="color:var(--text-2);font-size:10px">alias: ${esc(m.alias)}</span>`:''}</td>
      <td style="padding:.4rem .25rem;font-size:11px;color:var(--text-2);max-width:160px;overflow:hidden;text-overflow:ellipsis;display:-webkit-box;-webkit-line-clamp:2;-webkit-box-orient:vertical;line-height:1.25;max-height:2.5em" title="${esc(m.model_path || "—")}">${esc(m.model_path || "—")}</td>
      <td style="padding:.4rem .25rem;font-size:11px;color:var(--text-2)">${m.port ?? '—'} / GPU ${m.gpu_device ?? 0}</td>
      <td style="padding:.4rem .25rem;font-size:11px;color:var(--text-2)">${esc(m.load_mode || 'on-request')}</td>
@@ -1059,7 +1123,7 @@ function _renderWhisperServerRows(models){
        ${loaded
          ?`<button class="btn btn-ghost btn-sm" onclick="unloadModel(${idx})">Unload</button>`
          :`<button class="btn btn-primary btn-sm" onclick="loadModel(${idx})">Load now</button>`}
-        <button class="btn btn-secondary btn-sm" onclick="openCfgModal(${idx})">Configure</button>
+        <button class="btn btn-secondary btn-sm" onclick="openCfgModal(${idx})">Edit</button>
        <button class="btn btn-ghost btn-sm" onclick="disableModel(${idx})">Remove</button>
      </td>
    </tr>`;
@@ -1068,8 +1132,7 @@ function _renderWhisperServerRows(models){
    '<div class="card-title">Configured whisper-server models</div>'+
    '<table style="width:100%;border-collapse:collapse;font-size:13px">'+
    '<thead><tr style="color:var(--text-2);font-size:10px;text-transform:uppercase;letter-spacing:.05em">'+
-    '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model</th>'+
-    '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Backend</th>'+
+    '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model / Alias</th>'+
    '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model path</th>'+
    '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Port / GPU</th>'+
    '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Load mode</th>'+
@@ -1234,7 +1297,6 @@ async function loadCachedModels(){
        '<th style="text-align:center;padding:.3rem .25rem;font-weight:700">Config</th>'+
        '<th></th></tr></thead><tbody>'+rows.join('')+'</tbody></table>';
    }
-    ggufEl.insertAdjacentHTML('afterend', _renderWhisperServerRows(whisperModels));
    // Remove any previously rendered whisper-server card before inserting the new one
    document.querySelectorAll('#ws-rendered-card').forEach(el=>el.remove());
    const wsHtml = _renderWhisperServerRows(whisperModels);
@@ -1363,8 +1425,81 @@ function onCfgQuantChange(){
  document.getElementById('cfg-id-label').textContent = m.label;
 }

+function toggleWseModelSource() {  // Show the row matching the selected model source in the edit modal and hide the other one.
+  const source = document.getElementById('wse-model-source').value;  // 'cached-gguf' or 'manual-path' (the two <option> values)
+  document.getElementById('wse-gguf-row').style.display = source === 'cached-gguf' ? '' : 'none';  // '' restores the stylesheet default
+  document.getElementById('wse-path-row').style.display = source === 'manual-path' ? '' : 'none';
+}
+
+function _getWseModelPath() {  // Return the model path from whichever input the selected source makes active.
+  const source = document.getElementById('wse-model-source').value;
+  return source === 'cached-gguf'
+    ? document.getElementById('wse-gguf-select').value  // '' while the placeholder option is selected
+    : document.getElementById('wse-model-path').value.trim();  // any source other than 'cached-gguf' reads the manual input
+}
+
+function _openWhisperServerEdit(m) {  // Fill the whisper-server edit modal from a local-model entry, then open it.
+  const s = m.settings || {};
+  document.getElementById('wse-model-id').value = m.path;  // NOTE(review): presumably m.path holds the configured model id — confirm against _renderWhisperServerRows
+  document.getElementById('wse-id-label').textContent = m.path;
+  document.getElementById('wse-alias').value = s.alias || '';
+  document.getElementById('wse-server-path').value = s.server_path || defaultWhisperServerPath();  // helper is defined outside this hunk
+  document.getElementById('wse-port').value = s.port ?? 8744;  // ?? only replaces null/undefined
+  document.getElementById('wse-gpu-device').value = s.gpu_device ?? 0;
+  document.getElementById('wse-load-mode').value = s.load_mode || 'on-request';
+
+  // Populate the GGUF select with current options from the add-form's list
+  const srcSelect = document.getElementById('ws-gguf-select');
+  const dstSelect = document.getElementById('wse-gguf-select');
+  dstSelect.innerHTML = srcSelect ? srcSelect.innerHTML : '<option value="">Select downloaded GGUF</option>';  // placeholder only when the add-form select is missing
+
+  const modelPath = s.model_path || '';
+  const inGguf = modelPath && _ggufFiles.some(f => f.path === modelPath);  // _ggufFiles is defined outside this hunk; assumed to be a list of entries with a .path — TODO confirm
+  if (inGguf) {  // the manual-path input is not reset in this branch; it is only hidden by the toggle below
+    document.getElementById('wse-model-source').value = 'cached-gguf';
+    dstSelect.value = modelPath;  // assumes the copied option values are file paths — TODO confirm
+  } else {
+    document.getElementById('wse-model-source').value = 'manual-path';
+    document.getElementById('wse-model-path').value = modelPath;
+  }
+  toggleWseModelSource();  // sync row visibility with the source chosen above
+  openModal('ws-edit-modal');
+}
+
+async function saveWhisperServerEdit() {  // Collect the edit-modal fields and POST them to the model-configure endpoint.
+  const model_id = document.getElementById('wse-model-id').value;
+  const model_source = document.getElementById('wse-model-source').value;
+  const model_path = _getWseModelPath();
+  if (!model_path) { alert('Model path is required'); return; }  // nothing is sent without a model path
+  const payload = {  // NOTE(review): no used_vram_gb here, while the server hunk replaces the whole entry for this id — confirm a stored value is not lost on edit
+    model_id,
+    backend: 'whisper-server',
+    model_source,
+    server_path: document.getElementById('wse-server-path').value.trim(),
+    model_path,
+    port: parseInt(document.getElementById('wse-port').value, 10) || 8744,  // NaN (empty input) and 0 both fall back to 8744
+    gpu_device: parseInt(document.getElementById('wse-gpu-device').value, 10) || 0,  // NaN falls back to 0
+    load_mode: document.getElementById('wse-load-mode').value,
+    alias: document.getElementById('wse-alias').value.trim() || null,  // blank alias is sent as null
+  };
+  try {
+    const r = await fetch('/admin/api/model-configure', {  // same endpoint the add form posts to
+      method: 'POST', headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify(payload)
+    });
+    const d = await r.json();  // parsed before the r.ok check, so a non-JSON error body surfaces as a parse error in the catch
+    if (!r.ok) throw new Error(d.detail || 'Failed to save');
+    closeModal('ws-edit-modal');
+    refreshLocal();  // defined outside this hunk
+  } catch(e) { alert('Error: ' + e.message); }
+}
+
 function openCfgModal(idx){
  const m = _localModels[idx];
+  if (m.cacheType === 'whisper-server') {
+    _openWhisperServerEdit(m);
+    return;
+  }
  const s = m.settings || {};
  document.getElementById('cfg-modal-title').textContent = m.in_config ? 'Configure model' : 'Add to CoderAI';
  document.getElementById('cfg-id-label').textContent = m.label;
@@ -1453,9 +1588,6 @@ function openCfgModal(idx){
  document.getElementById('cfg-parser').value = s.parser || 'auto';
  document.getElementById('cfg-tools').checked = !!s.tools_closer_prompt;
  document.getElementById('cfg-grammar').checked = !!s.grammar_guided;
-  if (m.cacheType === 'whisper-server') {
-    document.getElementById('cfg-backend').value = 'cpu';
-  }
  openModal('cfg-modal');
 }

@@ -1533,6 +1665,7 @@ async function addWhisperServerModel(){
    gpu_device: parseInt(document.getElementById('ws-gpu-device').value, 10) || 0,
    load_mode: document.getElementById('ws-load-mode').value,
    used_vram_gb: Number.isNaN(usedVram) ? null : usedVram,
+    alias: document.getElementById('ws-alias').value.trim() || null,
  };
  try{
    const r = await fetch('/admin/api/model-configure', {
@@ -1551,6 +1684,8 @@ async function addWhisperServerModel(){
    document.getElementById('ws-gpu-device').value = '0';
    document.getElementById('ws-load-mode').value = 'on-request';
    document.getElementById('ws-used-vram').value = '';
+    document.getElementById('ws-alias').value = '';
+    document.getElementById('ws-submit-btn').textContent = 'Add model';
    toggleWhisperModelSource();
    refreshLocal();
  }catch(e){ alert('Error: '+e.message); }

--- a/codai/api/transcriptions.py
+++ b/codai/api/transcriptions.py
@@ -134,8 +134,12 @@ async def create_transcription(
    if len(file_content) > _MAX_AUDIO_BYTES:
        raise HTTPException(status_code=413, detail="Audio file too large (max 100 MB)")

-    # Check if the requested model maps to a configured whisper-server instance first
-    whisper_server = multi_model_manager.whisper_servers.get(model)
+    # Check if the requested model maps to a configured whisper-server instance first.
+    # Try alias round-robin resolution before direct ID lookup.
+    whisper_server = (
+        multi_model_manager.resolve_whisper_alias(model)
+        or multi_model_manager.whisper_servers.get(model)
+    )
    if whisper_server is not None:
        multi_model_manager.request_model(requested_model=model, model_type="audio")
        if not whisper_server.is_running():

--- a/codai/main.py
+++ b/codai/main.py
@@ -373,12 +373,14 @@ def main():
            continue
        if isinstance(m, dict) and m.get("backend") == "whisper-server":
            cfg = _model_cfg(m, "audio")
+            alias = (m.get("alias") or "").strip() or None
            cfg.update({
                "backend": "whisper-server",
                "server_path": m.get("server_path", ""),
                "model_path": m.get("model_path") or None,
                "port": int(m.get("port", 8744)),
                "gpu_device": int(m.get("gpu_device", 0)),
+                "alias": alias,
            })
            multi_model_manager.register_whisper_server(
                model_id=mid,
@@ -387,6 +389,7 @@ def main():
                port=int(m.get("port", 8744)),
                gpu_device=int(m.get("gpu_device", 0)),
                config=cfg,
+                alias=alias,
            )
        else:
            multi_model_manager.set_audio_model(mid, config=_model_cfg(m, "audio"))

--- a/codai/models/manager.py
+++ b/codai/models/manager.py
@@ -499,6 +499,8 @@ class MultiModelManager:
        self.model_aliases: Dict[str, str] = {}
        self.whisper_server: Optional[WhisperServerManager] = None  # legacy single-instance compat
        self.whisper_servers: Dict[str, WhisperServerManager] = {}  # id -> manager
+        self.whisper_aliases: Dict[str, List[str]] = {}  # alias -> [model_id, ...]
+        self._whisper_alias_counters: Dict[str, int] = {}  # alias -> next round-robin index
        self.model_backend_types: Dict[str, str] = {}
        self.tool_breaker = FuzzyToolBreaker(threshold=3)  # Circuit breaker for repetitive tool calls
        self._load_lock = threading.Lock()  # Prevents duplicate on-demand model loads
@@ -761,7 +763,8 @@ class MultiModelManager:
            print(f"Audio model '{model_name}' cached as: {resolved_model}")

    def register_whisper_server(self, model_id: str, server_path: str, model_path: str = None,
-                                 port: int = 8744, gpu_device: int = 0, config: Dict = None):
+                                 port: int = 8744, gpu_device: int = 0, config: Dict = None,
+                                 alias: str = None):
        """Register a whisper-server instance as an audio model."""
        wsm = WhisperServerManager(server_path=server_path, port=port)
        wsm._model_path = model_path
@@ -776,8 +779,25 @@ class MultiModelManager:
        if model_id not in self.audio_models:
            self.audio_models.append(model_id)
        self.config[f"audio:{model_id}"] = cfg
-        print(f"Registered whisper-server audio model: {model_id} (server: {server_path})")
+        # Register alias for round-robin routing
+        if alias:
+            wsm._alias = alias
+            ids = self.whisper_aliases.setdefault(alias, [])
+            if model_id not in ids:
+                ids.append(model_id)
+            self._whisper_alias_counters.setdefault(alias, 0)
+        print(f"Registered whisper-server audio model: {model_id} (server: {server_path})"
+              + (f" alias={alias}" if alias else ""))
        return wsm
+
+    def resolve_whisper_alias(self, name: str) -> Optional[WhisperServerManager]:
+        """Return the next round-robin WhisperServerManager for an alias, or None."""
+        ids = self.whisper_aliases.get(name)  # model ids that register_whisper_server appended under this alias
+        if not ids:  # unknown alias or empty group
+            return None  # lets the caller fall back to a direct id lookup
+        idx = self._whisper_alias_counters.get(name, 0) % len(ids)  # modulo wraps the stored counter back to the first id
+        self._whisper_alias_counters[name] = idx + 1  # NOTE(review): unsynchronized read-modify-write — confirm whether concurrent callers need a lock
+        return self.whisper_servers.get(ids[idx])  # .get() yields None if the chosen id is not in whisper_servers
    
    def set_tts_model(self, model_name: str, config: Dict = None):
        """Set the text-to-speech model and download/cache it if needed."""
@@ -2033,6 +2053,8 @@ class MultiModelManager:
                capabilities=caps.to_list(),
                backend=meta.get("backend"),
                model_path=meta.get("model_path"),
+                server_path=meta.get("server_path"),
+                alias=meta.get("alias"),
                port=meta.get("port"),
                gpu_device=meta.get("gpu_device"),
                load_mode=meta.get("load_mode"),
@@ -2051,13 +2073,19 @@ class MultiModelManager:
                        if isinstance(m, str):
                            mid = m
                        else:
-                            mid = m.get("alias") or m.get("path") or m.get("id") or ""
                            raw = m.get("path") or m.get("id") or ""
-                            if raw and raw != mid:
-                                _add(raw, mtype, m)
-                                short = raw.split("/")[-1] if "/" in raw else raw
-                                if short != raw:
-                                    _add(short, mtype, m)
+                            alias = m.get("alias") or ""
+                            # whisper-server aliases are round-robin group keys shared across
+                            # multiple instances — don't expose the alias as a separate model
+                            if m.get("backend") == "whisper-server":
+                                mid = raw
+                            else:
+                                mid = alias or raw
+                                if raw and raw != mid:
+                                    _add(raw, mtype, m)
+                                    short = raw.split("/")[-1] if "/" in raw else raw
+                                    if short != raw:
+                                        _add(short, mtype, m)
                        if mid:
                            _add(mid, mtype, m if isinstance(m, dict) else None)
                            short = mid.split("/")[-1] if "/" in mid else mid

--- a/codai/pydantic/textrequest.py
+++ b/codai/pydantic/textrequest.py
@@ -123,6 +123,8 @@ class ModelInfo(BaseModel):
    capabilities: Optional[List[str]] = None  # list of capability strings
    backend: Optional[str] = None
    model_path: Optional[str] = None
+    server_path: Optional[str] = None
+    alias: Optional[str] = None
    port: Optional[int] = None
    gpu_device: Optional[int] = None
    load_mode: Optional[str] = None