Fix multimodel

aa34ff9a · Stefy Lanza (nextime / spora) · f524118d · aa34ff9a · aa34ff9a · aa34ff9a
Commit aa34ff9a authored May 07, 2026 by Stefy Lanza (nextime / spora)
7 changed files
--- a/codai/admin/routes.py
+++ b/codai/admin/routes.py
@@ -338,17 +338,27 @@ async def api_status(username: str = Depends(require_auth)):
    except Exception:
        pass
-    # Enabled (configured) models
+    # Enabled (configured) models + aliases
    enabled_models = []
+    enabled_aliases: dict = {}  # alias -> [model_id, ...]
    try:
        if config_manager:
            md = config_manager.models_data
            for cat in ("text_models", "image_models", "audio_models", "vision_models", "tts_models",
                        "video_models", "audio_gen_models", "embedding_models"):
                for m in md.get(cat, []):
-                    mid = (m.get("path") or m.get("id") or m) if isinstance(m, dict) else m
+                    if isinstance(m, dict):
+                        mid = m.get("path") or m.get("id") or ""
+                        alias = (m.get("alias") or "").strip()
+                    else:
+                        mid = m
+                        alias = ""
                    if mid and mid not in enabled_models:
                        enabled_models.append(mid)
+                    if alias:
+                        enabled_aliases.setdefault(alias, [])
+                        if mid and mid not in enabled_aliases[alias]:
+                            enabled_aliases[alias].append(mid)
    except Exception:
        pass
@@ -399,6 +409,7 @@ async def api_status(username: str = Depends(require_auth)):
        "models_loaded": len(loaded_keys),
        "loaded_models": loaded_keys,
        "enabled_models": enabled_models,
+        "enabled_aliases": enabled_aliases,
        "vram": vram,
        "cuda": is_cuda,
        "requests": {
@@ -1356,13 +1367,13 @@ async def api_model_configure(request: Request, username: str = Depends(require_
        gpu_device = int(data.get("gpu_device", 0))
        if gpu_device < 0:
            raise HTTPException(status_code=400, detail="gpu_device must be >= 0")
-        for existing in config_manager.models_data.get("audio_models", []):
+        # Remove existing entry with same id (update semantics)
-            if (
+        audio_list = config_manager.models_data.get("audio_models", [])
-                isinstance(existing, dict)
+        config_manager.models_data["audio_models"] = [
-                and existing.get("backend") == "whisper-server"
+            m for m in audio_list
-                and existing.get("id") == model_id
+            if not (isinstance(m, dict) and m.get("id") == model_id)
-            ):
+        ]
-                raise HTTPException(status_code=409, detail=f"whisper-server model '{model_id}' already exists")
+        alias = (data.get("alias") or "").strip() or None
        entry = {
            "id": model_id,
            "backend": "whisper-server",
@@ -1374,11 +1385,16 @@ async def api_model_configure(request: Request, username: str = Depends(require_
            "model_type": "audio_models",
            "model_types": ["audio_models"],
        }
+        if alias:
+            entry["alias"] = alias
        if data.get("used_vram_gb") is not None:
            entry["used_vram_gb"] = data["used_vram_gb"]
        config_manager.models_data.setdefault("audio_models", []).append(entry)
        config_manager.save_models()
-        return {"success": True, "model_id": model_id, "model_path": model_path, "server_path": server_path}
+        result = {"success": True, "model_id": model_id, "model_path": model_path, "server_path": server_path}
+        if alias:
+            result["alias"] = alias
+        return result
    path = data.get("path") or data.get("model_id", "")
    valid = {"text_models", "image_models", "audio_models", "tts_models", "vision_models", "video_models",
             "audio_gen_models", "embedding_models"}

--- a/codai/admin/templates/dashboard.html
+++ b/codai/admin/templates/dashboard.html
@@ -79,11 +79,19 @@ async function poll() {
    const loaded = d.loaded_models || [];
    const enabled = d.enabled_models || [];
+    const aliases = d.enabled_aliases || {};
    const loadedSet = new Set(loaded);
    const notLoaded = enabled.filter(m => !loadedSet.has(m));
    let html = '';
    if(loaded.length) html += loaded.map(m => `<span class="badge badge-admin" style="margin:.125rem" title="Loaded">● ${m}</span>`).join('');
    if(notLoaded.length) html += notLoaded.map(m => `<span class="badge" style="margin:.125rem;opacity:.55" title="Enabled, not loaded">○ ${m}</span>`).join('');
+    const aliasEntries = Object.entries(aliases);
+    if(aliasEntries.length) {
+      html += aliasEntries.map(([alias, ids]) => {
+        const tip = ids.length > 1 ? `Round-robin alias → ${ids.join(', ')}` : `Alias → ${ids[0] || ''}`;
+        return `<span class="badge" style="margin:.125rem;background:var(--bg);border:1px solid var(--border);color:var(--text-2)" title="${tip}">⇄ ${alias}</span>`;
+      }).join('');
+    }
    document.getElementById('active-models').innerHTML = html || '<span class="muted small">No models loaded</span>';
    if (d.vram) {

--- a/codai/admin/templates/models.html
+++ b/codai/admin/templates/models.html
--- a/codai/api/transcriptions.py
+++ b/codai/api/transcriptions.py
@@ -134,8 +134,12 @@ async def create_transcription(
    if len(file_content) > _MAX_AUDIO_BYTES:
        raise HTTPException(status_code=413, detail="Audio file too large (max 100 MB)")
-    # Check if the requested model maps to a configured whisper-server instance first
+    # Check if the requested model maps to a configured whisper-server instance first.
-    whisper_server = multi_model_manager.whisper_servers.get(model)
+    # Try alias round-robin resolution before direct ID lookup.
+    whisper_server = (
+        multi_model_manager.resolve_whisper_alias(model)
+        or multi_model_manager.whisper_servers.get(model)
+    )
    if whisper_server is not None:
        multi_model_manager.request_model(requested_model=model, model_type="audio")
        if not whisper_server.is_running():

--- a/codai/main.py
+++ b/codai/main.py
@@ -373,12 +373,14 @@ def main():
            continue
        if isinstance(m, dict) and m.get("backend") == "whisper-server":
            cfg = _model_cfg(m, "audio")
+            alias = (m.get("alias") or "").strip() or None
            cfg.update({
                "backend": "whisper-server",
                "server_path": m.get("server_path", ""),
                "model_path": m.get("model_path") or None,
                "port": int(m.get("port", 8744)),
                "gpu_device": int(m.get("gpu_device", 0)),
+                "alias": alias,
            })
            multi_model_manager.register_whisper_server(
                model_id=mid,
@@ -387,6 +389,7 @@ def main():
                port=int(m.get("port", 8744)),
                gpu_device=int(m.get("gpu_device", 0)),
                config=cfg,
+                alias=alias,
            )
        else:
            multi_model_manager.set_audio_model(mid, config=_model_cfg(m, "audio"))

--- a/codai/models/manager.py
+++ b/codai/models/manager.py
@@ -499,6 +499,8 @@ class MultiModelManager:
        self.model_aliases: Dict[str, str] = {}
        self.whisper_server: Optional[WhisperServerManager] = None  # legacy single-instance compat
        self.whisper_servers: Dict[str, WhisperServerManager] = {}  # id -> manager
+        self.whisper_aliases: Dict[str, List[str]] = {}  # alias -> [model_id, ...]
+        self._whisper_alias_counters: Dict[str, int] = {}  # alias -> next round-robin index
        self.model_backend_types: Dict[str, str] = {}
        self.tool_breaker = FuzzyToolBreaker(threshold=3)  # Circuit breaker for repetitive tool calls
        self._load_lock = threading.Lock()  # Prevents duplicate on-demand model loads
@@ -761,7 +763,8 @@ class MultiModelManager:
            print(f"Audio model '{model_name}' cached as: {resolved_model}")
    def register_whisper_server(self, model_id: str, server_path: str, model_path: str = None,
-                                 port: int = 8744, gpu_device: int = 0, config: Dict = None):
+                                 port: int = 8744, gpu_device: int = 0, config: Dict = None,
+                                 alias: str = None):
        """Register a whisper-server instance as an audio model."""
        wsm = WhisperServerManager(server_path=server_path, port=port)
        wsm._model_path = model_path
@@ -776,8 +779,25 @@ class MultiModelManager:
        if model_id not in self.audio_models:
            self.audio_models.append(model_id)
        self.config[f"audio:{model_id}"] = cfg
-        print(f"Registered whisper-server audio model: {model_id} (server: {server_path})")
+        # Register alias for round-robin routing
+        if alias:
+            wsm._alias = alias
+            ids = self.whisper_aliases.setdefault(alias, [])
+            if model_id not in ids:
+                ids.append(model_id)
+            self._whisper_alias_counters.setdefault(alias, 0)
+        print(f"Registered whisper-server audio model: {model_id} (server: {server_path})"
+              + (f" alias={alias}" if alias else ""))
        return wsm
+    def resolve_whisper_alias(self, name: str) -> Optional[WhisperServerManager]:
+        """Return the next round-robin WhisperServerManager for an alias, or None.
+
+        Args:
+            name: Alias to look up in ``self.whisper_aliases``.
+
+        Returns:
+            The manager stored in ``self.whisper_servers`` under the selected
+            model id, or None when the alias is unknown, maps to an empty id
+            list, or the selected id has no entry in ``self.whisper_servers``.
+        """
+        ids = self.whisper_aliases.get(name)
+        if not ids:
+            return None
+        # Wrap the stored counter into the valid index range of ``ids``.
+        idx = self._whisper_alias_counters.get(name, 0) % len(ids)
+        # Remember the following position; the modulo above wraps it on the next call.
+        self._whisper_alias_counters[name] = idx + 1
+        return self.whisper_servers.get(ids[idx])
    def set_tts_model(self, model_name: str, config: Dict = None):
        """Set the text-to-speech model and download/cache it if needed."""
@@ -2033,6 +2053,8 @@ class MultiModelManager:
                capabilities=caps.to_list(),
                backend=meta.get("backend"),
                model_path=meta.get("model_path"),
+                server_path=meta.get("server_path"),
+                alias=meta.get("alias"),
                port=meta.get("port"),
                gpu_device=meta.get("gpu_device"),
                load_mode=meta.get("load_mode"),
@@ -2051,13 +2073,19 @@ class MultiModelManager:
                        if isinstance(m, str):
                            mid = m
                        else:
-                            mid = m.get("alias") or m.get("path") or m.get("id") or ""
                            raw = m.get("path") or m.get("id") or ""
-                            if raw and raw != mid:
+                            alias = m.get("alias") or ""
-                                _add(raw, mtype, m)
+                            # whisper-server aliases are round-robin group keys shared across
-                                short = raw.split("/")[-1] if "/" in raw else raw
+                            # multiple instances — don't expose the alias as a separate model
-                                if short != raw:
+                            if m.get("backend") == "whisper-server":
-                                    _add(short, mtype, m)
+                                mid = raw
+                            else:
+                                mid = alias or raw
+                                if raw and raw != mid:
+                                    _add(raw, mtype, m)
+                                    short = raw.split("/")[-1] if "/" in raw else raw
+                                    if short != raw:
+                                        _add(short, mtype, m)
                        if mid:
                            _add(mid, mtype, m if isinstance(m, dict) else None)
                            short = mid.split("/")[-1] if "/" in mid else mid

--- a/codai/pydantic/textrequest.py
+++ b/codai/pydantic/textrequest.py
@@ -123,6 +123,8 @@ class ModelInfo(BaseModel):
    capabilities: Optional[List[str]] = None  # list of capability strings
    backend: Optional[str] = None
    model_path: Optional[str] = None
+    server_path: Optional[str] = None
+    alias: Optional[str] = None
    port: Optional[int] = None
    gpu_device: Optional[int] = None
    load_mode: Optional[str] = None