Fix multimodel

parent f524118d
...@@ -338,17 +338,27 @@ async def api_status(username: str = Depends(require_auth)): ...@@ -338,17 +338,27 @@ async def api_status(username: str = Depends(require_auth)):
except Exception: except Exception:
pass pass
# Enabled (configured) models # Enabled (configured) models + aliases
enabled_models = [] enabled_models = []
enabled_aliases: dict = {} # alias -> [model_id, ...]
try: try:
if config_manager: if config_manager:
md = config_manager.models_data md = config_manager.models_data
for cat in ("text_models", "image_models", "audio_models", "vision_models", "tts_models", for cat in ("text_models", "image_models", "audio_models", "vision_models", "tts_models",
"video_models", "audio_gen_models", "embedding_models"): "video_models", "audio_gen_models", "embedding_models"):
for m in md.get(cat, []): for m in md.get(cat, []):
mid = (m.get("path") or m.get("id") or m) if isinstance(m, dict) else m if isinstance(m, dict):
mid = m.get("path") or m.get("id") or ""
alias = (m.get("alias") or "").strip()
else:
mid = m
alias = ""
if mid and mid not in enabled_models: if mid and mid not in enabled_models:
enabled_models.append(mid) enabled_models.append(mid)
if alias:
enabled_aliases.setdefault(alias, [])
if mid and mid not in enabled_aliases[alias]:
enabled_aliases[alias].append(mid)
except Exception: except Exception:
pass pass
...@@ -399,6 +409,7 @@ async def api_status(username: str = Depends(require_auth)): ...@@ -399,6 +409,7 @@ async def api_status(username: str = Depends(require_auth)):
"models_loaded": len(loaded_keys), "models_loaded": len(loaded_keys),
"loaded_models": loaded_keys, "loaded_models": loaded_keys,
"enabled_models": enabled_models, "enabled_models": enabled_models,
"enabled_aliases": enabled_aliases,
"vram": vram, "vram": vram,
"cuda": is_cuda, "cuda": is_cuda,
"requests": { "requests": {
...@@ -1356,13 +1367,13 @@ async def api_model_configure(request: Request, username: str = Depends(require_ ...@@ -1356,13 +1367,13 @@ async def api_model_configure(request: Request, username: str = Depends(require_
gpu_device = int(data.get("gpu_device", 0)) gpu_device = int(data.get("gpu_device", 0))
if gpu_device < 0: if gpu_device < 0:
raise HTTPException(status_code=400, detail="gpu_device must be >= 0") raise HTTPException(status_code=400, detail="gpu_device must be >= 0")
for existing in config_manager.models_data.get("audio_models", []): # Remove existing entry with same id (update semantics)
if ( audio_list = config_manager.models_data.get("audio_models", [])
isinstance(existing, dict) config_manager.models_data["audio_models"] = [
and existing.get("backend") == "whisper-server" m for m in audio_list
and existing.get("id") == model_id if not (isinstance(m, dict) and m.get("id") == model_id)
): ]
raise HTTPException(status_code=409, detail=f"whisper-server model '{model_id}' already exists") alias = (data.get("alias") or "").strip() or None
entry = { entry = {
"id": model_id, "id": model_id,
"backend": "whisper-server", "backend": "whisper-server",
...@@ -1374,11 +1385,16 @@ async def api_model_configure(request: Request, username: str = Depends(require_ ...@@ -1374,11 +1385,16 @@ async def api_model_configure(request: Request, username: str = Depends(require_
"model_type": "audio_models", "model_type": "audio_models",
"model_types": ["audio_models"], "model_types": ["audio_models"],
} }
if alias:
entry["alias"] = alias
if data.get("used_vram_gb") is not None: if data.get("used_vram_gb") is not None:
entry["used_vram_gb"] = data["used_vram_gb"] entry["used_vram_gb"] = data["used_vram_gb"]
config_manager.models_data.setdefault("audio_models", []).append(entry) config_manager.models_data.setdefault("audio_models", []).append(entry)
config_manager.save_models() config_manager.save_models()
return {"success": True, "model_id": model_id, "model_path": model_path, "server_path": server_path} result = {"success": True, "model_id": model_id, "model_path": model_path, "server_path": server_path}
if alias:
result["alias"] = alias
return result
path = data.get("path") or data.get("model_id", "") path = data.get("path") or data.get("model_id", "")
valid = {"text_models", "image_models", "audio_models", "tts_models", "vision_models", "video_models", valid = {"text_models", "image_models", "audio_models", "tts_models", "vision_models", "video_models",
"audio_gen_models", "embedding_models"} "audio_gen_models", "embedding_models"}
......
...@@ -79,11 +79,19 @@ async function poll() { ...@@ -79,11 +79,19 @@ async function poll() {
const loaded = d.loaded_models || []; const loaded = d.loaded_models || [];
const enabled = d.enabled_models || []; const enabled = d.enabled_models || [];
const aliases = d.enabled_aliases || {};
const loadedSet = new Set(loaded); const loadedSet = new Set(loaded);
const notLoaded = enabled.filter(m => !loadedSet.has(m)); const notLoaded = enabled.filter(m => !loadedSet.has(m));
let html = ''; let html = '';
if(loaded.length) html += loaded.map(m => `<span class="badge badge-admin" style="margin:.125rem" title="Loaded">● ${m}</span>`).join(''); if(loaded.length) html += loaded.map(m => `<span class="badge badge-admin" style="margin:.125rem" title="Loaded">● ${m}</span>`).join('');
if(notLoaded.length) html += notLoaded.map(m => `<span class="badge" style="margin:.125rem;opacity:.55" title="Enabled, not loaded">○ ${m}</span>`).join(''); if(notLoaded.length) html += notLoaded.map(m => `<span class="badge" style="margin:.125rem;opacity:.55" title="Enabled, not loaded">○ ${m}</span>`).join('');
const aliasEntries = Object.entries(aliases);
if(aliasEntries.length) {
html += aliasEntries.map(([alias, ids]) => {
const tip = ids.length > 1 ? `Round-robin alias → ${ids.join(', ')}` : `Alias → ${ids[0] || ''}`;
return `<span class="badge" style="margin:.125rem;background:var(--bg);border:1px solid var(--border);color:var(--text-2)" title="${tip}">⇄ ${alias}</span>`;
}).join('');
}
document.getElementById('active-models').innerHTML = html || '<span class="muted small">No models loaded</span>'; document.getElementById('active-models').innerHTML = html || '<span class="muted small">No models loaded</span>';
if (d.vram) { if (d.vram) {
......
This diff is collapsed.
...@@ -134,8 +134,12 @@ async def create_transcription( ...@@ -134,8 +134,12 @@ async def create_transcription(
if len(file_content) > _MAX_AUDIO_BYTES: if len(file_content) > _MAX_AUDIO_BYTES:
raise HTTPException(status_code=413, detail="Audio file too large (max 100 MB)") raise HTTPException(status_code=413, detail="Audio file too large (max 100 MB)")
# Check if the requested model maps to a configured whisper-server instance first # Check if the requested model maps to a configured whisper-server instance first.
whisper_server = multi_model_manager.whisper_servers.get(model) # Try alias round-robin resolution before direct ID lookup.
whisper_server = (
multi_model_manager.resolve_whisper_alias(model)
or multi_model_manager.whisper_servers.get(model)
)
if whisper_server is not None: if whisper_server is not None:
multi_model_manager.request_model(requested_model=model, model_type="audio") multi_model_manager.request_model(requested_model=model, model_type="audio")
if not whisper_server.is_running(): if not whisper_server.is_running():
......
...@@ -373,12 +373,14 @@ def main(): ...@@ -373,12 +373,14 @@ def main():
continue continue
if isinstance(m, dict) and m.get("backend") == "whisper-server": if isinstance(m, dict) and m.get("backend") == "whisper-server":
cfg = _model_cfg(m, "audio") cfg = _model_cfg(m, "audio")
alias = (m.get("alias") or "").strip() or None
cfg.update({ cfg.update({
"backend": "whisper-server", "backend": "whisper-server",
"server_path": m.get("server_path", ""), "server_path": m.get("server_path", ""),
"model_path": m.get("model_path") or None, "model_path": m.get("model_path") or None,
"port": int(m.get("port", 8744)), "port": int(m.get("port", 8744)),
"gpu_device": int(m.get("gpu_device", 0)), "gpu_device": int(m.get("gpu_device", 0)),
"alias": alias,
}) })
multi_model_manager.register_whisper_server( multi_model_manager.register_whisper_server(
model_id=mid, model_id=mid,
...@@ -387,6 +389,7 @@ def main(): ...@@ -387,6 +389,7 @@ def main():
port=int(m.get("port", 8744)), port=int(m.get("port", 8744)),
gpu_device=int(m.get("gpu_device", 0)), gpu_device=int(m.get("gpu_device", 0)),
config=cfg, config=cfg,
alias=alias,
) )
else: else:
multi_model_manager.set_audio_model(mid, config=_model_cfg(m, "audio")) multi_model_manager.set_audio_model(mid, config=_model_cfg(m, "audio"))
......
...@@ -499,6 +499,8 @@ class MultiModelManager: ...@@ -499,6 +499,8 @@ class MultiModelManager:
self.model_aliases: Dict[str, str] = {} self.model_aliases: Dict[str, str] = {}
self.whisper_server: Optional[WhisperServerManager] = None # legacy single-instance compat self.whisper_server: Optional[WhisperServerManager] = None # legacy single-instance compat
self.whisper_servers: Dict[str, WhisperServerManager] = {} # id -> manager self.whisper_servers: Dict[str, WhisperServerManager] = {} # id -> manager
self.whisper_aliases: Dict[str, List[str]] = {} # alias -> [model_id, ...]
self._whisper_alias_counters: Dict[str, int] = {} # alias -> next round-robin index
self.model_backend_types: Dict[str, str] = {} self.model_backend_types: Dict[str, str] = {}
self.tool_breaker = FuzzyToolBreaker(threshold=3) # Circuit breaker for repetitive tool calls self.tool_breaker = FuzzyToolBreaker(threshold=3) # Circuit breaker for repetitive tool calls
self._load_lock = threading.Lock() # Prevents duplicate on-demand model loads self._load_lock = threading.Lock() # Prevents duplicate on-demand model loads
...@@ -761,7 +763,8 @@ class MultiModelManager: ...@@ -761,7 +763,8 @@ class MultiModelManager:
print(f"Audio model '{model_name}' cached as: {resolved_model}") print(f"Audio model '{model_name}' cached as: {resolved_model}")
def register_whisper_server(self, model_id: str, server_path: str, model_path: str = None, def register_whisper_server(self, model_id: str, server_path: str, model_path: str = None,
port: int = 8744, gpu_device: int = 0, config: Dict = None): port: int = 8744, gpu_device: int = 0, config: Dict = None,
alias: str = None):
"""Register a whisper-server instance as an audio model.""" """Register a whisper-server instance as an audio model."""
wsm = WhisperServerManager(server_path=server_path, port=port) wsm = WhisperServerManager(server_path=server_path, port=port)
wsm._model_path = model_path wsm._model_path = model_path
...@@ -776,8 +779,25 @@ class MultiModelManager: ...@@ -776,8 +779,25 @@ class MultiModelManager:
if model_id not in self.audio_models: if model_id not in self.audio_models:
self.audio_models.append(model_id) self.audio_models.append(model_id)
self.config[f"audio:{model_id}"] = cfg self.config[f"audio:{model_id}"] = cfg
print(f"Registered whisper-server audio model: {model_id} (server: {server_path})") # Register alias for round-robin routing
if alias:
wsm._alias = alias
ids = self.whisper_aliases.setdefault(alias, [])
if model_id not in ids:
ids.append(model_id)
self._whisper_alias_counters.setdefault(alias, 0)
print(f"Registered whisper-server audio model: {model_id} (server: {server_path})"
+ (f" alias={alias}" if alias else ""))
return wsm return wsm
def resolve_whisper_alias(self, name: str) -> Optional[WhisperServerManager]:
    """Return the next round-robin WhisperServerManager for an alias, or None.

    Looks ``name`` up in ``self.whisper_aliases`` (alias -> list of model
    ids) and walks that list starting at the position stored in
    ``self._whisper_alias_counters``. The first id that has a manager in
    ``self.whisper_servers`` is returned, and the counter is advanced past
    it so the following call starts with the next member of the group.

    Args:
        name: Alias to resolve.

    Returns:
        The selected manager, or ``None`` when ``name`` is not a known
        alias, its id list is empty, or none of its ids currently has a
        manager in ``self.whisper_servers``.
    """
    ids = self.whisper_aliases.get(name)
    if not ids:
        return None
    count = len(ids)
    start = self._whisper_alias_counters.get(name, 0) % count
    # Probe every member once, in rotation order, so that an id without a
    # registered manager does not make the whole alias resolve to None
    # while other members of the group are still available.
    for offset in range(count):
        idx = (start + offset) % count
        server = self.whisper_servers.get(ids[idx])
        if server is not None:
            self._whisper_alias_counters[name] = idx + 1
            return server
    return None
def set_tts_model(self, model_name: str, config: Dict = None): def set_tts_model(self, model_name: str, config: Dict = None):
"""Set the text-to-speech model and download/cache it if needed.""" """Set the text-to-speech model and download/cache it if needed."""
...@@ -2033,6 +2053,8 @@ class MultiModelManager: ...@@ -2033,6 +2053,8 @@ class MultiModelManager:
capabilities=caps.to_list(), capabilities=caps.to_list(),
backend=meta.get("backend"), backend=meta.get("backend"),
model_path=meta.get("model_path"), model_path=meta.get("model_path"),
server_path=meta.get("server_path"),
alias=meta.get("alias"),
port=meta.get("port"), port=meta.get("port"),
gpu_device=meta.get("gpu_device"), gpu_device=meta.get("gpu_device"),
load_mode=meta.get("load_mode"), load_mode=meta.get("load_mode"),
...@@ -2051,13 +2073,19 @@ class MultiModelManager: ...@@ -2051,13 +2073,19 @@ class MultiModelManager:
if isinstance(m, str): if isinstance(m, str):
mid = m mid = m
else: else:
mid = m.get("alias") or m.get("path") or m.get("id") or ""
raw = m.get("path") or m.get("id") or "" raw = m.get("path") or m.get("id") or ""
if raw and raw != mid: alias = m.get("alias") or ""
_add(raw, mtype, m) # whisper-server aliases are round-robin group keys shared across
short = raw.split("/")[-1] if "/" in raw else raw # multiple instances — don't expose the alias as a separate model
if short != raw: if m.get("backend") == "whisper-server":
_add(short, mtype, m) mid = raw
else:
mid = alias or raw
if raw and raw != mid:
_add(raw, mtype, m)
short = raw.split("/")[-1] if "/" in raw else raw
if short != raw:
_add(short, mtype, m)
if mid: if mid:
_add(mid, mtype, m if isinstance(m, dict) else None) _add(mid, mtype, m if isinstance(m, dict) else None)
short = mid.split("/")[-1] if "/" in mid else mid short = mid.split("/")[-1] if "/" in mid else mid
......
...@@ -123,6 +123,8 @@ class ModelInfo(BaseModel): ...@@ -123,6 +123,8 @@ class ModelInfo(BaseModel):
capabilities: Optional[List[str]] = None # list of capability strings capabilities: Optional[List[str]] = None # list of capability strings
backend: Optional[str] = None backend: Optional[str] = None
model_path: Optional[str] = None model_path: Optional[str] = None
server_path: Optional[str] = None
alias: Optional[str] = None
port: Optional[int] = None port: Optional[int] = None
gpu_device: Optional[int] = None gpu_device: Optional[int] = None
load_mode: Optional[str] = None load_mode: Optional[str] = None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment