feat: manage whisper-server models from local models page

parent f31e99bd
...@@ -94,6 +94,29 @@ ...@@ -94,6 +94,29 @@
<div class="card-title">GGUF files <span id="gguf-file-badge" class="muted small"></span></div> <div class="card-title">GGUF files <span id="gguf-file-badge" class="muted small"></span></div>
<div id="gguf-models-list"><span class="muted small">Loading…</span></div> <div id="gguf-models-list"><span class="muted small">Loading…</span></div>
</div> </div>
<!-- Whisper-server model builder: collects the fields that addWhisperServerModel() reads by id. -->
<div class="card mb-0" style="margin-top:1rem" id="ws-model-builder">
  <div class="card-title">Whisper-server simulated models</div>
  <p class="muted small" style="margin-top:0">Create local audio models backed by dedicated whisper-server subprocess configurations.</p>
  <!-- The fields have no visible <label>; aria-label/title name each one, since the numeric inputs otherwise show only a bare value. -->
  <div style="display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:.75rem">
    <input id="ws-model-id" class="form-input" placeholder="whisper-vulkan-base" aria-label="Model ID" title="Model ID">
    <input id="ws-server-path" class="form-input" placeholder="/usr/local/bin/whisper-server" aria-label="whisper-server executable path" title="whisper-server executable path">
    <input id="ws-model-path" class="form-input" placeholder="/models/ggml-base.bin" aria-label="Model file path" title="Model file path">
    <input id="ws-port" class="form-input" type="number" value="8744" min="1" max="65535" aria-label="Port" title="Port">
    <input id="ws-gpu-device" class="form-input" type="number" value="0" min="0" aria-label="GPU device index" title="GPU device index">
    <select id="ws-load-mode" class="form-input" aria-label="Load mode" title="Load mode">
      <option value="on-request">On request</option>
      <option value="load">Load</option>
    </select>
  </div>
  <div style="display:grid;grid-template-columns:repeat(2,minmax(0,1fr));gap:.75rem;margin-top:.75rem">
    <input id="ws-used-vram" class="form-input" type="number" min="0" step="0.1" placeholder="Used VRAM (optional)" aria-label="Used VRAM (optional)" title="Used VRAM (optional)">
    <div></div>
  </div>
  <div class="form-actions" style="margin-top:.75rem">
    <!-- type="button" keeps the click from acting as a form submit if this card is ever nested in a <form>. -->
    <button type="button" class="btn btn-primary" onclick="addWhisperServerModel()">Add model</button>
  </div>
</div>
</div> </div>
<!-- SEARCH --> <!-- SEARCH -->
...@@ -939,6 +962,57 @@ async function loadCacheStats(){ ...@@ -939,6 +962,57 @@ async function loadCacheStats(){
let _localModels = []; let _localModels = [];
/**
 * Build the "Configured whisper-server models" card and register every model
 * in the shared `_localModels` list.
 *
 * Each model is pushed onto `_localModels`; the index it lands at is baked
 * into the row's inline `onclick` handlers (loadModel / unloadModel /
 * openCfgModal / disableModel), so the push order must match the row order.
 *
 * Side effect: when a DOM is available, the card produced by a previous call
 * (id `ws-configured-models`) is removed first. The returned markup is meant
 * to be inserted next to an existing element rather than assigned to a
 * container's innerHTML, so without this every refresh would stack one more
 * copy of the card.
 *
 * @param {Array<Object>} models - whisper-server model descriptors; `id` is
 *   required, `backend`, `model_path`, `port`, `gpu_device`, `load_mode` and
 *   `capabilities` are optional.
 * @returns {string} HTML for the card, or '' when `models` is empty.
 */
function _renderWhisperServerRows(models){
  if(typeof document !== 'undefined'){
    const stale = document.getElementById('ws-configured-models');
    if(stale) stale.remove();
  }
  if(!models.length) return '';

  const rows = models.map(m=>{
    const idx = _localModels.length;
    _localModels.push({
      label:m.id,
      path:m.id,
      cacheType:'whisper-server',
      size_gb:0,
      defaultType:'audio_models',
      settings:{
        backend:m.backend || 'whisper-server',
        load_mode:m.load_mode || 'on-request',
        model_type:'audio_models',
        model_path:m.model_path || '',
        port:m.port,
        gpu_device:m.gpu_device,
      },
      in_config:true,
      capabilities:m.capabilities || ['speech_to_text']
    });

    // A model counts as loaded under either the audio-prefixed key or its bare id.
    const loaded = _loadedKeys.has(`audio:${m.id}`) || _loadedKeys.has(m.id);

    // Escape port/GPU like every other interpolated field; String() first
    // because these values may be numbers rather than strings.
    const port = esc(String(m.port ?? '—'));
    const gpu = esc(String(m.gpu_device ?? 0));

    const toggleButton = loaded
      ?`<button class="btn btn-ghost btn-sm" onclick="unloadModel(${idx})">Unload</button>`
      :`<button class="btn btn-primary btn-sm" onclick="loadModel(${idx})">Load now</button>`;
    const status = loaded
      ?'<span class="badge badge-ok">loaded</span>'
      :'<span class="muted small">idle</span>';

    return `<tr style="border-top:1px solid var(--border)">
<td style="padding:.4rem .25rem;font-family:monospace;font-size:12px">${esc(m.id)}</td>
<td style="padding:.4rem .25rem"><span class="badge badge-ok">${esc(m.backend || 'whisper-server')}</span></td>
<td style="padding:.4rem .25rem;font-size:11px;color:var(--text-2)">${esc(m.model_path || '—')}</td>
<td style="padding:.4rem .25rem;font-size:11px;color:var(--text-2)">${port} / GPU ${gpu}</td>
<td style="padding:.4rem .25rem;font-size:11px;color:var(--text-2)">${esc(m.load_mode || 'on-request')}</td>
<td style="padding:.4rem .25rem;text-align:center">${status}</td>
<td style="padding:.4rem .25rem;text-align:right;white-space:nowrap">
${toggleButton}
<button class="btn btn-secondary btn-sm" onclick="openCfgModal(${idx})">Configure</button>
<button class="btn btn-ghost btn-sm" onclick="disableModel(${idx})">Remove</button>
</td>
</tr>`;
  });

  return '<div class="card" id="ws-configured-models" style="margin-top:1rem">'+
    '<div class="card-title">Configured whisper-server models</div>'+
    '<table style="width:100%;border-collapse:collapse;font-size:13px">'+
    '<thead><tr style="color:var(--text-2);font-size:10px;text-transform:uppercase;letter-spacing:.05em">'+
    '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model</th>'+
    '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Backend</th>'+
    '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model path</th>'+
    '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Port / GPU</th>'+
    '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Load mode</th>'+
    '<th style="text-align:center;padding:.3rem .25rem;font-weight:700">Status</th>'+
    '<th></th></tr></thead><tbody>'+rows.join('')+'</tbody></table></div>';
}
async function loadCachedModels(){ async function loadCachedModels(){
_localModels = []; _localModels = [];
const hfEl = document.getElementById('hf-models-list'); const hfEl = document.getElementById('hf-models-list');
...@@ -948,6 +1022,8 @@ async function loadCachedModels(){ ...@@ -948,6 +1022,8 @@ async function loadCachedModels(){
const r = await fetch('/admin/api/cached-models'); const r = await fetch('/admin/api/cached-models');
if(!r.ok) throw new Error((await r.json()).detail||r.statusText); if(!r.ok) throw new Error((await r.json()).detail||r.statusText);
const d = await r.json(); const d = await r.json();
const whisperModels = (await fetch('/admin/api/models').then(r=>r.ok?r.json():[]))
.filter(m => m.backend === 'whisper-server');
// HF models // HF models
const hf = d.hf||[]; const hf = d.hf||[];
...@@ -1022,6 +1098,7 @@ async function loadCachedModels(){ ...@@ -1022,6 +1098,7 @@ async function loadCachedModels(){
'<th style="text-align:center;padding:.3rem .25rem;font-weight:700">Config</th>'+ '<th style="text-align:center;padding:.3rem .25rem;font-weight:700">Config</th>'+
'<th></th></tr></thead><tbody>'+rows.join('')+'</tbody></table>'; '<th></th></tr></thead><tbody>'+rows.join('')+'</tbody></table>';
} }
ggufEl.insertAdjacentHTML('afterend', _renderWhisperServerRows(whisperModels));
}catch(e){ }catch(e){
hfEl.innerHTML = ggufEl.innerHTML = `<span class="muted small">Error: ${esc(e.message)}</span>`; hfEl.innerHTML = ggufEl.innerHTML = `<span class="muted small">Error: ${esc(e.message)}</span>`;
} }
...@@ -1103,6 +1180,9 @@ function openCfgModal(idx){ ...@@ -1103,6 +1180,9 @@ function openCfgModal(idx){
document.getElementById('cfg-parser').value = s.parser || 'auto'; document.getElementById('cfg-parser').value = s.parser || 'auto';
document.getElementById('cfg-tools').checked = !!s.tools_closer_prompt; document.getElementById('cfg-tools').checked = !!s.tools_closer_prompt;
document.getElementById('cfg-grammar').checked = !!s.grammar_guided; document.getElementById('cfg-grammar').checked = !!s.grammar_guided;
if (m.cacheType === 'whisper-server') {
document.getElementById('cfg-backend').value = 'cpu';
}
openModal('cfg-modal'); openModal('cfg-modal');
} }
...@@ -1150,6 +1230,31 @@ async function saveModelConfig(){ ...@@ -1150,6 +1230,31 @@ async function saveModelConfig(){
}catch(e){ alert('Error: '+e.message); } }catch(e){ alert('Error: '+e.message); }
} }
/**
 * Read the whisper-server builder form and POST it to
 * `/admin/api/model-configure` as a new `audio_models` entry.
 *
 * Validation happens before any request is sent:
 *   - the model ID must be non-empty;
 *   - the port must lie in 1–65535, the range declared on the `ws-port`
 *     input (an empty or zero port falls back to 8744).
 *
 * On success the local model list is refreshed via `refreshLocal()`.
 * Every failure (validation, network, non-2xx response) is reported through
 * `alert('Error: …')`; the function itself never rejects.
 *
 * @returns {Promise<void>}
 */
async function addWhisperServerModel(){
  const rawValue = id => document.getElementById(id).value;

  const modelId = rawValue('ws-model-id').trim();
  if(!modelId){
    alert('Error: Model ID is required');
    return;
  }

  const port = Number.parseInt(rawValue('ws-port'), 10) || 8744;
  if(port < 1 || port > 65535){
    alert('Error: Port must be between 1 and 65535');
    return;
  }

  const usedVram = Number.parseFloat(rawValue('ws-used-vram'));
  const payload = {
    model_id: modelId,
    model_type: 'audio_models',
    backend: 'whisper-server',
    server_path: rawValue('ws-server-path').trim(),
    model_path: rawValue('ws-model-path').trim() || null,
    port,
    gpu_device: Number.parseInt(rawValue('ws-gpu-device'), 10) || 0,
    load_mode: rawValue('ws-load-mode'),
    used_vram_gb: Number.isNaN(usedVram) ? null : usedVram,
  };

  try{
    const r = await fetch('/admin/api/model-configure', {
      method:'POST',
      headers:{'Content-Type':'application/json'},
      body: JSON.stringify(payload)
    });
    // The body may not be JSON (e.g. an HTML error page). A parse failure
    // must not hide the HTTP status, so fall back to "no detail".
    let d = null;
    try{
      d = await r.json();
    }catch{
      d = null;
    }
    if(!r.ok) throw new Error(d?.detail || 'Failed to add whisper-server model');
    refreshLocal();
  }catch(e){ alert('Error: '+e.message); }
}
async function loadModel(idx){ async function loadModel(idx){
const m = _localModels[idx]; const m = _localModels[idx];
// Find the button and show loading state // Find the button and show loading state
......
...@@ -1744,16 +1744,22 @@ class MultiModelManager: ...@@ -1744,16 +1744,22 @@ class MultiModelManager:
"embedding_models": "embedding", "embedding_models": "embedding",
} }
def _add(model_id: str, model_type: str = None): def _add(model_id: str, model_type: str = None, meta: Dict[str, Any] = None):
if model_id in seen_ids: if model_id in seen_ids:
return return
seen_ids.add(model_id) seen_ids.add(model_id)
caps = detect_model_capabilities(model_id) caps = detect_model_capabilities(model_id)
resolved_type = model_type or (caps.to_list()[0].split("_")[0] if caps.to_list() else "text") resolved_type = model_type or (caps.to_list()[0].split("_")[0] if caps.to_list() else "text")
meta = meta or {}
models.append(ModelInfo( models.append(ModelInfo(
id=model_id, id=model_id,
type=resolved_type, type=resolved_type,
capabilities=caps.to_list(), capabilities=caps.to_list(),
backend=meta.get("backend"),
model_path=meta.get("model_path"),
port=meta.get("port"),
gpu_device=meta.get("gpu_device"),
load_mode=meta.get("load_mode"),
)) ))
# --- Models from config (the authoritative source) --- # --- Models from config (the authoritative source) ---
...@@ -1772,15 +1778,15 @@ class MultiModelManager: ...@@ -1772,15 +1778,15 @@ class MultiModelManager:
mid = m.get("alias") or m.get("path") or m.get("id") or "" mid = m.get("alias") or m.get("path") or m.get("id") or ""
raw = m.get("path") or m.get("id") or "" raw = m.get("path") or m.get("id") or ""
if raw and raw != mid: if raw and raw != mid:
_add(raw, mtype) _add(raw, mtype, m)
short = raw.split("/")[-1] if "/" in raw else raw short = raw.split("/")[-1] if "/" in raw else raw
if short != raw: if short != raw:
_add(short, mtype) _add(short, mtype, m)
if mid: if mid:
_add(mid, mtype) _add(mid, mtype, m if isinstance(m, dict) else None)
short = mid.split("/")[-1] if "/" in mid else mid short = mid.split("/")[-1] if "/" in mid else mid
if short != mid: if short != mid:
_add(short, mtype) _add(short, mtype, m if isinstance(m, dict) else None)
except Exception: except Exception:
pass pass
......
...@@ -121,8 +121,13 @@ class ModelInfo(BaseModel): ...@@ -121,8 +121,13 @@ class ModelInfo(BaseModel):
owned_by: str = "huggingface" owned_by: str = "huggingface"
type: Optional[str] = None # e.g. "text", "image", "video", "audio", "tts", "vision", "embedding" type: Optional[str] = None # e.g. "text", "image", "video", "audio", "tts", "vision", "embedding"
capabilities: Optional[List[str]] = None # list of capability strings capabilities: Optional[List[str]] = None # list of capability strings
backend: Optional[str] = None
model_path: Optional[str] = None
port: Optional[int] = None
gpu_device: Optional[int] = None
load_mode: Optional[str] = None
class ModelList(BaseModel): class ModelList(BaseModel):
object: str = "list" object: str = "list"
data: List[ModelInfo] data: List[ModelInfo]
\ No newline at end of file
...@@ -290,3 +290,44 @@ def test_settings_api_does_not_return_whisper_fields(monkeypatch): ...@@ -290,3 +290,44 @@ def test_settings_api_does_not_return_whisper_fields(monkeypatch):
assert "whisper" not in response.json() assert "whisper" not in response.json()
app.dependency_overrides.clear() app.dependency_overrides.clear()
def test_models_template_contains_whisper_server_add_model_form():
template = Path("codai/admin/templates/models.html").read_text()
assert "Whisper-server simulated models" in template
assert "Add model" in template
assert "ws-model-id" in template
assert "ws-server-path" in template
def test_settings_template_no_longer_contains_whisper_server_section():
template = Path("codai/admin/templates/settings.html").read_text()
assert "Whisper Server" not in template
assert "wsStart" not in template
assert "wsStop" not in template
def test_model_info_supports_whisper_server_metadata_fields():
content = Path("codai/pydantic/textrequest.py").read_text()
assert "backend: Optional[str] = None" in content
assert "model_path: Optional[str] = None" in content
assert "port: Optional[int] = None" in content
assert "gpu_device: Optional[int] = None" in content
assert "load_mode: Optional[str] = None" in content
def test_removed_whisper_server_admin_routes_return_not_found(monkeypatch):
    """The retired whisper-server admin endpoints must answer 404."""
    from codai.admin import routes
    from codai.api.app import app

    # Replace the admin dependency with a stub for the duration of the test.
    app.dependency_overrides[routes.require_admin] = lambda: "admin"
    try:
        client = TestClient(app)
        assert client.get("/admin/api/whisper-server/status").status_code == 404
        assert client.post("/admin/api/whisper-server/start", json={}).status_code == 404
        assert client.post("/admin/api/whisper-server/stop", json={}).status_code == 404
    finally:
        # Always restore the overrides: a failing assertion must not leave the
        # admin stub installed on the shared app object for later tests.
        app.dependency_overrides.clear()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment