Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in
Toggle navigation
C
coderai
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexlab
coderai
Commits
aa34ff9a
Commit
aa34ff9a
authored
May 07, 2026
by
Stefy Lanza (nextime / spora )
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix multimodel
parent
f524118d
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
233 additions
and
37 deletions
+233
-37
routes.py
codai/admin/routes.py
+26
-10
dashboard.html
codai/admin/templates/dashboard.html
+8
-0
models.html
codai/admin/templates/models.html
+152
-17
transcriptions.py
codai/api/transcriptions.py
+6
-2
main.py
codai/main.py
+3
-0
manager.py
codai/models/manager.py
+36
-8
textrequest.py
codai/pydantic/textrequest.py
+2
-0
No files found.
codai/admin/routes.py
View file @
aa34ff9a
...
...
@@ -338,17 +338,27 @@ async def api_status(username: str = Depends(require_auth)):
except
Exception
:
pass
# Enabled (configured) models
# Enabled (configured) models
+ aliases
enabled_models
=
[]
enabled_aliases
:
dict
=
{}
# alias -> [model_id, ...]
try
:
if
config_manager
:
md
=
config_manager
.
models_data
for
cat
in
(
"text_models"
,
"image_models"
,
"audio_models"
,
"vision_models"
,
"tts_models"
,
"video_models"
,
"audio_gen_models"
,
"embedding_models"
):
for
m
in
md
.
get
(
cat
,
[]):
mid
=
(
m
.
get
(
"path"
)
or
m
.
get
(
"id"
)
or
m
)
if
isinstance
(
m
,
dict
)
else
m
if
isinstance
(
m
,
dict
):
mid
=
m
.
get
(
"path"
)
or
m
.
get
(
"id"
)
or
""
alias
=
(
m
.
get
(
"alias"
)
or
""
)
.
strip
()
else
:
mid
=
m
alias
=
""
if
mid
and
mid
not
in
enabled_models
:
enabled_models
.
append
(
mid
)
if
alias
:
enabled_aliases
.
setdefault
(
alias
,
[])
if
mid
and
mid
not
in
enabled_aliases
[
alias
]:
enabled_aliases
[
alias
]
.
append
(
mid
)
except
Exception
:
pass
...
...
@@ -399,6 +409,7 @@ async def api_status(username: str = Depends(require_auth)):
"models_loaded"
:
len
(
loaded_keys
),
"loaded_models"
:
loaded_keys
,
"enabled_models"
:
enabled_models
,
"enabled_aliases"
:
enabled_aliases
,
"vram"
:
vram
,
"cuda"
:
is_cuda
,
"requests"
:
{
...
...
@@ -1356,13 +1367,13 @@ async def api_model_configure(request: Request, username: str = Depends(require_
gpu_device
=
int
(
data
.
get
(
"gpu_device"
,
0
))
if
gpu_device
<
0
:
raise
HTTPException
(
status_code
=
400
,
detail
=
"gpu_device must be >= 0"
)
for
existing
in
config_manager
.
models_data
.
get
(
"audio_models"
,
[]):
if
(
isinstance
(
existing
,
dict
)
and
existing
.
get
(
"backend"
)
==
"whisper-server"
and
existing
.
get
(
"id"
)
==
model_id
):
raise
HTTPException
(
status_code
=
409
,
detail
=
f
"whisper-server model '{model_id}' already exists"
)
# Remove existing entry with same id (update semantics)
audio_list
=
config_manager
.
models_data
.
get
(
"audio_models"
,
[])
config_manager
.
models_data
[
"audio_models"
]
=
[
m
for
m
in
audio_list
if
not
(
isinstance
(
m
,
dict
)
and
m
.
get
(
"id"
)
==
model_id
)
]
alias
=
(
data
.
get
(
"alias"
)
or
""
)
.
strip
()
or
None
entry
=
{
"id"
:
model_id
,
"backend"
:
"whisper-server"
,
...
...
@@ -1374,11 +1385,16 @@ async def api_model_configure(request: Request, username: str = Depends(require_
"model_type"
:
"audio_models"
,
"model_types"
:
[
"audio_models"
],
}
if
alias
:
entry
[
"alias"
]
=
alias
if
data
.
get
(
"used_vram_gb"
)
is
not
None
:
entry
[
"used_vram_gb"
]
=
data
[
"used_vram_gb"
]
config_manager
.
models_data
.
setdefault
(
"audio_models"
,
[])
.
append
(
entry
)
config_manager
.
save_models
()
return
{
"success"
:
True
,
"model_id"
:
model_id
,
"model_path"
:
model_path
,
"server_path"
:
server_path
}
result
=
{
"success"
:
True
,
"model_id"
:
model_id
,
"model_path"
:
model_path
,
"server_path"
:
server_path
}
if
alias
:
result
[
"alias"
]
=
alias
return
result
path
=
data
.
get
(
"path"
)
or
data
.
get
(
"model_id"
,
""
)
valid
=
{
"text_models"
,
"image_models"
,
"audio_models"
,
"tts_models"
,
"vision_models"
,
"video_models"
,
"audio_gen_models"
,
"embedding_models"
}
...
...
codai/admin/templates/dashboard.html
View file @
aa34ff9a
...
...
@@ -79,11 +79,19 @@ async function poll() {
const
loaded
=
d
.
loaded_models
||
[];
const
enabled
=
d
.
enabled_models
||
[];
const
aliases
=
d
.
enabled_aliases
||
{};
const
loadedSet
=
new
Set
(
loaded
);
const
notLoaded
=
enabled
.
filter
(
m
=>
!
loadedSet
.
has
(
m
));
let
html
=
''
;
if
(
loaded
.
length
)
html
+=
loaded
.
map
(
m
=>
`<span class="badge badge-admin" style="margin:.125rem" title="Loaded">●
${
m
}
</span>`
).
join
(
''
);
if
(
notLoaded
.
length
)
html
+=
notLoaded
.
map
(
m
=>
`<span class="badge" style="margin:.125rem;opacity:.55" title="Enabled, not loaded">○
${
m
}
</span>`
).
join
(
''
);
const
aliasEntries
=
Object
.
entries
(
aliases
);
if
(
aliasEntries
.
length
)
{
html
+=
aliasEntries
.
map
(([
alias
,
ids
])
=>
{
const
tip
=
ids
.
length
>
1
?
`Round-robin alias →
${
ids
.
join
(
', '
)}
`
:
`Alias →
${
ids
[
0
]
||
''
}
`
;
return
`<span class="badge" style="margin:.125rem;background:var(--bg);border:1px solid var(--border);color:var(--text-2)" title="
${
tip
}
">⇄
${
alias
}
</span>`
;
}).
join
(
''
);
}
document
.
getElementById
(
'active-models'
).
innerHTML
=
html
||
'<span class="muted small">No models loaded</span>'
;
if
(
d
.
vram
)
{
...
...
codai/admin/templates/models.html
View file @
aa34ff9a
...
...
@@ -118,10 +118,10 @@
</div>
<div
style=
"display:grid;grid-template-columns:repeat(2,minmax(0,1fr));gap:.75rem;margin-top:.75rem"
>
<input
id=
"ws-used-vram"
class=
"form-input"
type=
"number"
min=
"0"
step=
"0.1"
placeholder=
"Used VRAM (optional)"
>
<
div></div
>
<
input
id=
"ws-alias"
class=
"form-input"
placeholder=
"Alias (optional, e.g. whisper)"
>
</div>
<div
class=
"form-actions"
style=
"margin-top:.75rem"
>
<button
class=
"btn btn-primary"
onclick=
"addWhisperServerModel()"
>
Add model
</button>
<button
class=
"btn btn-primary"
id=
"ws-submit-btn"
onclick=
"addWhisperServerModel()"
>
Add model
</button>
</div>
</div>
</div>
...
...
@@ -329,6 +329,69 @@
</div>
</div>
<!-- Whisper-server edit modal -->
<div
id=
"ws-edit-modal"
class=
"modal"
>
<div
class=
"modal-box"
style=
"max-width:520px"
>
<div
class=
"modal-head"
>
<span
class=
"modal-title"
>
Edit whisper-server model
</span>
<button
class=
"modal-close"
onclick=
"closeModal('ws-edit-modal')"
>
×
</button>
</div>
<div
class=
"modal-body"
>
<input
type=
"hidden"
id=
"wse-model-id"
>
<div
class=
"form-row"
>
<label
class=
"form-label"
>
Model ID
</label>
<div
id=
"wse-id-label"
style=
"font-size:12px;font-family:monospace;color:var(--text-2);padding:.3rem 0"
></div>
</div>
<div
class=
"form-row"
>
<label
class=
"form-label"
>
Alias
<span
class=
"muted small"
>
(optional — shared by multiple instances for round-robin)
</span></label>
<input
id=
"wse-alias"
class=
"form-input"
placeholder=
"e.g. whisper"
>
</div>
<div
class=
"form-row"
>
<label
class=
"form-label"
>
Server path
</label>
<input
id=
"wse-server-path"
class=
"form-input"
placeholder=
"/usr/local/bin/whisper-server"
>
</div>
<div
class=
"form-row"
>
<label
class=
"form-label"
>
Model source
</label>
<select
id=
"wse-model-source"
class=
"form-input"
onchange=
"toggleWseModelSource()"
>
<option
value=
"cached-gguf"
>
Downloaded GGUF
</option>
<option
value=
"manual-path"
>
Manual path
</option>
</select>
</div>
<div
class=
"form-row"
id=
"wse-gguf-row"
>
<label
class=
"form-label"
>
Downloaded GGUF
</label>
<select
id=
"wse-gguf-select"
class=
"form-input"
>
<option
value=
""
>
Select downloaded GGUF
</option>
</select>
</div>
<div
class=
"form-row"
id=
"wse-path-row"
style=
"display:none"
>
<label
class=
"form-label"
>
Model path
</label>
<input
id=
"wse-model-path"
class=
"form-input"
placeholder=
"/path/to/ggml-model.bin"
>
</div>
<div
style=
"display:grid;grid-template-columns:1fr 1fr 1fr;gap:.75rem"
>
<div
class=
"form-row"
style=
"margin:0"
>
<label
class=
"form-label"
>
Port
</label>
<input
id=
"wse-port"
class=
"form-input"
type=
"number"
min=
"1"
max=
"65535"
>
</div>
<div
class=
"form-row"
style=
"margin:0"
>
<label
class=
"form-label"
>
GPU device
</label>
<input
id=
"wse-gpu-device"
class=
"form-input"
type=
"number"
min=
"0"
>
</div>
<div
class=
"form-row"
style=
"margin:0"
>
<label
class=
"form-label"
>
Load mode
</label>
<select
id=
"wse-load-mode"
class=
"form-input"
>
<option
value=
"on-request"
>
On request
</option>
<option
value=
"load"
>
Load
</option>
</select>
</div>
</div>
<div
class=
"form-actions"
style=
"margin-top:1rem"
>
<button
class=
"btn btn-primary"
onclick=
"saveWhisperServerEdit()"
>
Save
</button>
<button
class=
"btn btn-ghost"
onclick=
"closeModal('ws-edit-modal')"
>
Cancel
</button>
</div>
</div>
</div>
</div>
<!-- Model configuration modal -->
<div
id=
"cfg-modal"
class=
"modal"
>
<div
class=
"modal-box"
style=
"max-width:600px;max-height:92vh;overflow-y:auto"
>
...
...
@@ -1037,20 +1100,21 @@ function _renderWhisperServerRows(models){
size_gb
:
0
,
defaultType
:
'audio_models'
,
settings
:{
backend
:
m
.
backend
||
'whisper-server'
,
load_mode
:
m
.
load_mode
||
'on-request'
,
model_type
:
'audio_models'
,
model_path
:
m
.
model_path
||
''
,
port
:
m
.
port
,
gpu_device
:
m
.
gpu_device
,
backend
:
m
.
backend
||
'whisper-server'
,
load_mode
:
m
.
load_mode
||
'on-request'
,
model_type
:
'audio_models'
,
model_path
:
m
.
model_path
||
''
,
server_path
:
m
.
server_path
||
''
,
alias
:
m
.
alias
||
''
,
port
:
m
.
port
??
8744
,
gpu_device
:
m
.
gpu_device
??
0
,
},
in_config
:
true
,
capabilities
:
m
.
capabilities
||
[
'speech_to_text'
]
});
const
loaded
=
_loadedKeys
.
has
(
`audio:
${
m
.
id
}
`
)
||
_loadedKeys
.
has
(
m
.
id
);
return
`<tr style="border-top:1px solid var(--border)">
<td style="padding:.4rem .25rem;font-family:monospace;font-size:12px">
${
esc
(
m
.
id
)}
</td>
<td style="padding:.4rem .25rem"><span class="badge badge-ok">
${
esc
(
m
.
backend
||
'whisper-server'
)}
</span></td>
<td style="padding:.4rem .25rem;font-family:monospace;font-size:12px">
${
esc
(
m
.
id
)}${
m
.
alias
?
`<br><span style="color:var(--text-2);font-size:10px">alias:
${
esc
(
m
.
alias
)}
</span>`
:
''
}
</td>
<td style="padding:.4rem .25rem;font-size:11px;color:var(--text-2);max-width:160px;overflow:hidden;text-overflow:ellipsis;display:-webkit-box;-webkit-line-clamp:2;-webkit-box-orient:vertical;line-height:1.25;max-height:2.5em" title="
${
esc
(
m
.
model_path
||
"—"
)}
">
${
esc
(
m
.
model_path
||
"—"
)}
</td>
<td style="padding:.4rem .25rem;font-size:11px;color:var(--text-2)">
${
m
.
port
??
'—'
}
/ GPU ${m.gpu_device
??
0}</
td
>
<
td
style
=
"padding:.4rem .25rem;font-size:11px;color:var(--text-2)"
>
$
{
esc
(
m
.
load_mode
||
'on-request'
)}
<
/td
>
...
...
@@ -1059,7 +1123,7 @@ function _renderWhisperServerRows(models){
$
{
loaded
?
`<button class="btn btn-ghost btn-sm" onclick="unloadModel(
${
idx
}
)">Unload</button>`
:
`<button class="btn btn-primary btn-sm" onclick="loadModel(
${
idx
}
)">Load now</button>`
}
<
button
class
=
"btn btn-secondary btn-sm"
onclick
=
"openCfgModal(${idx})"
>
Configure
<
/button
>
<
button
class
=
"btn btn-secondary btn-sm"
onclick
=
"openCfgModal(${idx})"
>
Edit
<
/button
>
<
button
class
=
"btn btn-ghost btn-sm"
onclick
=
"disableModel(${idx})"
>
Remove
<
/button
>
<
/td
>
<
/tr>`
;
...
...
@@ -1068,8 +1132,7 @@ function _renderWhisperServerRows(models){
'<div class="card-title">Configured whisper-server models</div>'
+
'<table style="width:100%;border-collapse:collapse;font-size:13px">'
+
'<thead><tr style="color:var(--text-2);font-size:10px;text-transform:uppercase;letter-spacing:.05em">'
+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model</th>'
+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Backend</th>'
+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model / Alias</th>'
+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model path</th>'
+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Port / GPU</th>'
+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Load mode</th>'
+
...
...
@@ -1234,7 +1297,6 @@ async function loadCachedModels(){
'<th style="text-align:center;padding:.3rem .25rem;font-weight:700">Config</th>'
+
'<th></th></tr></thead><tbody>'
+
rows
.
join
(
''
)
+
'</tbody></table>'
;
}
ggufEl
.
insertAdjacentHTML
(
'afterend'
,
_renderWhisperServerRows
(
whisperModels
));
// Remove any previously rendered whisper-server card before inserting the new one
document
.
querySelectorAll
(
'#ws-rendered-card'
).
forEach
(
el
=>
el
.
remove
());
const
wsHtml
=
_renderWhisperServerRows
(
whisperModels
);
...
...
@@ -1363,8 +1425,81 @@ function onCfgQuantChange(){
document
.
getElementById
(
'cfg-id-label'
).
textContent
=
m
.
label
;
}
/**
 * Sync the whisper-server edit modal with the selected model source:
 * show the "Downloaded GGUF" row for 'cached-gguf', the "Model path" row
 * for 'manual-path', and hide whichever row does not match.
 */
function toggleWseModelSource() {
  const selected = document.getElementById('wse-model-source').value;
  // Row id -> whether that row should be visible for the current source.
  const rowVisibility = {
    'wse-gguf-row': selected === 'cached-gguf',
    'wse-path-row': selected === 'manual-path',
  };
  for (const [rowId, visible] of Object.entries(rowVisibility)) {
    // An empty display value restores the row's stylesheet default.
    document.getElementById(rowId).style.display = visible ? '' : 'none';
  }
}
/**
 * Read the model path from whichever edit-modal control is active.
 *
 * @returns {string} The selected GGUF option value when the source is
 *   'cached-gguf'; otherwise the trimmed text of the manual path input.
 */
function _getWseModelPath() {
  if (document.getElementById('wse-model-source').value === 'cached-gguf') {
    return document.getElementById('wse-gguf-select').value;
  }
  return document.getElementById('wse-model-path').value.trim();
}
/**
 * Fill the whisper-server edit modal from a model entry and open it.
 *
 * @param {Object} m Model entry; `m.path` is used as the model ID and
 *   `m.settings` (optional) supplies alias, server_path, port, gpu_device,
 *   load_mode and model_path.
 */
function _openWhisperServerEdit(m) {
  const s = m.settings || {};
  const byId = id => document.getElementById(id);

  byId('wse-model-id').value = m.path;
  byId('wse-id-label').textContent = m.path;
  byId('wse-alias').value = s.alias || '';
  byId('wse-server-path').value = s.server_path || defaultWhisperServerPath();
  // `??` keeps a legitimate 0 (e.g. GPU device 0) instead of replacing it.
  byId('wse-port').value = s.port ?? 8744;
  byId('wse-gpu-device').value = s.gpu_device ?? 0;
  byId('wse-load-mode').value = s.load_mode || 'on-request';

  // Populate the GGUF select with current options from the add-form's list.
  const srcSelect = byId('ws-gguf-select');
  const dstSelect = byId('wse-gguf-select');
  dstSelect.innerHTML = srcSelect
    ? srcSelect.innerHTML
    : '<option value="">Select downloaded GGUF</option>';

  const modelPath = s.model_path || '';
  const inGguf = modelPath && _ggufFiles.some(f => f.path === modelPath);

  // Always overwrite the manual-path field. The modal is reused across
  // models, so leaving it untouched in the GGUF case would keep the path of
  // the previously edited model, which could then be saved by mistake after
  // switching the source to "Manual path".
  byId('wse-model-path').value = modelPath;

  if (inGguf) {
    byId('wse-model-source').value = 'cached-gguf';
    dstSelect.value = modelPath;
  } else {
    byId('wse-model-source').value = 'manual-path';
  }

  toggleWseModelSource();
  openModal('ws-edit-modal');
}
/**
 * Submit the whisper-server edit modal to /admin/api/model-configure.
 *
 * Builds the payload from the modal fields, POSTs it as JSON, and on
 * success closes the modal and refreshes the local model list. A missing
 * model path or a failed request is reported with alert().
 */
async function saveWhisperServerEdit() {
  const byId = id => document.getElementById(id);
  const model_id = byId('wse-model-id').value;
  const model_source = byId('wse-model-source').value;
  const model_path = _getWseModelPath();
  if (!model_path) {
    alert('Model path is required');
    return;
  }
  const payload = {
    model_id,
    backend: 'whisper-server',
    model_source,
    server_path: byId('wse-server-path').value.trim(),
    model_path,
    // Empty or non-numeric input falls back to the defaults.
    port: parseInt(byId('wse-port').value, 10) || 8744,
    gpu_device: parseInt(byId('wse-gpu-device').value, 10) || 0,
    load_mode: byId('wse-load-mode').value,
    alias: byId('wse-alias').value.trim() || null,
  };
  try {
    const r = await fetch('/admin/api/model-configure', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });
    if (!r.ok) {
      // An error response may not carry a JSON body (e.g. an HTML error
      // page); fall back to the generic message instead of surfacing a
      // JSON parse error to the user.
      const d = await r.json().catch(() => null);
      throw new Error((d && d.detail) || 'Failed to save');
    }
    closeModal('ws-edit-modal');
    refreshLocal();
  } catch (e) {
    alert('Error: ' + e.message);
  }
}
function
openCfgModal
(
idx
){
const
m
=
_localModels
[
idx
];
if
(
m
.
cacheType
===
'whisper-server'
)
{
_openWhisperServerEdit
(
m
);
return
;
}
const
s
=
m
.
settings
||
{};
document
.
getElementById
(
'cfg-modal-title'
).
textContent
=
m
.
in_config
?
'Configure model'
:
'Add to CoderAI'
;
document
.
getElementById
(
'cfg-id-label'
).
textContent
=
m
.
label
;
...
...
@@ -1453,9 +1588,6 @@ function openCfgModal(idx){
document
.
getElementById
(
'cfg-parser'
).
value
=
s
.
parser
||
'auto'
;
document
.
getElementById
(
'cfg-tools'
).
checked
=
!!
s
.
tools_closer_prompt
;
document
.
getElementById
(
'cfg-grammar'
).
checked
=
!!
s
.
grammar_guided
;
if
(
m
.
cacheType
===
'whisper-server'
)
{
document
.
getElementById
(
'cfg-backend'
).
value
=
'cpu'
;
}
openModal
(
'cfg-modal'
);
}
...
...
@@ -1533,6 +1665,7 @@ async function addWhisperServerModel(){
gpu_device
:
parseInt
(
document
.
getElementById
(
'ws-gpu-device'
).
value
,
10
)
||
0
,
load_mode
:
document
.
getElementById
(
'ws-load-mode'
).
value
,
used_vram_gb
:
Number
.
isNaN
(
usedVram
)
?
null
:
usedVram
,
alias
:
document
.
getElementById
(
'ws-alias'
).
value
.
trim
()
||
null
,
};
try
{
const
r
=
await
fetch
(
'/admin/api/model-configure'
,
{
...
...
@@ -1551,6 +1684,8 @@ async function addWhisperServerModel(){
document
.
getElementById
(
'ws-gpu-device'
).
value
=
'0'
;
document
.
getElementById
(
'ws-load-mode'
).
value
=
'on-request'
;
document
.
getElementById
(
'ws-used-vram'
).
value
=
''
;
document
.
getElementById
(
'ws-alias'
).
value
=
''
;
document
.
getElementById
(
'ws-submit-btn'
).
textContent
=
'Add model'
;
toggleWhisperModelSource
();
refreshLocal
();
}
catch
(
e
){
alert
(
'Error: '
+
e
.
message
);
}
...
...
codai/api/transcriptions.py
View file @
aa34ff9a
...
...
@@ -134,8 +134,12 @@ async def create_transcription(
if
len
(
file_content
)
>
_MAX_AUDIO_BYTES
:
raise
HTTPException
(
status_code
=
413
,
detail
=
"Audio file too large (max 100 MB)"
)
# Check if the requested model maps to a configured whisper-server instance first
whisper_server
=
multi_model_manager
.
whisper_servers
.
get
(
model
)
# Check if the requested model maps to a configured whisper-server instance first.
# Try alias round-robin resolution before direct ID lookup.
whisper_server
=
(
multi_model_manager
.
resolve_whisper_alias
(
model
)
or
multi_model_manager
.
whisper_servers
.
get
(
model
)
)
if
whisper_server
is
not
None
:
multi_model_manager
.
request_model
(
requested_model
=
model
,
model_type
=
"audio"
)
if
not
whisper_server
.
is_running
():
...
...
codai/main.py
View file @
aa34ff9a
...
...
@@ -373,12 +373,14 @@ def main():
continue
if
isinstance
(
m
,
dict
)
and
m
.
get
(
"backend"
)
==
"whisper-server"
:
cfg
=
_model_cfg
(
m
,
"audio"
)
alias
=
(
m
.
get
(
"alias"
)
or
""
)
.
strip
()
or
None
cfg
.
update
({
"backend"
:
"whisper-server"
,
"server_path"
:
m
.
get
(
"server_path"
,
""
),
"model_path"
:
m
.
get
(
"model_path"
)
or
None
,
"port"
:
int
(
m
.
get
(
"port"
,
8744
)),
"gpu_device"
:
int
(
m
.
get
(
"gpu_device"
,
0
)),
"alias"
:
alias
,
})
multi_model_manager
.
register_whisper_server
(
model_id
=
mid
,
...
...
@@ -387,6 +389,7 @@ def main():
port
=
int
(
m
.
get
(
"port"
,
8744
)),
gpu_device
=
int
(
m
.
get
(
"gpu_device"
,
0
)),
config
=
cfg
,
alias
=
alias
,
)
else
:
multi_model_manager
.
set_audio_model
(
mid
,
config
=
_model_cfg
(
m
,
"audio"
))
...
...
codai/models/manager.py
View file @
aa34ff9a
...
...
@@ -499,6 +499,8 @@ class MultiModelManager:
self
.
model_aliases
:
Dict
[
str
,
str
]
=
{}
self
.
whisper_server
:
Optional
[
WhisperServerManager
]
=
None
# legacy single-instance compat
self
.
whisper_servers
:
Dict
[
str
,
WhisperServerManager
]
=
{}
# id -> manager
self
.
whisper_aliases
:
Dict
[
str
,
List
[
str
]]
=
{}
# alias -> [model_id, ...]
self
.
_whisper_alias_counters
:
Dict
[
str
,
int
]
=
{}
# alias -> next round-robin index
self
.
model_backend_types
:
Dict
[
str
,
str
]
=
{}
self
.
tool_breaker
=
FuzzyToolBreaker
(
threshold
=
3
)
# Circuit breaker for repetitive tool calls
self
.
_load_lock
=
threading
.
Lock
()
# Prevents duplicate on-demand model loads
...
...
@@ -761,7 +763,8 @@ class MultiModelManager:
print
(
f
"Audio model '{model_name}' cached as: {resolved_model}"
)
def
register_whisper_server
(
self
,
model_id
:
str
,
server_path
:
str
,
model_path
:
str
=
None
,
port
:
int
=
8744
,
gpu_device
:
int
=
0
,
config
:
Dict
=
None
):
port
:
int
=
8744
,
gpu_device
:
int
=
0
,
config
:
Dict
=
None
,
alias
:
str
=
None
):
"""Register a whisper-server instance as an audio model."""
wsm
=
WhisperServerManager
(
server_path
=
server_path
,
port
=
port
)
wsm
.
_model_path
=
model_path
...
...
@@ -776,8 +779,25 @@ class MultiModelManager:
if
model_id
not
in
self
.
audio_models
:
self
.
audio_models
.
append
(
model_id
)
self
.
config
[
f
"audio:{model_id}"
]
=
cfg
print
(
f
"Registered whisper-server audio model: {model_id} (server: {server_path})"
)
# Register alias for round-robin routing
if
alias
:
wsm
.
_alias
=
alias
ids
=
self
.
whisper_aliases
.
setdefault
(
alias
,
[])
if
model_id
not
in
ids
:
ids
.
append
(
model_id
)
self
.
_whisper_alias_counters
.
setdefault
(
alias
,
0
)
print
(
f
"Registered whisper-server audio model: {model_id} (server: {server_path})"
+
(
f
" alias={alias}"
if
alias
else
""
))
return
wsm
def resolve_whisper_alias(self, name: str) -> Optional[WhisperServerManager]:
    """Return the next round-robin WhisperServerManager for an alias.

    Args:
        name: Alias to resolve (a key of ``self.whisper_aliases``).

    Returns:
        The manager of the next instance in the alias group, or ``None``
        when the alias is unknown, has no members, or none of its member
        IDs has a manager in ``self.whisper_servers``.
    """
    ids = self.whisper_aliases.get(name)
    if not ids:
        return None

    # NOTE(review): the counter read/update below is not guarded by a lock;
    # confirm whether callers can invoke this concurrently.
    start = self._whisper_alias_counters.get(name, 0)
    count = len(ids)
    for offset in range(count):
        idx = (start + offset) % count
        server = self.whisper_servers.get(ids[idx])
        if server is not None:
            # Store the position after the instance handed out so the next
            # call continues the rotation from there.
            self._whisper_alias_counters[name] = idx + 1
            return server
        # This ID has no registered manager: try the next member rather
        # than failing the whole alias lookup.
    return None
def
set_tts_model
(
self
,
model_name
:
str
,
config
:
Dict
=
None
):
"""Set the text-to-speech model and download/cache it if needed."""
...
...
@@ -2033,6 +2053,8 @@ class MultiModelManager:
capabilities
=
caps
.
to_list
(),
backend
=
meta
.
get
(
"backend"
),
model_path
=
meta
.
get
(
"model_path"
),
server_path
=
meta
.
get
(
"server_path"
),
alias
=
meta
.
get
(
"alias"
),
port
=
meta
.
get
(
"port"
),
gpu_device
=
meta
.
get
(
"gpu_device"
),
load_mode
=
meta
.
get
(
"load_mode"
),
...
...
@@ -2051,13 +2073,19 @@ class MultiModelManager:
if
isinstance
(
m
,
str
):
mid
=
m
else
:
mid
=
m
.
get
(
"alias"
)
or
m
.
get
(
"path"
)
or
m
.
get
(
"id"
)
or
""
raw
=
m
.
get
(
"path"
)
or
m
.
get
(
"id"
)
or
""
if
raw
and
raw
!=
mid
:
_add
(
raw
,
mtype
,
m
)
short
=
raw
.
split
(
"/"
)[
-
1
]
if
"/"
in
raw
else
raw
if
short
!=
raw
:
_add
(
short
,
mtype
,
m
)
alias
=
m
.
get
(
"alias"
)
or
""
# whisper-server aliases are round-robin group keys shared across
# multiple instances — don't expose the alias as a separate model
if
m
.
get
(
"backend"
)
==
"whisper-server"
:
mid
=
raw
else
:
mid
=
alias
or
raw
if
raw
and
raw
!=
mid
:
_add
(
raw
,
mtype
,
m
)
short
=
raw
.
split
(
"/"
)[
-
1
]
if
"/"
in
raw
else
raw
if
short
!=
raw
:
_add
(
short
,
mtype
,
m
)
if
mid
:
_add
(
mid
,
mtype
,
m
if
isinstance
(
m
,
dict
)
else
None
)
short
=
mid
.
split
(
"/"
)[
-
1
]
if
"/"
in
mid
else
mid
...
...
codai/pydantic/textrequest.py
View file @
aa34ff9a
...
...
@@ -123,6 +123,8 @@ class ModelInfo(BaseModel):
capabilities
:
Optional
[
List
[
str
]]
=
None
# list of capability strings
backend
:
Optional
[
str
]
=
None
model_path
:
Optional
[
str
]
=
None
server_path
:
Optional
[
str
]
=
None
alias
:
Optional
[
str
]
=
None
port
:
Optional
[
int
]
=
None
gpu_device
:
Optional
[
int
]
=
None
load_mode
:
Optional
[
str
]
=
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment