Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in
Toggle navigation
C
coderai
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexlab
coderai
Commits
ad758123
Commit
ad758123
authored
May 06, 2026
by
Stefy Lanza (nextime / spora )
Browse files
Options
Browse Files
Download
Plain Diff
merge: integrate whisper-server local model workflow
parents
b17e45a5
da83cc25
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
547 additions
and
275 deletions
+547
-275
.gitignore
.gitignore
+2
-0
routes.py
codai/admin/routes.py
+44
-87
models.html
codai/admin/templates/models.html
+102
-40
settings.html
codai/admin/templates/settings.html
+1
-104
transcriptions.py
codai/api/transcriptions.py
+22
-25
config.py
codai/config.py
+1
-5
main.py
codai/main.py
+12
-7
manager.py
codai/models/manager.py
+17
-6
textrequest.py
codai/pydantic/textrequest.py
+6
-1
test_whisper_server_local_models.py
tests/test_whisper_server_local_models.py
+340
-0
No files found.
.gitignore
View file @
ad758123
...
...
@@ -20,3 +20,5 @@ debug.log
# Test files
test_*.py
!tests/
!tests/test_whisper_server_local_models.py
codai/admin/routes.py
View file @
ad758123
...
...
@@ -1172,6 +1172,17 @@ async def api_model_load(request: Request, username: str = Depends(require_admin
raise
RuntimeError
(
"Model failed to load"
)
multi_model_manager
.
models
[
result
[
"model_key"
]
or
path
]
=
mm
multi_model_manager
.
active_in_vram
=
result
[
"model_key"
]
or
path
elif
model_type
==
"audio"
:
wsm
=
multi_model_manager
.
whisper_servers
.
get
(
path
)
if
wsm
is
not
None
:
started
=
wsm
.
start
(
getattr
(
wsm
,
"_model_path"
,
None
),
gpu_device
=
getattr
(
wsm
,
"_gpu_device"
,
0
))
if
not
wsm
.
is_running
():
raise
RuntimeError
(
"whisper-server failed to start"
)
model_key
=
f
"audio:{path}"
multi_model_manager
.
models
[
model_key
]
=
wsm
multi_model_manager
.
active_in_vram
=
model_key
multi_model_manager
.
models_in_vram
.
add
(
model_key
)
return
{
"success"
:
True
,
"already_loaded"
:
False
,
"started_model"
:
started
}
elif
model_type
==
"image"
:
from
codai.api.images
import
_load_diffusers_pipeline
,
_is_gguf_model
,
_load_sdcpp_model
from
codai.api.state
import
get_global_args
...
...
@@ -1243,6 +1254,38 @@ async def api_model_configure(request: Request, username: str = Depends(require_
if
config_manager
is
None
:
raise
HTTPException
(
status_code
=
503
,
detail
=
"Config manager not initialized"
)
data
=
await
request
.
json
()
if
data
.
get
(
"backend"
)
==
"whisper-server"
:
model_id
=
(
data
.
get
(
"model_id"
)
or
""
)
.
strip
()
if
not
model_id
:
raise
HTTPException
(
status_code
=
400
,
detail
=
"model_id is required"
)
server_path
=
(
data
.
get
(
"server_path"
)
or
""
)
.
strip
()
if
not
server_path
:
raise
HTTPException
(
status_code
=
400
,
detail
=
"server_path is required"
)
port
=
int
(
data
.
get
(
"port"
,
8744
))
if
port
<
1
or
port
>
65535
:
raise
HTTPException
(
status_code
=
400
,
detail
=
"port must be between 1 and 65535"
)
gpu_device
=
int
(
data
.
get
(
"gpu_device"
,
0
))
if
gpu_device
<
0
:
raise
HTTPException
(
status_code
=
400
,
detail
=
"gpu_device must be >= 0"
)
for
existing
in
config_manager
.
models_data
.
get
(
"audio_models"
,
[]):
if
isinstance
(
existing
,
dict
)
and
existing
.
get
(
"id"
)
==
model_id
:
raise
HTTPException
(
status_code
=
409
,
detail
=
f
"whisper-server model '{model_id}' already exists"
)
entry
=
{
"id"
:
model_id
,
"backend"
:
"whisper-server"
,
"server_path"
:
server_path
,
"model_path"
:
(
data
.
get
(
"model_path"
)
or
""
)
.
strip
()
or
None
,
"port"
:
port
,
"gpu_device"
:
gpu_device
,
"load_mode"
:
data
.
get
(
"load_mode"
,
"on-request"
),
"model_type"
:
"audio_models"
,
"model_types"
:
[
"audio_models"
],
}
if
data
.
get
(
"used_vram_gb"
)
is
not
None
:
entry
[
"used_vram_gb"
]
=
data
[
"used_vram_gb"
]
config_manager
.
models_data
.
setdefault
(
"audio_models"
,
[])
.
append
(
entry
)
config_manager
.
save_models
()
return
{
"success"
:
True
}
path
=
data
.
get
(
"path"
)
or
data
.
get
(
"model_id"
,
""
)
valid
=
{
"text_models"
,
"image_models"
,
"audio_models"
,
"tts_models"
,
"vision_models"
,
"video_models"
,
"audio_gen_models"
,
"embedding_models"
}
...
...
@@ -1375,10 +1418,6 @@ async def api_get_settings(username: str = Depends(require_admin)):
"device_id"
:
c
.
vulkan
.
device_id
,
"single_gpu"
:
c
.
vulkan
.
single_gpu
,
},
"whisper"
:
{
"server_path"
:
c
.
whisper
.
server_path
,
"server_port"
:
c
.
whisper
.
server_port
,
},
"system_prompt"
:
c
.
system_prompt
,
"tools_closer_prompt"
:
c
.
tools_closer_prompt
,
"grammar_guided"
:
c
.
grammar_guided
,
...
...
@@ -1442,11 +1481,6 @@ async def api_save_settings(request: Request, username: str = Depends(require_ad
c
.
vulkan
.
device_id
=
int
(
vk
.
get
(
"device_id"
,
c
.
vulkan
.
device_id
))
c
.
vulkan
.
single_gpu
=
bool
(
vk
.
get
(
"single_gpu"
,
c
.
vulkan
.
single_gpu
))
if
"whisper"
in
data
:
wh
=
data
[
"whisper"
]
c
.
whisper
.
server_path
=
wh
.
get
(
"server_path"
)
or
None
c
.
whisper
.
server_port
=
int
(
wh
.
get
(
"server_port"
,
c
.
whisper
.
server_port
))
if
"system_prompt"
in
data
:
c
.
system_prompt
=
data
[
"system_prompt"
]
or
None
if
"tools_closer_prompt"
in
data
:
...
...
@@ -1458,83 +1492,6 @@ async def api_save_settings(request: Request, username: str = Depends(require_ad
config_manager
.
save_config
()
return
{
"success"
:
True
}
# --- Whisper-server management ---
@
router
.
get
(
"/admin/api/whisper-server/status"
)
async
def
api_whisper_server_status
(
username
:
str
=
Depends
(
require_admin
)):
"""Return status of all registered whisper-server instances."""
from
codai.models.manager
import
multi_model_manager
if
multi_model_manager
.
whisper_servers
:
return
{
mid
:
wsm
.
get_status
()
for
mid
,
wsm
in
multi_model_manager
.
whisper_servers
.
items
()
}
# Legacy single-instance fallback
if
multi_model_manager
.
whisper_server
:
return
{
"whisper-server"
:
multi_model_manager
.
whisper_server
.
get_status
()}
return
{}
@
router
.
post
(
"/admin/api/whisper-server/start"
)
async
def
api_whisper_server_start
(
request
:
Request
,
username
:
str
=
Depends
(
require_admin
)):
"""Start (or restart) a whisper-server instance by model_id."""
from
codai.models.manager
import
multi_model_manager
data
=
await
request
.
json
()
model_id
=
data
.
get
(
"model_id"
,
"whisper-server"
)
server_path
=
data
.
get
(
"server_path"
,
""
)
model_path
=
data
.
get
(
"model_path"
)
or
None
port
=
int
(
data
.
get
(
"port"
,
8744
))
gpu_device
=
int
(
data
.
get
(
"gpu_device"
,
0
))
if
not
server_path
:
raise
HTTPException
(
status_code
=
400
,
detail
=
"server_path required"
)
wsm
=
multi_model_manager
.
whisper_servers
.
get
(
model_id
)
if
wsm
is
None
:
wsm
=
multi_model_manager
.
register_whisper_server
(
model_id
=
model_id
,
server_path
=
server_path
,
model_path
=
model_path
,
port
=
port
,
gpu_device
=
gpu_device
,
)
else
:
wsm
.
server_path
=
server_path
wsm
.
port
=
port
wsm
.
base_url
=
f
"http://127.0.0.1:{port}"
wsm
.
_model_path
=
model_path
wsm
.
_gpu_device
=
gpu_device
result
=
wsm
.
start
(
model_path
,
gpu_device
=
gpu_device
)
running
=
wsm
.
is_running
()
if
running
:
ws_key
=
f
"audio:{model_id}"
multi_model_manager
.
models
[
ws_key
]
=
wsm
multi_model_manager
.
active_in_vram
=
ws_key
multi_model_manager
.
models_in_vram
.
add
(
ws_key
)
return
{
"success"
:
running
,
"running"
:
running
,
"started_model"
:
result
}
@
router
.
post
(
"/admin/api/whisper-server/stop"
)
async
def
api_whisper_server_stop
(
request
:
Request
,
username
:
str
=
Depends
(
require_admin
)):
"""Stop a whisper-server instance by model_id."""
from
codai.models.manager
import
multi_model_manager
data
=
await
request
.
json
()
if
request
.
headers
.
get
(
"content-type"
,
""
)
.
startswith
(
"application/json"
)
else
{}
model_id
=
data
.
get
(
"model_id"
,
"whisper-server"
)
wsm
=
multi_model_manager
.
whisper_servers
.
get
(
model_id
)
or
multi_model_manager
.
whisper_server
if
wsm
:
wsm
.
stop
()
ws_key
=
f
"audio:{model_id}"
multi_model_manager
.
models
.
pop
(
ws_key
,
None
)
multi_model_manager
.
models_in_vram
.
discard
(
ws_key
)
if
multi_model_manager
.
active_in_vram
==
ws_key
:
multi_model_manager
.
active_in_vram
=
None
return
{
"success"
:
True
,
"running"
:
False
}
# --- HuggingFace model search proxy ---
import
re
as
_re
...
...
@@ -1773,4 +1730,4 @@ async def api_hf_model_info(model_id: str, username: str = Depends(require_admin
"params_label"
:
params_label
,
"gguf_files"
:
gguf_files
,
"file_count"
:
len
(
all_files
),
}
\ No newline at end of file
}
codai/admin/templates/models.html
View file @
ad758123
...
...
@@ -95,17 +95,26 @@
<div
id=
"gguf-models-list"
><span
class=
"muted small"
>
Loading…
</span></div>
</div>
<!-- Whisper Server -->
<div
class=
"card mb-0"
style=
"margin-top:1rem"
id=
"ws-card"
>
<div
style=
"display:flex;align-items:center;justify-content:space-between;flex-wrap:wrap;gap:.5rem"
>
<div>
<div
class=
"card-title"
style=
"margin:0"
>
whisper-server
<span
class=
"muted"
style=
"font-size:11px;font-weight:400"
>
— native subprocess (AMD/Vulkan)
</span></div>
<div
id=
"ws-model-status"
class=
"muted small"
style=
"margin-top:.25rem"
>
—
</div>
</div>
<div
style=
"display:flex;align-items:center;gap:.5rem"
>
<span
id=
"ws-running-badge"
style=
"font-size:12px;font-weight:500"
>
—
</span>
<a
href=
"/admin/settings"
class=
"btn btn-sm btn-ghost"
>
Configure
</a>
</div>
<div
class=
"card mb-0"
style=
"margin-top:1rem"
id=
"ws-model-builder"
>
<div
class=
"card-title"
>
Whisper-server simulated models
</div>
<p
class=
"muted small"
style=
"margin-top:0"
>
Create local audio models backed by dedicated whisper-server subprocess configurations.
</p>
<div
style=
"display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:.75rem"
>
<input
id=
"ws-model-id"
class=
"form-input"
placeholder=
"whisper-vulkan-base"
>
<input
id=
"ws-server-path"
class=
"form-input"
placeholder=
"/usr/local/bin/whisper-server"
>
<input
id=
"ws-model-path"
class=
"form-input"
placeholder=
"/models/ggml-base.bin"
>
<input
id=
"ws-port"
class=
"form-input"
type=
"number"
value=
"8744"
min=
"1"
max=
"65535"
>
<input
id=
"ws-gpu-device"
class=
"form-input"
type=
"number"
value=
"0"
min=
"0"
>
<select
id=
"ws-load-mode"
class=
"form-input"
>
<option
value=
"on-request"
>
On request
</option>
<option
value=
"load"
>
Load
</option>
</select>
</div>
<div
style=
"display:grid;grid-template-columns:repeat(2,minmax(0,1fr));gap:.75rem;margin-top:.75rem"
>
<input
id=
"ws-used-vram"
class=
"form-input"
type=
"number"
min=
"0"
step=
"0.1"
placeholder=
"Used VRAM (optional)"
>
<div></div>
</div>
<div
class=
"form-actions"
style=
"margin-top:.75rem"
>
<button
class=
"btn btn-primary"
onclick=
"addWhisperServerModel()"
>
Add model
</button>
</div>
</div>
</div>
...
...
@@ -517,33 +526,6 @@ async function loadGlobalSettings(){
}
catch
{}
}
async
function
loadWsStatus
(){
try
{
const
s
=
await
fetch
(
'/admin/api/whisper-server/status'
).
then
(
r
=>
r
.
json
());
const
card
=
document
.
getElementById
(
'ws-card'
);
const
badge
=
document
.
getElementById
(
'ws-running-badge'
);
const
modelEl
=
document
.
getElementById
(
'ws-model-status'
);
const
entries
=
Object
.
entries
(
s
);
if
(
!
entries
.
length
){
card
.
style
.
display
=
'none'
;
return
;
}
card
.
style
.
display
=
''
;
const
running
=
entries
.
filter
(([,
v
])
=>
v
.
running
);
if
(
running
.
length
){
badge
.
textContent
=
`●
${
running
.
length
}
/
${
entries
.
length
}
running`
;
badge
.
style
.
color
=
'var(--green, #4ade80)'
;
card
.
style
.
borderColor
=
'rgba(74,222,128,.3)'
;
modelEl
.
textContent
=
running
.
map
(([
id
,
v
])
=>
`
${
id
}
:
${
v
.
model
||
'?'
}
@
${
v
.
url
}
`
).
join
(
' | '
);
}
else
{
badge
.
textContent
=
'○ stopped'
;
badge
.
style
.
color
=
'var(--text-2)'
;
card
.
style
.
borderColor
=
''
;
modelEl
.
textContent
=
entries
.
map
(([
id
])
=>
id
).
join
(
', '
)
+
' — not started'
;
}
}
catch
{}
}
/* ── GGUF format toggle ──────────────────────────────── */
let
_ggufMode
=
'gguf'
;
document
.
querySelectorAll
(
'.tog-btn'
).
forEach
(
btn
=>
{
...
...
@@ -982,6 +964,57 @@ async function loadCacheStats(){
let
_localModels
=
[];
function
_renderWhisperServerRows
(
models
){
if
(
!
models
.
length
)
return
''
;
const
rows
=
models
.
map
(
m
=>
{
const
idx
=
_localModels
.
length
;
_localModels
.
push
({
label
:
m
.
id
,
path
:
m
.
id
,
cacheType
:
'whisper-server'
,
size_gb
:
0
,
defaultType
:
'audio_models'
,
settings
:{
backend
:
m
.
backend
||
'whisper-server'
,
load_mode
:
m
.
load_mode
||
'on-request'
,
model_type
:
'audio_models'
,
model_path
:
m
.
model_path
||
''
,
port
:
m
.
port
,
gpu_device
:
m
.
gpu_device
,
},
in_config
:
true
,
capabilities
:
m
.
capabilities
||
[
'speech_to_text'
]
});
const
loaded
=
_loadedKeys
.
has
(
`audio:
${
m
.
id
}
`
)
||
_loadedKeys
.
has
(
m
.
id
);
return
`<tr style="border-top:1px solid var(--border)">
<td style="padding:.4rem .25rem;font-family:monospace;font-size:12px">
${
esc
(
m
.
id
)}
</td>
<td style="padding:.4rem .25rem"><span class="badge badge-ok">
${
esc
(
m
.
backend
||
'whisper-server'
)}
</span></td>
<td style="padding:.4rem .25rem;font-size:11px;color:var(--text-2)">
${
esc
(
m
.
model_path
||
'—'
)}
</td>
<td style="padding:.4rem .25rem;font-size:11px;color:var(--text-2)">
${
m
.
port
??
'—'
}
/ GPU ${m.gpu_device
??
0}</
td
>
<
td
style
=
"padding:.4rem .25rem;font-size:11px;color:var(--text-2)"
>
$
{
esc
(
m
.
load_mode
||
'on-request'
)}
<
/td
>
<
td
style
=
"padding:.4rem .25rem;text-align:center"
>
$
{
loaded
?
'<span class="badge badge-ok">loaded</span>'
:
'<span class="muted small">idle</span>'
}
<
/td
>
<
td
style
=
"padding:.4rem .25rem;text-align:right;white-space:nowrap"
>
$
{
loaded
?
`<button class="btn btn-ghost btn-sm" onclick="unloadModel(
${
idx
}
)">Unload</button>`
:
`<button class="btn btn-primary btn-sm" onclick="loadModel(
${
idx
}
)">Load now</button>`
}
<
button
class
=
"btn btn-secondary btn-sm"
onclick
=
"openCfgModal(${idx})"
>
Configure
<
/button
>
<
button
class
=
"btn btn-ghost btn-sm"
onclick
=
"disableModel(${idx})"
>
Remove
<
/button
>
<
/td
>
<
/tr>`
;
});
return
'<div class="card" style="margin-top:1rem">'
+
'<div class="card-title">Configured whisper-server models</div>'
+
'<table style="width:100%;border-collapse:collapse;font-size:13px">'
+
'<thead><tr style="color:var(--text-2);font-size:10px;text-transform:uppercase;letter-spacing:.05em">'
+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model</th>'
+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Backend</th>'
+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model path</th>'
+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Port / GPU</th>'
+
'<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Load mode</th>'
+
'<th style="text-align:center;padding:.3rem .25rem;font-weight:700">Status</th>'
+
'<th></th></tr></thead><tbody>'
+
rows
.
join
(
''
)
+
'</tbody></table></div>'
;
}
async
function
loadCachedModels
(){
_localModels
=
[];
const
hfEl
=
document
.
getElementById
(
'hf-models-list'
);
...
...
@@ -991,6 +1024,8 @@ async function loadCachedModels(){
const
r
=
await
fetch
(
'/admin/api/cached-models'
);
if
(
!
r
.
ok
)
throw
new
Error
((
await
r
.
json
()).
detail
||
r
.
statusText
);
const
d
=
await
r
.
json
();
const
whisperModels
=
(
await
fetch
(
'/admin/api/models'
).
then
(
r
=>
r
.
ok
?
r
.
json
():[]))
.
filter
(
m
=>
m
.
backend
===
'whisper-server'
);
// HF models
const
hf
=
d
.
hf
||
[];
...
...
@@ -1067,6 +1102,7 @@ async function loadCachedModels(){
'<th style="text-align:center;padding:.3rem .25rem;font-weight:700">Config</th>'
+
'<th></th></tr></thead><tbody>'
+
rows
.
join
(
''
)
+
'</tbody></table>'
;
}
ggufEl
.
insertAdjacentHTML
(
'afterend'
,
_renderWhisperServerRows
(
whisperModels
));
}
catch
(
e
){
hfEl
.
innerHTML
=
ggufEl
.
innerHTML
=
`<span class="muted small">Error:
${
esc
(
e
.
message
)}
</span>`
;
}
...
...
@@ -1089,8 +1125,6 @@ async function refreshLocal(){
loadGlobalSettings
();
refreshLocal
();
loadWsStatus
();
setInterval
(
loadWsStatus
,
5000
);
async
function
clearCacheConfirm
(
type
){
const
labels
=
{
hf
:
'HuggingFace'
,
gguf
:
'GGUF'
,
all
:
'ALL'
};
...
...
@@ -1232,6 +1266,9 @@ function openCfgModal(idx){
document
.
getElementById
(
'cfg-parser'
).
value
=
s
.
parser
||
'auto'
;
document
.
getElementById
(
'cfg-tools'
).
checked
=
!!
s
.
tools_closer_prompt
;
document
.
getElementById
(
'cfg-grammar'
).
checked
=
!!
s
.
grammar_guided
;
if
(
m
.
cacheType
===
'whisper-server'
)
{
document
.
getElementById
(
'cfg-backend'
).
value
=
'cpu'
;
}
openModal
(
'cfg-modal'
);
}
...
...
@@ -1282,6 +1319,31 @@ async function saveModelConfig(){
}
catch
(
e
){
alert
(
'Error: '
+
e
.
message
);
}
}
async
function
addWhisperServerModel
(){
const
usedVram
=
parseFloat
(
document
.
getElementById
(
'ws-used-vram'
).
value
);
const
payload
=
{
model_id
:
document
.
getElementById
(
'ws-model-id'
).
value
.
trim
(),
model_type
:
'audio_models'
,
backend
:
'whisper-server'
,
server_path
:
document
.
getElementById
(
'ws-server-path'
).
value
.
trim
(),
model_path
:
document
.
getElementById
(
'ws-model-path'
).
value
.
trim
()
||
null
,
port
:
parseInt
(
document
.
getElementById
(
'ws-port'
).
value
,
10
)
||
8744
,
gpu_device
:
parseInt
(
document
.
getElementById
(
'ws-gpu-device'
).
value
,
10
)
||
0
,
load_mode
:
document
.
getElementById
(
'ws-load-mode'
).
value
,
used_vram_gb
:
Number
.
isNaN
(
usedVram
)
?
null
:
usedVram
,
};
try
{
const
r
=
await
fetch
(
'/admin/api/model-configure'
,
{
method
:
'POST'
,
headers
:{
'Content-Type'
:
'application/json'
},
body
:
JSON
.
stringify
(
payload
)
});
const
d
=
await
r
.
json
();
if
(
!
r
.
ok
)
throw
new
Error
(
d
.
detail
||
'Failed to add whisper-server model'
);
refreshLocal
();
}
catch
(
e
){
alert
(
'Error: '
+
e
.
message
);
}
}
async
function
loadModel
(
idx
){
const
m
=
_localModels
[
idx
];
// Find the button and show loading state
...
...
codai/admin/templates/settings.html
View file @
ad758123
...
...
@@ -69,48 +69,6 @@
<span
class=
"form-hint"
>
Models will inherit this as default when configured
</span>
</div>
</div>
<!-- Whisper Server -->
<div
class=
"card mb-0"
style=
"margin-top:1rem"
>
<div
style=
"display:flex;align-items:center;justify-content:space-between;flex-wrap:wrap;gap:.5rem;margin-bottom:1rem"
>
<div
class=
"card-title"
style=
"margin:0"
>
Whisper Server
<span
class=
"muted"
style=
"font-size:11px;font-weight:400"
>
(whisper.cpp native binary — recommended for AMD/Vulkan)
</span></div>
<div
style=
"display:flex;align-items:center;gap:.5rem"
>
<span
id=
"ws-badge"
class=
"muted small"
>
—
</span>
<button
class=
"btn btn-sm btn-secondary"
onclick=
"wsStart()"
>
Start
</button>
<button
class=
"btn btn-sm btn-danger"
onclick=
"wsStop()"
>
Stop
</button>
</div>
</div>
<div
style=
"display:grid;grid-template-columns:1fr 160px;gap:1rem;align-items:start"
>
<div
class=
"form-row"
style=
"margin:0"
>
<label
class=
"form-label"
>
Model ID
<span
class=
"muted"
>
(used in API calls, e.g. whisper-base)
</span></label>
<input
type=
"text"
id=
"ws-id"
class=
"form-input"
placeholder=
"whisper-server"
>
<span
class=
"form-hint"
>
The name clients use in the
<code>
model
</code>
field of transcription requests
</span>
</div>
<div
class=
"form-row"
style=
"margin:0"
>
<label
class=
"form-label"
>
Port
</label>
<input
type=
"number"
id=
"ws-port"
class=
"form-input"
placeholder=
"8744"
min=
"1024"
max=
"65535"
>
</div>
</div>
<div
style=
"display:grid;grid-template-columns:1fr 160px;gap:1rem;align-items:start;margin-top:1rem"
>
<div
class=
"form-row"
style=
"margin:0"
>
<label
class=
"form-label"
>
whisper-server binary path
</label>
<input
type=
"text"
id=
"ws-path"
class=
"form-input"
placeholder=
"/usr/local/bin/whisper-server"
>
</div>
<div
class=
"form-row"
style=
"margin:0"
>
<label
class=
"form-label"
>
GPU device index
</label>
<input
type=
"number"
id=
"ws-gpu"
class=
"form-input"
placeholder=
"0"
min=
"0"
>
</div>
</div>
<div
class=
"form-row"
style=
"margin-top:1rem;margin-bottom:0"
>
<label
class=
"form-label"
>
Model path
<span
class=
"muted"
>
(GGUF whisper model, e.g. ggml-base.bin)
</span></label>
<input
type=
"text"
id=
"ws-model"
class=
"form-input"
placeholder=
"/path/to/ggml-base.bin"
>
<span
class=
"form-hint"
>
Configure multiple instances by adding entries to
<code>
models.json
</code>
with
<code>
"backend": "whisper-server"
</code></span>
</div>
<p
class=
"form-hint"
style=
"margin-top:.75rem;margin-bottom:0"
>
When configured, the transcription endpoint uses this subprocess instead of the Python faster-whisper module.
Saves settings to
<code>
config.json
</code>
and takes effect immediately (no restart needed).
</p>
</div>
{% endblock %}
{% block scripts %}
...
...
@@ -140,65 +98,10 @@ async function loadSettings(){
document
.
getElementById
(
's-hf-cache'
).
value
=
d
.
models
?.
hf_cache_dir
??
''
;
document
.
getElementById
(
's-gguf-cache'
).
value
=
d
.
models
?.
gguf_cache_dir
??
''
;
document
.
getElementById
(
's-offload-dir'
).
value
=
d
.
offload
?.
directory
??
'./offload'
;
document
.
getElementById
(
'ws-path'
).
value
=
d
.
whisper
?.
server_path
??
''
;
document
.
getElementById
(
'ws-port'
).
value
=
d
.
whisper
?.
server_port
??
8744
;
toggleHttps
();
}
catch
(
e
){
showAlert
(
'error'
,
'Failed to load settings: '
+
e
.
message
);
}
}
async
function
loadWsStatus
(){
try
{
const
s
=
await
fetch
(
'/admin/api/whisper-server/status'
).
then
(
r
=>
r
.
json
());
const
badge
=
document
.
getElementById
(
'ws-badge'
);
// s is now a dict of {model_id: {running, model, url}}
const
entries
=
Object
.
entries
(
s
);
if
(
!
entries
.
length
){
badge
.
textContent
=
'○ not configured'
;
badge
.
style
.
color
=
'var(--text-2)'
;
return
;
}
const
running
=
entries
.
filter
(([,
v
])
=>
v
.
running
);
if
(
running
.
length
){
badge
.
textContent
=
`●
${
running
.
length
}
running`
;
badge
.
style
.
color
=
'var(--green, #4ade80)'
;
}
else
{
badge
.
textContent
=
'○ stopped'
;
badge
.
style
.
color
=
'var(--text-2)'
;
}
}
catch
(
e
){}
}
async
function
wsStart
(){
const
path
=
document
.
getElementById
(
'ws-path'
).
value
.
trim
();
if
(
!
path
){
showAlert
(
'error'
,
'Binary path required'
);
return
;
}
try
{
const
r
=
await
fetch
(
'/admin/api/whisper-server/start'
,{
method
:
'POST'
,
headers
:{
'Content-Type'
:
'application/json'
},
body
:
JSON
.
stringify
({
model_id
:
document
.
getElementById
(
'ws-id'
).
value
.
trim
()
||
'whisper-server'
,
server_path
:
path
,
model_path
:
document
.
getElementById
(
'ws-model'
).
value
.
trim
()
||
null
,
port
:
parseInt
(
document
.
getElementById
(
'ws-port'
).
value
)
||
8744
,
gpu_device
:
parseInt
(
document
.
getElementById
(
'ws-gpu'
).
value
)
||
0
,
})
});
const
d
=
await
r
.
json
();
if
(
d
.
success
)
showAlert
(
'info'
,
'whisper-server started'
);
else
showAlert
(
'error'
,
'Failed to start whisper-server'
);
loadWsStatus
();
}
catch
(
e
){
showAlert
(
'error'
,
'Error: '
+
e
.
message
);
}
}
async
function
wsStop
(){
const
modelId
=
document
.
getElementById
(
'ws-id'
).
value
.
trim
()
||
'whisper-server'
;
await
fetch
(
'/admin/api/whisper-server/stop'
,{
method
:
'POST'
,
headers
:{
'Content-Type'
:
'application/json'
},
body
:
JSON
.
stringify
({
model_id
:
modelId
})
});
showAlert
(
'info'
,
'whisper-server stopped'
);
loadWsStatus
();
}
async
function
saveSettings
(){
const
strOrNull
=
id
=>
document
.
getElementById
(
id
).
value
.
trim
()
||
null
;
const
data
=
{
...
...
@@ -216,11 +119,7 @@ async function saveSettings(){
},
offload
:{
directory
:
document
.
getElementById
(
's-offload-dir'
).
value
.
trim
()
||
'./offload'
,
},
whisper
:{
server_path
:
document
.
getElementById
(
'ws-path'
).
value
.
trim
()
||
null
,
server_port
:
parseInt
(
document
.
getElementById
(
'ws-port'
).
value
)
||
8744
,
},
}
};
try
{
const
r
=
await
fetch
(
'/admin/api/settings'
,{
...
...
@@ -233,7 +132,5 @@ async function saveSettings(){
}
loadSettings
();
loadWsStatus
();
setInterval
(
loadWsStatus
,
5000
);
</script>
{% endblock %}
codai/api/transcriptions.py
View file @
ad758123
...
...
@@ -134,33 +134,30 @@ async def create_transcription(
if
len
(
file_content
)
>
_MAX_AUDIO_BYTES
:
raise
HTTPException
(
status_code
=
413
,
detail
=
"Audio file too large (max 100 MB)"
)
# Check if the requested model is a whisper-server instance
wsm
=
multi_model_manager
.
whisper_servers
.
get
(
model
)
if
wsm
is
None
and
multi_model_manager
.
whisper_server
is
not
None
:
# Legacy single-instance fallback: use it if no specific match
if
not
multi_model_manager
.
whisper_servers
:
wsm
=
multi_model_manager
.
whisper_server
if
wsm
is
not
None
:
ws_key
=
f
"audio:{model}"
if
model
in
multi_model_manager
.
whisper_servers
else
"audio:whisper-server"
# Let the VRAM manager evict other models if needed
# Check if the requested model maps to a configured whisper-server instance first
whisper_server
=
multi_model_manager
.
whisper_servers
.
get
(
model
)
if
whisper_server
is
not
None
:
multi_model_manager
.
request_model
(
requested_model
=
model
,
model_type
=
"audio"
)
# Start the subprocess if it isn't running (on-demand)
if
not
wsm
.
is_running
():
wsm
.
start
(
getattr
(
wsm
,
'_model_path'
,
None
),
gpu_device
=
getattr
(
wsm
,
'_gpu_device'
,
0
))
if
wsm
.
is_running
():
multi_model_manager
.
models
[
ws_key
]
=
wsm
if
not
whisper_server
.
is_running
():
whisper_server
.
start
(
getattr
(
whisper_server
,
"_model_path"
,
None
),
gpu_device
=
getattr
(
whisper_server
,
"_gpu_device"
,
0
),
)
if
whisper_server
.
is_running
():
ws_key
=
f
"audio:{model}"
multi_model_manager
.
models
[
ws_key
]
=
whisper_server
multi_model_manager
.
active_in_vram
=
ws_key
multi_model_manager
.
models_in_vram
.
add
(
ws_key
)
if
wsm
.
is_running
():
result
=
wsm
.
transcribe
(
file_content
,
language
=
language
,
prompt
=
prompt
)
if
"error"
in
result
:
raise
HTTPException
(
status_code
=
500
,
detail
=
result
[
"error"
])
return
_format_response
(
response_format
,
result
.
get
(
"text"
,
""
),
[])
# Fall through to Python backends if subprocess failed to start
if
not
whisper_server
.
is_running
():
raise
HTTPException
(
status_code
=
500
,
detail
=
"whisper-server failed to start"
)
result
=
whisper_server
.
transcribe
(
file_content
,
language
=
language
,
prompt
=
prompt
)
if
"error"
in
result
:
raise
HTTPException
(
status_code
=
500
,
detail
=
result
[
"error"
])
return
_format_response
(
response_format
,
result
.
get
(
"text"
,
""
),
[])
# Use the manager to resolve the model and manage VRAM
model_info
=
multi_model_manager
.
request_model
(
...
...
@@ -265,4 +262,4 @@ async def create_transcription(
try
:
os
.
unlink
(
tmp_path
)
except
Exception
:
pass
\ No newline at end of file
pass
codai/config.py
View file @
ad758123
...
...
@@ -344,10 +344,6 @@ class ConfigManager:
"vae_tiling"
:
self
.
config
.
image
.
vae_tiling
,
"clip_on_cpu"
:
self
.
config
.
image
.
clip_on_cpu
},
"whisper"
:
{
"server_path"
:
self
.
config
.
whisper
.
server_path
,
"server_port"
:
self
.
config
.
whisper
.
server_port
},
"system_prompt"
:
self
.
config
.
system_prompt
,
"tools_closer_prompt"
:
self
.
config
.
tools_closer_prompt
,
"grammar_guided"
:
self
.
config
.
grammar_guided
,
...
...
@@ -377,4 +373,4 @@ class ConfigManager:
def
reload
(
self
):
"""Reload all configuration files."""
return
self
.
load
()
\ No newline at end of file
return
self
.
load
()
codai/main.py
View file @
ad758123
...
...
@@ -370,16 +370,21 @@ def main():
mid
=
_model_id
(
m
)
if
not
mid
:
continue
backend
=
m
.
get
(
"backend"
,
""
)
if
isinstance
(
m
,
dict
)
else
""
if
backend
==
"whisper-server"
:
# Register as a whisper-server instance
if
isinstance
(
m
,
dict
)
and
m
.
get
(
"backend"
)
==
"whisper-server"
:
cfg
=
_model_cfg
(
m
,
"audio"
)
cfg
.
update
({
"backend"
:
"whisper-server"
,
"server_path"
:
m
.
get
(
"server_path"
,
""
),
"model_path"
:
m
.
get
(
"model_path"
)
or
None
,
"port"
:
int
(
m
.
get
(
"port"
,
8744
)),
"gpu_device"
:
int
(
m
.
get
(
"gpu_device"
,
0
)),
})
multi_model_manager
.
register_whisper_server
(
model_id
=
mid
,
server_path
=
m
.
get
(
"server_path"
,
config
.
whisper
.
server_path
or
""
),
server_path
=
m
.
get
(
"server_path"
,
""
),
model_path
=
m
.
get
(
"model_path"
)
or
None
,
port
=
int
(
m
.
get
(
"port"
,
config
.
whisper
.
server_port
)),
gpu_device
=
int
(
m
.
get
(
"gpu_device"
,
config
.
vulkan
.
device_id
)),
port
=
int
(
m
.
get
(
"port"
,
8744
)),
gpu_device
=
int
(
m
.
get
(
"gpu_device"
,
0
)),
config
=
cfg
,
)
else
:
...
...
@@ -680,4 +685,4 @@ def main():
if
__name__
==
"__main__"
:
main
()
\ No newline at end of file
main
()
codai/models/manager.py
View file @
ad758123
...
...
@@ -450,6 +450,7 @@ class MultiModelManager:
self
.
whisper_server
.
stop
()
except
Exception
:
pass
self
.
whisper_servers
.
clear
()
# Clear all model lists
self
.
default_model
=
None
...
...
@@ -649,6 +650,10 @@ class MultiModelManager:
self
.
audio_models
.
append
(
model_name
)
self
.
config
[
f
"audio:{model_name}"
]
=
config
or
{}
if
isinstance
(
config
,
dict
)
and
config
.
get
(
"backend"
)
==
"whisper-server"
:
print
(
f
"Registered whisper-server audio model: {model_name}"
)
return
# Download/cache the model at startup if it's a URL or HF ID
resolved_model
=
self
.
load_model
(
model_name
)
if
resolved_model
!=
model_name
:
...
...
@@ -1803,16 +1808,22 @@ class MultiModelManager:
"embedding_models"
:
"embedding"
,
}
def
_add
(
model_id
:
str
,
model_type
:
str
=
None
):
def
_add
(
model_id
:
str
,
model_type
:
str
=
None
,
meta
:
Dict
[
str
,
Any
]
=
None
):
if
model_id
in
seen_ids
:
return
seen_ids
.
add
(
model_id
)
caps
=
detect_model_capabilities
(
model_id
)
resolved_type
=
model_type
or
(
caps
.
to_list
()[
0
]
.
split
(
"_"
)[
0
]
if
caps
.
to_list
()
else
"text"
)
meta
=
meta
or
{}
models
.
append
(
ModelInfo
(
id
=
model_id
,
type
=
resolved_type
,
capabilities
=
caps
.
to_list
(),
backend
=
meta
.
get
(
"backend"
),
model_path
=
meta
.
get
(
"model_path"
),
port
=
meta
.
get
(
"port"
),
gpu_device
=
meta
.
get
(
"gpu_device"
),
load_mode
=
meta
.
get
(
"load_mode"
),
))
# --- Models from config (the authoritative source) ---
...
...
@@ -1831,15 +1842,15 @@ class MultiModelManager:
mid
=
m
.
get
(
"alias"
)
or
m
.
get
(
"path"
)
or
m
.
get
(
"id"
)
or
""
raw
=
m
.
get
(
"path"
)
or
m
.
get
(
"id"
)
or
""
if
raw
and
raw
!=
mid
:
_add
(
raw
,
mtype
)
_add
(
raw
,
mtype
,
m
)
short
=
raw
.
split
(
"/"
)[
-
1
]
if
"/"
in
raw
else
raw
if
short
!=
raw
:
_add
(
short
,
mtype
)
_add
(
short
,
mtype
,
m
)
if
mid
:
_add
(
mid
,
mtype
)
_add
(
mid
,
mtype
,
m
if
isinstance
(
m
,
dict
)
else
None
)
short
=
mid
.
split
(
"/"
)[
-
1
]
if
"/"
in
mid
else
mid
if
short
!=
mid
:
_add
(
short
,
mtype
)
_add
(
short
,
mtype
,
m
if
isinstance
(
m
,
dict
)
else
None
)
except
Exception
:
pass
...
...
@@ -1901,4 +1912,4 @@ class MultiModelManager:
# Global singleton instances for convenience
model_manager
=
ModelManager
()
multi_model_manager
=
MultiModelManager
()
\ No newline at end of file
multi_model_manager
=
MultiModelManager
()
codai/pydantic/textrequest.py
View file @
ad758123
...
...
@@ -121,8 +121,13 @@ class ModelInfo(BaseModel):
owned_by
:
str
=
"huggingface"
type
:
Optional
[
str
]
=
None
# e.g. "text", "image", "video", "audio", "tts", "vision", "embedding"
capabilities
:
Optional
[
List
[
str
]]
=
None
# list of capability strings
backend
:
Optional
[
str
]
=
None
model_path
:
Optional
[
str
]
=
None
port
:
Optional
[
int
]
=
None
gpu_device
:
Optional
[
int
]
=
None
load_mode
:
Optional
[
str
]
=
None
class
ModelList
(
BaseModel
):
object
:
str
=
"list"
data
:
List
[
ModelInfo
]
\ No newline at end of file
data
:
List
[
ModelInfo
]
tests/test_whisper_server_local_models.py
0 → 100644
View file @
ad758123
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment