Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in
Toggle navigation
C
coderai
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexlab
coderai
Commits
c2915d44
Commit
c2915d44
authored
May 08, 2026
by
Stefy Lanza (nextime / spora )
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Better model vram requirements estimation
parent
71ba3d0d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
53 additions
and
18 deletions
+53
-18
routes.py
codai/admin/routes.py
+18
-4
models.html
codai/admin/templates/models.html
+35
-14
No files found.
codai/admin/routes.py
View file @
c2915d44
...
...
@@ -1823,7 +1823,7 @@ async def api_hf_search(
if
effective_q
:
pairs
.
append
((
"search"
,
effective_q
))
pairs
.
extend
(
filter_pairs
)
pairs
+=
[(
"sort"
,
sort
),
(
"direction"
,
"-1"
),
(
"limit"
,
limit
),
(
"full"
,
"false"
)]
pairs
+=
[(
"sort"
,
sort
),
(
"direction"
,
"-1"
),
(
"limit"
,
limit
),
(
"full"
,
"true"
)]
url
=
"https://huggingface.co/api/models?"
+
urllib
.
parse
.
urlencode
(
pairs
)
rq
=
urllib
.
request
.
Request
(
url
,
headers
=
{
"User-Agent"
:
"coderai-admin/1.0"
})
def
_fetch
():
...
...
@@ -1857,12 +1857,14 @@ async def api_hf_search(
merged
=
[
m
for
m
in
merged
if
"gguf"
not
in
(
m
.
get
(
"modelId"
)
or
m
.
get
(
"id"
,
""
))
.
lower
()]
# Get VRAM info
vram_gb
=
None
vram_total_gb
=
None
vram_free_gb
=
None
try
:
import
torch
if
torch
.
cuda
.
is_available
():
free
,
total
=
torch
.
cuda
.
mem_get_info
()
vram_gb
=
round
(
free
/
1e9
,
2
)
vram_total_gb
=
round
(
total
/
1e9
,
2
)
vram_free_gb
=
round
(
free
/
1e9
,
2
)
except
Exception
:
pass
...
...
@@ -1876,12 +1878,24 @@ async def api_hf_search(
# Only cache when pipeline_tag gave us authoritative information
if
m
.
get
(
"pipeline_tag"
):
update_capability_cache
(
mid
,
caps
)
# Estimate size from safetensors metadata when available
safetensors_size_gb
=
None
sf
=
m
.
get
(
"safetensors"
)
or
{}
total_params
=
sf
.
get
(
"total"
,
0
)
if
total_params
:
params_by_dtype
=
sf
.
get
(
"parameters"
)
or
{}
dominant
=
max
(
params_by_dtype
,
key
=
params_by_dtype
.
get
)
if
params_by_dtype
else
"BF16"
bpp
=
{
"F32"
:
4
,
"F16"
:
2
,
"BF16"
:
2
,
"F8_E4M3"
:
1
,
"F8_E5M2"
:
1
,
"I8"
:
1
,
"I4"
:
0.5
,
"U8"
:
1
}
.
get
(
dominant
,
2
)
safetensors_size_gb
=
round
(
total_params
*
bpp
/
1e9
,
2
)
results
.
append
({
"id"
:
mid
,
"downloads"
:
m
.
get
(
"downloads"
,
0
),
"likes"
:
m
.
get
(
"likes"
,
0
),
"pipeline_tag"
:
m
.
get
(
"pipeline_tag"
,
""
),
"vram_available"
:
vram_gb
,
"vram_total"
:
vram_total_gb
,
"vram_free"
:
vram_free_gb
,
"safetensors_size_gb"
:
safetensors_size_gb
,
"capabilities"
:
caps
.
to_list
(),
})
return
results
...
...
codai/admin/templates/models.html
View file @
c2915d44
...
...
@@ -674,16 +674,33 @@ let _filesCache = {};
let
_activeQuants
=
new
Set
();
let
_cachedSearchIds
=
new
Set
();
// HF repo IDs (and GGUF source_repos) cached locally
function
estimateModelSize
(
modelId
){
function
estimateModelSize
(
modelId
,
safetensorsSizeGb
){
// 1. Safetensors metadata from HF API (most accurate)
if
(
safetensorsSizeGb
!=
null
)
return
safetensorsSizeGb
;
const
id
=
modelId
.
toLowerCase
();
// Extract parameter count (e.g., 7b, 13b, 70b)
// 2. Xb parameter count in model name
const
match
=
id
.
match
(
/
(\d
+
\.?\d
*
)
b/
);
if
(
!
match
)
return
8
;
// default guess
const
params
=
parseFloat
(
match
[
1
]);
// Rough estimate: Q4 ≈ 0.5GB per B params, Q8 ≈ 1GB per B, FP16 ≈ 2GB per B
if
(
id
.
includes
(
'q4'
)
||
id
.
includes
(
'4bit'
))
return
params
*
0.5
;
if
(
id
.
includes
(
'q8'
)
||
id
.
includes
(
'8bit'
))
return
params
*
1.0
;
return
params
*
2
;
// assume FP16
if
(
match
){
const
params
=
parseFloat
(
match
[
1
]);
if
(
id
.
includes
(
'q4'
)
||
id
.
includes
(
'4bit'
))
return
params
*
0.5
;
if
(
id
.
includes
(
'q8'
)
||
id
.
includes
(
'8bit'
))
return
params
*
1.0
;
return
params
*
2
;
// assume FP16
}
// 3. Keyword heuristics for models without a param count in their name
if
(
/
\b(
tiny|nano|micro
)\b
/
.
test
(
id
))
return
0.1
;
if
(
/
\b
small
\b
/
.
test
(
id
))
return
0.3
;
if
(
/
\b
base
\b
/
.
test
(
id
))
return
0.5
;
if
(
/
\b
medium
\b
/
.
test
(
id
))
return
1.0
;
if
(
/
\b
large
[
-_v
]
/
.
test
(
id
)
&&
/v
[
23
]
/
.
test
(
id
))
return
3.0
;
// large-v2/v3
if
(
/
\b
large
\b
/
.
test
(
id
))
return
2.0
;
if
(
/
\b
xxl
\b
/
.
test
(
id
))
return
10.0
;
if
(
/
\b
xl
\b
/
.
test
(
id
))
return
5.0
;
if
(
/
\b
huge
\b
/
.
test
(
id
))
return
7.0
;
return
null
;
}
document
.
getElementById
(
'search-q'
).
addEventListener
(
'keydown'
,
e
=>
{
if
(
e
.
key
===
'Enter'
)
doSearch
()});
...
...
@@ -727,14 +744,18 @@ async function doSearch(){
if
(
!
_results
.
length
){
out
.
innerHTML
=
'<span class="muted small">No results. Try different keywords or fewer filters.</span>'
;
return
}
const
vramAvail
=
_results
[
0
]?.
vram_available
;
const
vramTotal
=
_results
[
0
]?.
vram_total
;
const
vramFree
=
_results
[
0
]?.
vram_free
;
out
.
innerHTML
=
_results
.
map
((
m
,
i
)
=>
{
let
vramDot
=
''
;
if
(
vramAvail
){
const
estSize
=
estimateModelSize
(
m
.
id
);
const
color
=
estSize
<=
vramAvail
*
0.8
?
'#10b981'
:
estSize
<=
vramAvail
*
0.95
?
'#f59e0b'
:
'#ef4444'
;
vramDot
=
`<span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:
${
color
}
;margin-right:.35rem" title="Est.
${
estSize
}
GB /
${
vramAvail
}
GB available"></span>`
;
if
(
vramTotal
){
const
estSize
=
estimateModelSize
(
m
.
id
,
m
.
safetensors_size_gb
??
null
);
if
(
estSize
!==
null
){
const
color
=
estSize
<=
vramTotal
*
0.8
?
'#10b981'
:
estSize
<=
vramTotal
*
0.95
?
'#f59e0b'
:
'#ef4444'
;
const
freeHint
=
vramFree
!=
null
?
` —
${
vramFree
}
GB free now`
:
''
;
vramDot
=
`<span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:
${
color
}
;margin-right:.35rem" title="Est. ~
${
estSize
}
GB /
${
vramTotal
}
GB total
${
freeHint
}
"></span>`
;
}
}
const
capBadges
=
fmtCapabilities
(
m
.
capabilities
||
[]);
const
isDownloaded
=
_cachedSearchIds
.
has
(
m
.
id
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment