Better estimation of model VRAM requirements

c2915d44 · Stefy Lanza (nextime / spora ) · 71ba3d0d · c2915d44 · c2915d44
Commit c2915d44 authored May 08, 2026 by Stefy Lanza (nextime / spora )
Hide whitespace changes
Inline Side-by-side

Showing with 53 additions and 18 deletions

routes.py codai/admin/routes.py +18 -4

models.html codai/admin/templates/models.html +35 -14

No files found.
--- a/codai/admin/routes.py
+++ b/codai/admin/routes.py
@@ -1823,7 +1823,7 @@ async def api_hf_search(
        if effective_q:
            pairs.append(("search", effective_q))
        pairs.extend(filter_pairs)
-        pairs += [("sort", sort), ("direction", "-1"), ("limit", limit), ("full", "false")]
+        pairs += [("sort", sort), ("direction", "-1"), ("limit", limit), ("full", "true")]
        url = "https://huggingface.co/api/models?" + urllib.parse.urlencode(pairs)
        rq = urllib.request.Request(url, headers={"User-Agent": "coderai-admin/1.0"})
        def _fetch():
@@ -1857,12 +1857,14 @@ async def api_hf_search(
            merged = [m for m in merged if "gguf" not in (m.get("modelId") or m.get("id", "")).lower()]

        # Get VRAM info
-        vram_gb = None
+        vram_total_gb = None
+        vram_free_gb = None
        try:
            import torch
            if torch.cuda.is_available():
                free, total = torch.cuda.mem_get_info()
-                vram_gb = round(free / 1e9, 2)
+                vram_total_gb = round(total / 1e9, 2)
+                vram_free_gb = round(free / 1e9, 2)
        except Exception:
            pass

@@ -1876,12 +1878,24 @@ async def api_hf_search(
            # Only cache when pipeline_tag gave us authoritative information
            if m.get("pipeline_tag"):
                update_capability_cache(mid, caps)
+            # Estimate size from safetensors metadata when available
+            safetensors_size_gb = None
+            sf = m.get("safetensors") or {}
+            total_params = sf.get("total", 0)
+            if total_params:
+                params_by_dtype = sf.get("parameters") or {}
+                dominant = max(params_by_dtype, key=params_by_dtype.get) if params_by_dtype else "BF16"
+                bpp = {"F32": 4, "F16": 2, "BF16": 2, "F8_E4M3": 1, "F8_E5M2": 1, "I8": 1, "I4": 0.5, "U8": 1}.get(dominant, 2)
+                safetensors_size_gb = round(total_params * bpp / 1e9, 2)
+
            results.append({
                "id": mid,
                "downloads": m.get("downloads", 0),
                "likes": m.get("likes", 0),
                "pipeline_tag": m.get("pipeline_tag", ""),
-                "vram_available": vram_gb,
+                "vram_total": vram_total_gb,
+                "vram_free": vram_free_gb,
+                "safetensors_size_gb": safetensors_size_gb,
                "capabilities": caps.to_list(),
            })
        return results

--- a/codai/admin/templates/models.html
+++ b/codai/admin/templates/models.html
@@ -674,16 +674,33 @@ let _filesCache = {};
 let _activeQuants = new Set();
 let _cachedSearchIds = new Set();  // HF repo IDs (and GGUF source_repos) cached locally

-function estimateModelSize(modelId){
+function estimateModelSize(modelId, safetensorsSizeGb){
+  // 1. Safetensors metadata from HF API (most accurate)
+  if(safetensorsSizeGb != null) return safetensorsSizeGb;
+
  const id = modelId.toLowerCase();
-  // Extract parameter count (e.g., 7b, 13b, 70b)
+
+  // 2. Parameter count embedded in the model name (e.g. 7b, 13b, 70b)
  const match = id.match(/(\d+\.?\d*)b/);
-  if(!match) return 8; // default guess
-  const params = parseFloat(match[1]);
-  // Rough estimate: Q4 ≈ 0.5GB per B params, Q8 ≈ 1GB per B, FP16 ≈ 2GB per B
-  if(id.includes('q4') || id.includes('4bit')) return params * 0.5;
-  if(id.includes('q8') || id.includes('8bit')) return params * 1.0;
-  return params * 2; // assume FP16
+  if(match){
+    const params = parseFloat(match[1]);
+    if(id.includes('q4') || id.includes('4bit')) return params * 0.5;
+    if(id.includes('q8') || id.includes('8bit')) return params * 1.0;
+    return params * 2; // assume FP16
+  }
+
+  // 3. Keyword heuristics for models without a param count in their name
+  if(/\b(tiny|nano|micro)\b/.test(id)) return 0.1;
+  if(/\bsmall\b/.test(id))             return 0.3;
+  if(/\bbase\b/.test(id))              return 0.5;
+  if(/\bmedium\b/.test(id))            return 1.0;
+  if(/\blarge[-_v]/.test(id) && /v[23]/.test(id)) return 3.0; // large-v2/v3
+  if(/\blarge\b/.test(id))             return 2.0;
+  if(/\bxxl\b/.test(id))               return 10.0;
+  if(/\bxl\b/.test(id))                return 5.0;
+  if(/\bhuge\b/.test(id))              return 7.0;
+
+  return null;
 }

 document.getElementById('search-q').addEventListener('keydown',e=>{if(e.key==='Enter')doSearch()});
@@ -727,14 +744,18 @@ async function doSearch(){
    
    if(!_results.length){out.innerHTML='<span class="muted small">No results. Try different keywords or fewer filters.</span>';return}
    
-    const vramAvail = _results[0]?.vram_available;
-    
+    const vramTotal = _results[0]?.vram_total;
+    const vramFree  = _results[0]?.vram_free;
+
    out.innerHTML = _results.map((m,i)=>{
      let vramDot = '';
-      if(vramAvail){
-        const estSize = estimateModelSize(m.id);
-        const color = estSize <= vramAvail*0.8 ? '#10b981' : estSize <= vramAvail*0.95 ? '#f59e0b' : '#ef4444';
-        vramDot = `<span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:${color};margin-right:.35rem" title="Est. ${estSize}GB / ${vramAvail}GB available"></span>`;
+      if(vramTotal){
+        const estSize = estimateModelSize(m.id, m.safetensors_size_gb ?? null);
+        if(estSize !== null){
+          const color = estSize <= vramTotal*0.8 ? '#10b981' : estSize <= vramTotal*0.95 ? '#f59e0b' : '#ef4444';
+          const freeHint = vramFree != null ? ` — ${vramFree}GB free now` : '';
+          vramDot = `<span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:${color};margin-right:.35rem" title="Est. ~${estSize}GB / ${vramTotal}GB total${freeHint}"></span>`;
+        }
      }
      const capBadges = fmtCapabilities(m.capabilities||[]);
      const isDownloaded = _cachedSearchIds.has(m.id);