Add configurable VRAM overhead per model for worker selection

parent 83ee5f79
...@@ -1117,9 +1117,9 @@ class ClusterMaster: ...@@ -1117,9 +1117,9 @@ class ClusterMaster:
print(f"DEBUG: Available processes: {list(self.processes.keys())}") print(f"DEBUG: Available processes: {list(self.processes.keys())}")
print(f"DEBUG: Process queue for {process_type}: {self.process_queue.get(process_type, [])}") print(f"DEBUG: Process queue for {process_type}: {self.process_queue.get(process_type, [])}")
# Step 1: Determine VRAM required for the model # Step 1: Determine VRAM required for the model (includes overhead)
required_vram_gb = estimate_model_vram_requirements(model_path) required_vram_gb = estimate_model_vram_requirements(model_path)
print(f"DEBUG: Required VRAM: {required_vram_gb}GB") print(f"DEBUG: Required VRAM: {required_vram_gb}GB (includes overhead)")
# Step 2: Determine workers with sufficient GPU memory # Step 2: Determine workers with sufficient GPU memory
available_workers = [] available_workers = []
......
...@@ -547,6 +547,7 @@ def init_db(conn) -> None: ...@@ -547,6 +547,7 @@ def init_db(conn) -> None:
type VARCHAR(50) NOT NULL, type VARCHAR(50) NOT NULL,
path TEXT NOT NULL UNIQUE, path TEXT NOT NULL UNIQUE,
vram_estimate INT DEFAULT 0, vram_estimate INT DEFAULT 0,
vram_overhead_gb INT DEFAULT 6,
available BOOLEAN DEFAULT 0, available BOOLEAN DEFAULT 0,
capabilities TEXT, capabilities TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
...@@ -561,6 +562,7 @@ def init_db(conn) -> None: ...@@ -561,6 +562,7 @@ def init_db(conn) -> None:
type TEXT NOT NULL, type TEXT NOT NULL,
path TEXT NOT NULL UNIQUE, path TEXT NOT NULL UNIQUE,
vram_estimate INTEGER DEFAULT 0, vram_estimate INTEGER DEFAULT 0,
vram_overhead_gb INTEGER DEFAULT 6,
available BOOLEAN DEFAULT 0, available BOOLEAN DEFAULT 0,
capabilities TEXT, capabilities TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
...@@ -588,6 +590,16 @@ def init_db(conn) -> None: ...@@ -588,6 +590,16 @@ def init_db(conn) -> None:
# Column might already exist # Column might already exist
pass pass
# Add vram_overhead_gb column if it doesn't exist
try:
if config['type'] == 'mysql':
cursor.execute('ALTER TABLE models ADD COLUMN vram_overhead_gb INT DEFAULT 6')
else:
cursor.execute('ALTER TABLE models ADD COLUMN vram_overhead_gb INTEGER DEFAULT 6')
except:
# Column might already exist
pass
# Clean up duplicate models (keep only the first one for each path) # Clean up duplicate models (keep only the first one for each path)
if config['type'] == 'mysql': if config['type'] == 'mysql':
cursor.execute(''' cursor.execute('''
...@@ -2079,13 +2091,13 @@ def get_available_models() -> List[Dict[str, Any]]: ...@@ -2079,13 +2091,13 @@ def get_available_models() -> List[Dict[str, Any]]:
return [dict(row) for row in rows] return [dict(row) for row in rows]
def create_model(name: str, model_type: str, path: str, vram_estimate: int = 0, available: bool = False, capabilities: str = '') -> bool: def create_model(name: str, model_type: str, path: str, vram_estimate: int = 0, vram_overhead_gb: int = 6, available: bool = False, capabilities: str = '') -> bool:
"""Create a new model.""" """Create a new model."""
conn = get_db_connection() conn = get_db_connection()
cursor = conn.cursor() cursor = conn.cursor()
try: try:
cursor.execute('INSERT INTO models (name, type, path, vram_estimate, available, capabilities) VALUES (?, ?, ?, ?, ?, ?)', cursor.execute('INSERT INTO models (name, type, path, vram_estimate, vram_overhead_gb, available, capabilities) VALUES (?, ?, ?, ?, ?, ?, ?)',
(name, model_type, path, vram_estimate, 1 if available else 0, capabilities)) (name, model_type, path, vram_estimate, vram_overhead_gb, 1 if available else 0, capabilities))
conn.commit() conn.commit()
return True return True
except sqlite3.IntegrityError: except sqlite3.IntegrityError:
...@@ -2094,7 +2106,7 @@ def create_model(name: str, model_type: str, path: str, vram_estimate: int = 0, ...@@ -2094,7 +2106,7 @@ def create_model(name: str, model_type: str, path: str, vram_estimate: int = 0,
conn.close() conn.close()
def update_model(model_id: int, name: str = None, model_type: str = None, path: str = None, vram_estimate: int = None, available: bool = None, capabilities: str = None) -> bool: def update_model(model_id: int, name: str = None, model_type: str = None, path: str = None, vram_estimate: int = None, vram_overhead_gb: int = None, available: bool = None, capabilities: str = None) -> bool:
"""Update a model.""" """Update a model."""
conn = get_db_connection() conn = get_db_connection()
cursor = conn.cursor() cursor = conn.cursor()
...@@ -2114,6 +2126,9 @@ def update_model(model_id: int, name: str = None, model_type: str = None, path: ...@@ -2114,6 +2126,9 @@ def update_model(model_id: int, name: str = None, model_type: str = None, path:
if vram_estimate is not None: if vram_estimate is not None:
update_fields.append('vram_estimate = ?') update_fields.append('vram_estimate = ?')
params.append(vram_estimate) params.append(vram_estimate)
if vram_overhead_gb is not None:
update_fields.append('vram_overhead_gb = ?')
params.append(vram_overhead_gb)
if available is not None: if available is not None:
update_fields.append('available = ?') update_fields.append('available = ?')
params.append(1 if available else 0) params.append(1 if available else 0)
...@@ -2172,8 +2187,8 @@ def ensure_model_exists(name: str, model_type: str, path: str, vram_estimate: in ...@@ -2172,8 +2187,8 @@ def ensure_model_exists(name: str, model_type: str, path: str, vram_estimate: in
capabilities = detect_model_capabilities(path) capabilities = detect_model_capabilities(path)
# Create the model # Create the model
cursor.execute('INSERT INTO models (name, type, path, vram_estimate, available, capabilities) VALUES (?, ?, ?, ?, ?, ?)', cursor.execute('INSERT INTO models (name, type, path, vram_estimate, vram_overhead_gb, available, capabilities) VALUES (?, ?, ?, ?, ?, ?, ?)',
(name, model_type, path, vram_estimate, 1 if available else 0, capabilities)) (name, model_type, path, vram_estimate, 6, 1 if available else 0, capabilities))
conn.commit() conn.commit()
else: else:
# Update availability if it's not already available # Update availability if it's not already available
......
...@@ -320,44 +320,61 @@ def unload_all_models() -> None: ...@@ -320,44 +320,61 @@ def unload_all_models() -> None:
def estimate_model_vram_requirements(model_path: str) -> int:
    """Estimate the total VRAM required to serve a model, in whole GB.

    The returned figure INCLUDES runtime overhead (KV cache, CUDA context,
    activation buffers) on top of the raw weight footprint.

    Resolution order:
      1. Per-model values stored in the ``models`` table
         (``vram_estimate`` + ``vram_overhead_gb``), when reachable.
      2. A heuristic keyed on family/size markers in the model path.

    Args:
        model_path: Filesystem path (or repo id) of the model; matched
            case-insensitively against known family/size markers.

    Returns:
        Estimated VRAM in GB, overhead included.
    """
    # Single source of truth for the default overhead (was repeated as a
    # literal ``+ 6`` on every fallback branch).
    default_overhead_gb = 6

    # Preferred source: the database row for this exact path.
    try:
        from .database import get_db_connection
        conn = get_db_connection()
        try:
            cursor = conn.cursor()
            # NOTE(review): '?' is the sqlite paramstyle, but init_db in this
            # project also supports MySQL ('%s') -- confirm this code path is
            # sqlite-only or switch placeholders per backend.
            cursor.execute(
                'SELECT vram_estimate, vram_overhead_gb FROM models WHERE path = ?',
                (model_path,),
            )
            row = cursor.fetchone()
        finally:
            # Close even when execute()/fetchone() raises (previously leaked).
            conn.close()
        if row:
            # Assumes a mapping-style row (e.g. sqlite3.Row) -- TODO confirm
            # the connection's row_factory supports name-based access.
            base_vram = row['vram_estimate'] or 0
            overhead = row['vram_overhead_gb'] or default_overhead_gb
            return base_vram + overhead
    except Exception:
        # DB unavailable or not yet migrated: fall through to the heuristic.
        # (Was a bare ``except:`` which also swallowed SystemExit et al.)
        pass

    # Fallback heuristic. Marker order matters and mirrors the original
    # if/elif chain; dicts preserve insertion order (Python 3.7+).
    path = model_path.lower()
    if 'qwen' in path and ('vl' in path or 'vision' in path):
        # Vision-language models.
        size_to_base_gb = {'7b': 16, '3b': 8, '72b': 144}
        family_default_gb = 24
    elif 'llama' in path:
        size_to_base_gb = {'70b': 40, '65b': 32, '30b': 16, '13b': 8, '7b': 4}
        family_default_gb = 16
    elif 'mistral' in path:
        size_to_base_gb = {'7b': 4}
        family_default_gb = 8
    elif 'gpt' in path or 'chatgpt' in path:
        # Typically API-based; small placeholder if somehow run locally.
        size_to_base_gb = {}
        family_default_gb = 8
    else:
        # Unknown family: generic default.
        size_to_base_gb = {}
        family_default_gb = 8

    for marker, base_gb in size_to_base_gb.items():
        if marker in path:
            return base_gb + default_overhead_gb
    return family_default_gb + default_overhead_gb
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment