Fix VRAM overhead calculation: require only the model's base VRAM plus its configurable overhead, and drop the extra fixed 6 GB buffer that was added on top

3790f911 · Stefy Lanza (nextime / spora ) · 7725cb9b · 3790f911
Commit 3790f911 authored Oct 09, 2025 by Stefy Lanza (nextime / spora )
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 6 deletions

cluster_master.py vidai/cluster_master.py +5 -6

No files found.
--- a/vidai/cluster_master.py
+++ b/vidai/cluster_master.py
@@ -1119,7 +1119,7 @@ class ClusterMaster:

        # Step 1: Determine VRAM required for the model (includes overhead)
        required_vram_gb = estimate_model_vram_requirements(model_path)
-        print(f"DEBUG: Required VRAM: {required_vram_gb}GB (includes overhead)")
+        print(f"DEBUG: Required VRAM: {required_vram_gb}GB (includes model overhead)")

        # Step 2: Determine workers with sufficient GPU memory
        available_workers = []
@@ -1184,14 +1184,13 @@ class ClusterMaster:
                print(f"DEBUG: CPU-only worker detected")

            # Check if worker has enough VRAM (skip check for CPU workers)
-            # Worker must have at least 6GB more VRAM than required
-            required_vram_with_buffer = required_vram_gb + 6 if has_gpu else required_vram_gb
-            has_sufficient_vram = total_vram >= required_vram_with_buffer or not has_gpu
+            # Required VRAM already includes model-specific overhead
+            has_sufficient_vram = total_vram >= required_vram_gb or not has_gpu
            if has_sufficient_vram:
                available_workers.append((proc_key, client_info['weight'], total_vram))
-                print(f"DEBUG: Worker {proc_key} accepted (VRAM: {total_vram}GB, required: {required_vram_with_buffer}GB)")
+                print(f"DEBUG: Worker {proc_key} accepted (VRAM: {total_vram}GB, required: {required_vram_gb}GB)")
            else:
-                print(f"DEBUG: Worker {proc_key} rejected - insufficient VRAM ({total_vram}GB < {required_vram_with_buffer}GB)")
+                print(f"DEBUG: Worker {proc_key} rejected - insufficient VRAM ({total_vram}GB < {required_vram_gb}GB)")

        if not available_workers:
            return None