Fix VRAM overhead calculation - use only model base VRAM + configurable overhead, not +6GB extra

parent 7725cb9b
......@@ -1119,7 +1119,7 @@ class ClusterMaster:
# Step 1: Determine VRAM required for the model (includes overhead)
required_vram_gb = estimate_model_vram_requirements(model_path)
print(f"DEBUG: Required VRAM: {required_vram_gb}GB (includes overhead)")
print(f"DEBUG: Required VRAM: {required_vram_gb}GB (includes model overhead)")
# Step 2: Determine workers with sufficient GPU memory
available_workers = []
......@@ -1184,14 +1184,13 @@ class ClusterMaster:
print(f"DEBUG: CPU-only worker detected")
# Check if worker has enough VRAM (skip check for CPU workers)
# Worker must have at least 6GB more VRAM than required
required_vram_with_buffer = required_vram_gb + 6 if has_gpu else required_vram_gb
has_sufficient_vram = total_vram >= required_vram_with_buffer or not has_gpu
# Required VRAM already includes model-specific overhead
has_sufficient_vram = total_vram >= required_vram_gb or not has_gpu
if has_sufficient_vram:
available_workers.append((proc_key, client_info['weight'], total_vram))
print(f"DEBUG: Worker {proc_key} accepted (VRAM: {total_vram}GB, required: {required_vram_with_buffer}GB)")
print(f"DEBUG: Worker {proc_key} accepted (VRAM: {total_vram}GB, required: {required_vram_gb}GB)")
else:
print(f"DEBUG: Worker {proc_key} rejected - insufficient VRAM ({total_vram}GB < {required_vram_with_buffer}GB)")
print(f"DEBUG: Worker {proc_key} rejected - insufficient VRAM ({total_vram}GB < {required_vram_gb}GB)")
if not available_workers:
return None
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment