Add configurable VRAM overhead per model for worker selection

parent 83ee5f79
@@ -1117,9 +1117,9 @@ class ClusterMaster:
         print(f"DEBUG: Available processes: {list(self.processes.keys())}")
         print(f"DEBUG: Process queue for {process_type}: {self.process_queue.get(process_type, [])}")

-        # Step 1: Determine VRAM required for the model
+        # Step 1: Determine VRAM required for the model (includes overhead)
         required_vram_gb = estimate_model_vram_requirements(model_path)
-        print(f"DEBUG: Required VRAM: {required_vram_gb}GB")
+        print(f"DEBUG: Required VRAM: {required_vram_gb}GB (includes overhead)")

         # Step 2: Determine workers with sufficient GPU memory
         available_workers = []
......
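For context, the hunk above compares the overhead-inclusive estimate against each worker's free GPU memory. A minimal sketch of that comparison, not part of this commit: the `workers` mapping, the function name, and the smallest-fit ordering are all illustrative assumptions, not the actual ClusterMaster attributes.

    from typing import Dict, List

    def select_workers_with_vram(workers: Dict[str, float], required_vram_gb: int) -> List[str]:
        """Return worker IDs whose reported free VRAM covers the estimate
        (which already includes the per-model overhead)."""
        candidates = [(wid, free) for wid, free in workers.items() if free >= required_vram_gb]
        candidates.sort(key=lambda pair: pair[1])  # try the tightest sufficient GPU first (illustrative policy)
        return [wid for wid, _ in candidates]

    # Example: a 7B VL model estimated at 16 GB + 6 GB overhead = 22 GB
    print(select_workers_with_vram({"gpu-a": 24.0, "gpu-b": 16.0, "gpu-c": 48.0}, 22))  # ['gpu-a', 'gpu-c']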
@@ -547,6 +547,7 @@ def init_db(conn) -> None:
                 type VARCHAR(50) NOT NULL,
                 path TEXT NOT NULL UNIQUE,
                 vram_estimate INT DEFAULT 0,
+                vram_overhead_gb INT DEFAULT 6,
                 available BOOLEAN DEFAULT 0,
                 capabilities TEXT,
                 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
@@ -561,6 +562,7 @@ def init_db(conn) -> None:
                 type TEXT NOT NULL,
                 path TEXT NOT NULL UNIQUE,
                 vram_estimate INTEGER DEFAULT 0,
+                vram_overhead_gb INTEGER DEFAULT 6,
                 available BOOLEAN DEFAULT 0,
                 capabilities TEXT,
                 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
@@ -588,6 +590,16 @@ def init_db(conn) -> None:
         # Column might already exist
         pass

+    # Add vram_overhead_gb column if it doesn't exist
+    try:
+        if config['type'] == 'mysql':
+            cursor.execute('ALTER TABLE models ADD COLUMN vram_overhead_gb INT DEFAULT 6')
+        else:
+            cursor.execute('ALTER TABLE models ADD COLUMN vram_overhead_gb INTEGER DEFAULT 6')
+    except:
+        # Column might already exist
+        pass
+
     # Clean up duplicate models (keep only the first one for each path)
     if config['type'] == 'mysql':
         cursor.execute('''
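The migration above relies on the ALTER TABLE failing when the column already exists. A narrower alternative, sketched below and not part of this commit, probes the schema first via SQLite's PRAGMA table_info and MySQL's information_schema.COLUMNS; the `has_column` helper is hypothetical, and `cursor`/`config` are assumed to be the same objects used in init_db (note MySQL drivers use %s placeholders where SQLite uses ?).

    def has_column(cursor, db_type: str, table: str, column: str) -> bool:
        """Return True if `column` already exists on `table` (SQLite or MySQL)."""
        if db_type == 'mysql':
            cursor.execute(
                "SELECT COUNT(*) FROM information_schema.COLUMNS "
                "WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = %s AND COLUMN_NAME = %s",
                (table, column))
            return cursor.fetchone()[0] > 0
        cursor.execute(f"PRAGMA table_info({table})")
        return any(row[1] == column for row in cursor.fetchall())

    # Usage inside the migration step (illustrative):
    # if not has_column(cursor, config['type'], 'models', 'vram_overhead_gb'):
    #     cursor.execute('ALTER TABLE models ADD COLUMN vram_overhead_gb INTEGER DEFAULT 6')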
@@ -2079,13 +2091,13 @@ def get_available_models() -> List[Dict[str, Any]]:
     return [dict(row) for row in rows]


-def create_model(name: str, model_type: str, path: str, vram_estimate: int = 0, available: bool = False, capabilities: str = '') -> bool:
+def create_model(name: str, model_type: str, path: str, vram_estimate: int = 0, vram_overhead_gb: int = 6, available: bool = False, capabilities: str = '') -> bool:
     """Create a new model."""
     conn = get_db_connection()
     cursor = conn.cursor()
     try:
-        cursor.execute('INSERT INTO models (name, type, path, vram_estimate, available, capabilities) VALUES (?, ?, ?, ?, ?, ?)',
-                       (name, model_type, path, vram_estimate, 1 if available else 0, capabilities))
+        cursor.execute('INSERT INTO models (name, type, path, vram_estimate, vram_overhead_gb, available, capabilities) VALUES (?, ?, ?, ?, ?, ?, ?)',
+                       (name, model_type, path, vram_estimate, vram_overhead_gb, 1 if available else 0, capabilities))
         conn.commit()
         return True
     except sqlite3.IntegrityError:
@@ -2094,7 +2106,7 @@ def create_model(name: str, model_type: str, path: str, vram_estimate: int = 0,
         conn.close()


-def update_model(model_id: int, name: str = None, model_type: str = None, path: str = None, vram_estimate: int = None, available: bool = None, capabilities: str = None) -> bool:
+def update_model(model_id: int, name: str = None, model_type: str = None, path: str = None, vram_estimate: int = None, vram_overhead_gb: int = None, available: bool = None, capabilities: str = None) -> bool:
     """Update a model."""
     conn = get_db_connection()
     cursor = conn.cursor()
@@ -2114,6 +2126,9 @@ def update_model(model_id: int, name: str = None, model_type: str = None, path:
     if vram_estimate is not None:
         update_fields.append('vram_estimate = ?')
         params.append(vram_estimate)
+    if vram_overhead_gb is not None:
+        update_fields.append('vram_overhead_gb = ?')
+        params.append(vram_overhead_gb)
     if available is not None:
         update_fields.append('available = ?')
         params.append(1 if available else 0)
@@ -2172,8 +2187,8 @@ def ensure_model_exists(name: str, model_type: str, path: str, vram_estimate: in
         capabilities = detect_model_capabilities(path)

         # Create the model
-        cursor.execute('INSERT INTO models (name, type, path, vram_estimate, available, capabilities) VALUES (?, ?, ?, ?, ?, ?)',
-                       (name, model_type, path, vram_estimate, 1 if available else 0, capabilities))
+        cursor.execute('INSERT INTO models (name, type, path, vram_estimate, vram_overhead_gb, available, capabilities) VALUES (?, ?, ?, ?, ?, ?, ?)',
+                       (name, model_type, path, vram_estimate, 6, 1 if available else 0, capabilities))
         conn.commit()
     else:
         # Update availability if it's not already available
......
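A usage sketch of the updated database API, not part of this commit: the import path, model names, file paths, and the model_id are illustrative assumptions; only the function signatures come from the diff above.

    from database import create_model, update_model, get_available_models  # module path assumed

    created = create_model(
        name='qwen2.5-vl-7b',                    # illustrative
        model_type='vision-language',
        path='/models/Qwen2.5-VL-7B-Instruct',   # illustrative
        vram_estimate=16,      # base model footprint, in GB
        vram_overhead_gb=8,    # override the default 6 GB cushion
        available=True,
        capabilities='vision,chat',
    )

    if created:
        # Worker selection will now budget 16 + 8 = 24 GB for this model.
        for model in get_available_models():
            print(model['name'], model.get('vram_estimate'), model.get('vram_overhead_gb'))

    # Later, shrink the cushion without touching the other fields (model_id assumed to exist):
    update_model(model_id=1, vram_overhead_gb=4)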
@@ -320,44 +320,61 @@ def unload_all_models() -> None:


 def estimate_model_vram_requirements(model_path: str) -> int:
-    """Estimate VRAM requirements for a model in GB."""
+    """Estimate VRAM requirements for a model in GB, including overhead."""
+    # First, try to get from database
+    try:
+        from .database import get_db_connection
+        conn = get_db_connection()
+        cursor = conn.cursor()
+        cursor.execute('SELECT vram_estimate, vram_overhead_gb FROM models WHERE path = ?', (model_path,))
+        row = cursor.fetchone()
+        conn.close()
+        if row:
+            base_vram = row['vram_estimate'] or 0
+            overhead = row['vram_overhead_gb'] or 6
+            return base_vram + overhead
+    except:
+        pass  # Fall back to estimation
+
+    # Fallback estimation
     model_path_lower = model_path.lower()

     # Vision-language models
     if 'qwen' in model_path_lower and ('vl' in model_path_lower or 'vision' in model_path_lower):
         if '7b' in model_path_lower:
-            return 16  # Qwen2.5-VL-7B requires ~16GB VRAM
+            return 16 + 6  # Qwen2.5-VL-7B requires ~16GB VRAM + 6GB overhead
         elif '3b' in model_path_lower:
-            return 8  # Qwen2.5-VL-3B requires ~8GB VRAM
+            return 8 + 6  # Qwen2.5-VL-3B requires ~8GB VRAM + 6GB overhead
         elif '72b' in model_path_lower:
-            return 144  # Qwen2.5-VL-72B requires ~144GB VRAM
+            return 144 + 6  # Qwen2.5-VL-72B requires ~144GB VRAM + 6GB overhead
         else:
-            return 24  # Default for other Qwen VL models
+            return 24 + 6  # Default for other Qwen VL models + 6GB overhead
     # Text-only models
     elif 'llama' in model_path_lower:
         if '70b' in model_path_lower:
-            return 40
+            return 40 + 6
         elif '65b' in model_path_lower:
-            return 32
+            return 32 + 6
         elif '30b' in model_path_lower:
-            return 16
+            return 16 + 6
         elif '13b' in model_path_lower:
-            return 8
+            return 8 + 6
         elif '7b' in model_path_lower:
-            return 4
+            return 4 + 6
         else:
-            return 16
+            return 16 + 6
     elif 'mistral' in model_path_lower:
         if '7b' in model_path_lower:
-            return 4
+            return 4 + 6
         else:
-            return 8
+            return 8 + 6
     elif 'gpt' in model_path_lower or 'chatgpt' in model_path_lower:
         # These are typically API-based, but if local
-        return 8
+        return 8 + 6
-    # Default estimate
-    return 8
\ No newline at end of file
+    # Default estimate + overhead
+    return 8 + 6
\ No newline at end of file
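A quick check of the fallback arithmetic above, as a usage sketch rather than part of this commit; the import path `model_manager` and the example paths are assumptions, since the diff does not show the module name.

    from model_manager import estimate_model_vram_requirements  # module name assumed

    # Fallback heuristics (no database row for these paths): base estimate + 6 GB default overhead.
    print(estimate_model_vram_requirements('/models/Qwen2.5-VL-7B-Instruct'))  # 16 + 6 = 22
    print(estimate_model_vram_requirements('/models/llama-2-13b-chat'))        #  8 + 6 = 14
    print(estimate_model_vram_requirements('/models/some-unknown-model'))      #  8 + 6 = 14

    # If the models table has a row for the path (e.g. vram_estimate=16, vram_overhead_gb=8),
    # the database branch returns 16 + 8 = 24 instead.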