Add configurable VRAM overhead per model for worker selection

parent 83ee5f79
...@@ -1117,9 +1117,9 @@ class ClusterMaster: ...@@ -1117,9 +1117,9 @@ class ClusterMaster:
print(f"DEBUG: Available processes: {list(self.processes.keys())}") print(f"DEBUG: Available processes: {list(self.processes.keys())}")
print(f"DEBUG: Process queue for {process_type}: {self.process_queue.get(process_type, [])}") print(f"DEBUG: Process queue for {process_type}: {self.process_queue.get(process_type, [])}")
# Step 1: Determine VRAM required for the model # Step 1: Determine VRAM required for the model (includes overhead)
required_vram_gb = estimate_model_vram_requirements(model_path) required_vram_gb = estimate_model_vram_requirements(model_path)
print(f"DEBUG: Required VRAM: {required_vram_gb}GB") print(f"DEBUG: Required VRAM: {required_vram_gb}GB (includes overhead)")
# Step 2: Determine workers with sufficient GPU memory # Step 2: Determine workers with sufficient GPU memory
available_workers = [] available_workers = []
......
...@@ -547,6 +547,7 @@ def init_db(conn) -> None: ...@@ -547,6 +547,7 @@ def init_db(conn) -> None:
type VARCHAR(50) NOT NULL, type VARCHAR(50) NOT NULL,
path TEXT NOT NULL UNIQUE, path TEXT NOT NULL UNIQUE,
vram_estimate INT DEFAULT 0, vram_estimate INT DEFAULT 0,
vram_overhead_gb INT DEFAULT 6,
available BOOLEAN DEFAULT 0, available BOOLEAN DEFAULT 0,
capabilities TEXT, capabilities TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
...@@ -561,6 +562,7 @@ def init_db(conn) -> None: ...@@ -561,6 +562,7 @@ def init_db(conn) -> None:
type TEXT NOT NULL, type TEXT NOT NULL,
path TEXT NOT NULL UNIQUE, path TEXT NOT NULL UNIQUE,
vram_estimate INTEGER DEFAULT 0, vram_estimate INTEGER DEFAULT 0,
vram_overhead_gb INTEGER DEFAULT 6,
available BOOLEAN DEFAULT 0, available BOOLEAN DEFAULT 0,
capabilities TEXT, capabilities TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
...@@ -588,6 +590,16 @@ def init_db(conn) -> None: ...@@ -588,6 +590,16 @@ def init_db(conn) -> None:
# Column might already exist # Column might already exist
pass pass
# Add vram_overhead_gb column if it doesn't exist
try:
if config['type'] == 'mysql':
cursor.execute('ALTER TABLE models ADD COLUMN vram_overhead_gb INT DEFAULT 6')
else:
cursor.execute('ALTER TABLE models ADD COLUMN vram_overhead_gb INTEGER DEFAULT 6')
except:
# Column might already exist
pass
# Clean up duplicate models (keep only the first one for each path) # Clean up duplicate models (keep only the first one for each path)
if config['type'] == 'mysql': if config['type'] == 'mysql':
cursor.execute(''' cursor.execute('''
...@@ -2079,13 +2091,13 @@ def get_available_models() -> List[Dict[str, Any]]: ...@@ -2079,13 +2091,13 @@ def get_available_models() -> List[Dict[str, Any]]:
return [dict(row) for row in rows] return [dict(row) for row in rows]
def create_model(name: str, model_type: str, path: str, vram_estimate: int = 0, available: bool = False, capabilities: str = '') -> bool: def create_model(name: str, model_type: str, path: str, vram_estimate: int = 0, vram_overhead_gb: int = 6, available: bool = False, capabilities: str = '') -> bool:
"""Create a new model.""" """Create a new model."""
conn = get_db_connection() conn = get_db_connection()
cursor = conn.cursor() cursor = conn.cursor()
try: try:
cursor.execute('INSERT INTO models (name, type, path, vram_estimate, available, capabilities) VALUES (?, ?, ?, ?, ?, ?)', cursor.execute('INSERT INTO models (name, type, path, vram_estimate, vram_overhead_gb, available, capabilities) VALUES (?, ?, ?, ?, ?, ?, ?)',
(name, model_type, path, vram_estimate, 1 if available else 0, capabilities)) (name, model_type, path, vram_estimate, vram_overhead_gb, 1 if available else 0, capabilities))
conn.commit() conn.commit()
return True return True
except sqlite3.IntegrityError: except sqlite3.IntegrityError:
...@@ -2094,7 +2106,7 @@ def create_model(name: str, model_type: str, path: str, vram_estimate: int = 0, ...@@ -2094,7 +2106,7 @@ def create_model(name: str, model_type: str, path: str, vram_estimate: int = 0,
conn.close() conn.close()
def update_model(model_id: int, name: str = None, model_type: str = None, path: str = None, vram_estimate: int = None, available: bool = None, capabilities: str = None) -> bool: def update_model(model_id: int, name: str = None, model_type: str = None, path: str = None, vram_estimate: int = None, vram_overhead_gb: int = None, available: bool = None, capabilities: str = None) -> bool:
"""Update a model.""" """Update a model."""
conn = get_db_connection() conn = get_db_connection()
cursor = conn.cursor() cursor = conn.cursor()
...@@ -2114,6 +2126,9 @@ def update_model(model_id: int, name: str = None, model_type: str = None, path: ...@@ -2114,6 +2126,9 @@ def update_model(model_id: int, name: str = None, model_type: str = None, path:
if vram_estimate is not None: if vram_estimate is not None:
update_fields.append('vram_estimate = ?') update_fields.append('vram_estimate = ?')
params.append(vram_estimate) params.append(vram_estimate)
if vram_overhead_gb is not None:
update_fields.append('vram_overhead_gb = ?')
params.append(vram_overhead_gb)
if available is not None: if available is not None:
update_fields.append('available = ?') update_fields.append('available = ?')
params.append(1 if available else 0) params.append(1 if available else 0)
...@@ -2172,8 +2187,8 @@ def ensure_model_exists(name: str, model_type: str, path: str, vram_estimate: in ...@@ -2172,8 +2187,8 @@ def ensure_model_exists(name: str, model_type: str, path: str, vram_estimate: in
capabilities = detect_model_capabilities(path) capabilities = detect_model_capabilities(path)
# Create the model # Create the model
cursor.execute('INSERT INTO models (name, type, path, vram_estimate, available, capabilities) VALUES (?, ?, ?, ?, ?, ?)', cursor.execute('INSERT INTO models (name, type, path, vram_estimate, vram_overhead_gb, available, capabilities) VALUES (?, ?, ?, ?, ?, ?, ?)',
(name, model_type, path, vram_estimate, 1 if available else 0, capabilities)) (name, model_type, path, vram_estimate, 6, 1 if available else 0, capabilities))
conn.commit() conn.commit()
else: else:
# Update availability if it's not already available # Update availability if it's not already available
......
...@@ -320,44 +320,61 @@ def unload_all_models() -> None: ...@@ -320,44 +320,61 @@ def unload_all_models() -> None:
def estimate_model_vram_requirements(model_path: str) -> int:
    """Estimate the total VRAM required to serve a model, in whole GB.

    The returned figure INCLUDES runtime overhead (KV cache, CUDA context,
    activation buffers) on top of the raw weight footprint.

    Resolution order:
      1. Per-model values stored in the ``models`` table
         (``vram_estimate`` + ``vram_overhead_gb``), when reachable.
      2. A heuristic keyed on family/size markers in the model path.

    Args:
        model_path: Filesystem path (or repo id) of the model; matched
            case-insensitively against known family/size markers.

    Returns:
        Estimated VRAM in GB, overhead included.
    """
    # Single source of truth for the default overhead (was repeated as a
    # literal ``+ 6`` on every fallback branch).
    default_overhead_gb = 6

    # Preferred source: the database row for this exact path.
    try:
        from .database import get_db_connection
        conn = get_db_connection()
        try:
            cursor = conn.cursor()
            # NOTE(review): '?' is the sqlite paramstyle, but init_db in this
            # project also supports MySQL ('%s') -- confirm this code path is
            # sqlite-only or switch placeholders per backend.
            cursor.execute(
                'SELECT vram_estimate, vram_overhead_gb FROM models WHERE path = ?',
                (model_path,),
            )
            row = cursor.fetchone()
        finally:
            # Close even when execute()/fetchone() raises (previously leaked).
            conn.close()
        if row:
            # Assumes a mapping-style row (e.g. sqlite3.Row) -- TODO confirm
            # the connection's row_factory supports name-based access.
            base_vram = row['vram_estimate'] or 0
            overhead = row['vram_overhead_gb'] or default_overhead_gb
            return base_vram + overhead
    except Exception:
        # DB unavailable or not yet migrated: fall through to the heuristic.
        # (Was a bare ``except:`` which also swallowed SystemExit et al.)
        pass

    # Fallback heuristic. Marker order matters and mirrors the original
    # if/elif chain; dicts preserve insertion order (Python 3.7+).
    path = model_path.lower()
    if 'qwen' in path and ('vl' in path or 'vision' in path):
        # Vision-language models.
        size_to_base_gb = {'7b': 16, '3b': 8, '72b': 144}
        family_default_gb = 24
    elif 'llama' in path:
        size_to_base_gb = {'70b': 40, '65b': 32, '30b': 16, '13b': 8, '7b': 4}
        family_default_gb = 16
    elif 'mistral' in path:
        size_to_base_gb = {'7b': 4}
        family_default_gb = 8
    elif 'gpt' in path or 'chatgpt' in path:
        # Typically API-based; small placeholder if somehow run locally.
        size_to_base_gb = {}
        family_default_gb = 8
    else:
        # Unknown family: generic default.
        size_to_base_gb = {}
        family_default_gb = 8

    for marker, base_gb in size_to_base_gb.items():
        if marker in path:
            return base_gb + default_overhead_gb
    return family_default_gb + default_overhead_gb
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment