Add --allow-bigger-models option to allow models larger than VRAM using system RAM

parent 1774f810
...@@ -164,6 +164,16 @@ def clear_memory(clear_cuda=True, aggressive=False): ...@@ -164,6 +164,16 @@ def clear_memory(clear_cuda=True, aggressive=False):
gc.collect() gc.collect()
def get_available_ram_gb():
    """Return currently available system RAM in gigabytes.

    Best-effort probe via psutil; returns 0.0 when psutil is not
    installed or the query fails, so callers can treat the value as
    "no extra RAM to offload into" rather than crashing.
    """
    try:
        import psutil
    except Exception:
        # psutil is an optional dependency — degrade gracefully.
        return 0.0
    try:
        available_bytes = psutil.virtual_memory().available
    except Exception:
        return 0.0
    # bytes -> GiB
    return available_bytes / (1024 ** 3)
def get_memory_usage(): def get_memory_usage():
"""Get current memory usage statistics """Get current memory usage statistics
...@@ -3661,7 +3671,7 @@ def detect_generation_type(prompt, prompt_image=None, prompt_animation=None, arg ...@@ -3661,7 +3671,7 @@ def detect_generation_type(prompt, prompt_image=None, prompt_animation=None, arg
return result return result
def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_all=False, offload_strategy=None): def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_all=False, offload_strategy=None, allow_bigger_models=False):
"""Select the best model based on generation type and constraints """Select the best model based on generation type and constraints
Args: Args:
...@@ -3671,6 +3681,7 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_ ...@@ -3671,6 +3681,7 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
prefer_quality: Prefer quality over speed prefer_quality: Prefer quality over speed
return_all: If True, return all candidates sorted by score return_all: If True, return all candidates sorted by score
offload_strategy: If an offload strategy is specified, allow larger models offload_strategy: If an offload strategy is specified, allow larger models
allow_bigger_models: If True, allow models larger than VRAM by using system RAM for offloading
Returns: (model_name, model_info, reason) or [(model_name, model_info, reason), ...] if return_all=True Returns: (model_name, model_info, reason) or [(model_name, model_info, reason), ...] if return_all=True
...@@ -3754,7 +3765,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_ ...@@ -3754,7 +3765,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
# Check VRAM compatibility using base model requirements # Check VRAM compatibility using base model requirements
# LoRAs add a small overhead (~1-2GB) # LoRAs add a small overhead (~1-2GB)
vram_est = parse_vram_estimate(base_model_info.get("vram", "~10 GB")) + 2 vram_est = parse_vram_estimate(base_model_info.get("vram", "~10 GB")) + 2
if offload_strategy: if allow_bigger_models:
# If allowing bigger models, check if VRAM + 75% of available RAM is sufficient
available_ram = get_available_ram_gb()
total_available = vram_gb + (available_ram * 0.75)
if vram_est > total_available:
continue
elif offload_strategy:
# If using offload, allow models up to full VRAM # If using offload, allow models up to full VRAM
if vram_est > vram_gb: if vram_est > vram_gb:
continue continue
...@@ -3912,7 +3929,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_ ...@@ -3912,7 +3929,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
# Non-LoRA model handling (original logic) # Non-LoRA model handling (original logic)
# Check VRAM compatibility # Check VRAM compatibility
vram_est = parse_vram_estimate(info.get("vram", "~10 GB")) vram_est = parse_vram_estimate(info.get("vram", "~10 GB"))
if offload_strategy: if allow_bigger_models:
# If allowing bigger models, check if VRAM + 75% of available RAM is sufficient
available_ram = get_available_ram_gb()
total_available = vram_gb + (available_ram * 0.75)
if vram_est > total_available:
continue
elif offload_strategy:
# If using offload, allow models up to full VRAM # If using offload, allow models up to full VRAM
if vram_est > vram_gb: if vram_est > vram_gb:
continue continue
...@@ -4255,6 +4278,11 @@ def run_auto_mode(args, models): ...@@ -4255,6 +4278,11 @@ def run_auto_mode(args, models):
print("🤖 AUTO MODE - Analyzing prompts and selecting models") print("🤖 AUTO MODE - Analyzing prompts and selecting models")
print("=" * 60) print("=" * 60)
# If --allow-bigger-models is specified, enable sequential offload strategy
if args.allow_bigger_models and args.offload_strategy == "model":
args.offload_strategy = "sequential"
print(f" 📦 --allow-bigger-models enabled, using sequential offload strategy")
# Track which settings were explicitly provided by user # Track which settings were explicitly provided by user
# These are settings that have non-default values # These are settings that have non-default values
user_provided = { user_provided = {
...@@ -4278,6 +4306,7 @@ def run_auto_mode(args, models): ...@@ -4278,6 +4306,7 @@ def run_auto_mode(args, models):
'prompt_image': getattr(args, 'prompt_image', None) is not None, 'prompt_image': getattr(args, 'prompt_image', None) is not None,
'prompt_animation': getattr(args, 'prompt_animation', None) is not None, 'prompt_animation': getattr(args, 'prompt_animation', None) is not None,
'image': getattr(args, 'image', None) is not None, 'image': getattr(args, 'image', None) is not None,
'allow_bigger_models': args.allow_bigger_models,
} }
# Store alternative models for retry in auto mode # Store alternative models for retry in auto mode
...@@ -4315,7 +4344,7 @@ def run_auto_mode(args, models): ...@@ -4315,7 +4344,7 @@ def run_auto_mode(args, models):
if not user_provided['model']: if not user_provided['model']:
# Get all candidate models for retry support # Get all candidate models for retry support
all_candidates = select_best_model(gen_type, models, vram_gb, prefer_quality, return_all=True, offload_strategy=args.offload_strategy) all_candidates = select_best_model(gen_type, models, vram_gb, prefer_quality, return_all=True, offload_strategy=args.offload_strategy, allow_bigger_models=args.allow_bigger_models)
if not all_candidates: if not all_candidates:
print(" Could not find a suitable model!") print(" Could not find a suitable model!")
...@@ -4368,7 +4397,7 @@ def run_auto_mode(args, models): ...@@ -4368,7 +4397,7 @@ def run_auto_mode(args, models):
# Get all image model candidates # Get all image model candidates
all_img_candidates = select_best_model( all_img_candidates = select_best_model(
img_gen_type, models, vram_gb, prefer_quality=True, return_all=True, offload_strategy=args.offload_strategy img_gen_type, models, vram_gb, prefer_quality=True, return_all=True, offload_strategy=args.offload_strategy, allow_bigger_models=args.allow_bigger_models
) )
if all_img_candidates: if all_img_candidates:
...@@ -9585,6 +9614,8 @@ List TTS voices: ...@@ -9585,6 +9614,8 @@ List TTS voices:
parser.add_argument("--remove-cached-model", type=str, default=None, parser.add_argument("--remove-cached-model", type=str, default=None,
metavar="MODEL_ID", metavar="MODEL_ID",
help="Remove a specific model from the local HuggingFace cache (e.g., stabilityai/stable-video-diffusion-img2vid-xt-1-1)") help="Remove a specific model from the local HuggingFace cache (e.g., stabilityai/stable-video-diffusion-img2vid-xt-1-1)")
parser.add_argument("--allow-bigger-models", action="store_true",
help="Allow models larger than available VRAM by using system RAM for offloading (implies --offload_strategy sequential)")
parser.add_argument("--clear-cache", action="store_true", parser.add_argument("--clear-cache", action="store_true",
help="Clear the entire local HuggingFace cache") help="Clear the entire local HuggingFace cache")
parser.add_argument("--update-models", action="store_true", parser.add_argument("--update-models", action="store_true",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment