Add --allow-bigger-models option to allow models larger than VRAM using system RAM

parent 1774f810
......@@ -164,6 +164,16 @@ def clear_memory(clear_cuda=True, aggressive=False):
gc.collect()
def get_available_ram_gb():
    """Return the amount of currently available system RAM, in GiB.

    Falls back to 0.0 when psutil is missing or the query fails, so
    callers can safely treat the result as "no spare RAM known".
    """
    try:
        import psutil
    except Exception:
        # psutil is an optional dependency; without it we report no RAM.
        return 0.0
    try:
        available_bytes = psutil.virtual_memory().available
    except Exception:
        # Best-effort: any platform-specific failure degrades to 0.0.
        return 0.0
    # bytes -> GiB
    return available_bytes / (1024 ** 3)
def get_memory_usage():
"""Get current memory usage statistics
......@@ -3661,7 +3671,7 @@ def detect_generation_type(prompt, prompt_image=None, prompt_animation=None, arg
return result
def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_all=False, offload_strategy=None):
def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_all=False, offload_strategy=None, allow_bigger_models=False):
"""Select the best model based on generation type and constraints
Args:
......@@ -3671,6 +3681,7 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
prefer_quality: Prefer quality over speed
return_all: If True, return all candidates sorted by score
offload_strategy: If an offload strategy is specified, allow larger models
allow_bigger_models: If True, allow models larger than VRAM by using system RAM for offloading
Returns: (model_name, model_info, reason) or [(model_name, model_info, reason), ...] if return_all=True
......@@ -3754,7 +3765,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
# Check VRAM compatibility using base model requirements
# LoRAs add a small overhead (~1-2GB)
vram_est = parse_vram_estimate(base_model_info.get("vram", "~10 GB")) + 2
if offload_strategy:
if allow_bigger_models:
# If allowing bigger models, check if VRAM + 75% of available RAM is sufficient
available_ram = get_available_ram_gb()
total_available = vram_gb + (available_ram * 0.75)
if vram_est > total_available:
continue
elif offload_strategy:
# If using offload, allow models up to full VRAM
if vram_est > vram_gb:
continue
......@@ -3912,7 +3929,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
# Non-LoRA model handling (original logic)
# Check VRAM compatibility
vram_est = parse_vram_estimate(info.get("vram", "~10 GB"))
if offload_strategy:
if allow_bigger_models:
# If allowing bigger models, check if VRAM + 75% of available RAM is sufficient
available_ram = get_available_ram_gb()
total_available = vram_gb + (available_ram * 0.75)
if vram_est > total_available:
continue
elif offload_strategy:
# If using offload, allow models up to full VRAM
if vram_est > vram_gb:
continue
......@@ -4255,6 +4278,11 @@ def run_auto_mode(args, models):
print("🤖 AUTO MODE - Analyzing prompts and selecting models")
print("=" * 60)
# If --allow-bigger-models is specified, enable sequential offload strategy
if args.allow_bigger_models and args.offload_strategy == "model":
args.offload_strategy = "sequential"
print(f" 📦 --allow-bigger-models enabled, using sequential offload strategy")
# Track which settings were explicitly provided by user
# These are settings that have non-default values
user_provided = {
......@@ -4278,6 +4306,7 @@ def run_auto_mode(args, models):
'prompt_image': getattr(args, 'prompt_image', None) is not None,
'prompt_animation': getattr(args, 'prompt_animation', None) is not None,
'image': getattr(args, 'image', None) is not None,
'allow_bigger_models': args.allow_bigger_models,
}
# Store alternative models for retry in auto mode
......@@ -4315,7 +4344,7 @@ def run_auto_mode(args, models):
if not user_provided['model']:
# Get all candidate models for retry support
all_candidates = select_best_model(gen_type, models, vram_gb, prefer_quality, return_all=True, offload_strategy=args.offload_strategy)
all_candidates = select_best_model(gen_type, models, vram_gb, prefer_quality, return_all=True, offload_strategy=args.offload_strategy, allow_bigger_models=args.allow_bigger_models)
if not all_candidates:
print(" Could not find a suitable model!")
......@@ -4368,7 +4397,7 @@ def run_auto_mode(args, models):
# Get all image model candidates
all_img_candidates = select_best_model(
img_gen_type, models, vram_gb, prefer_quality=True, return_all=True, offload_strategy=args.offload_strategy
img_gen_type, models, vram_gb, prefer_quality=True, return_all=True, offload_strategy=args.offload_strategy, allow_bigger_models=args.allow_bigger_models
)
if all_img_candidates:
......@@ -9585,6 +9614,8 @@ List TTS voices:
parser.add_argument("--remove-cached-model", type=str, default=None,
metavar="MODEL_ID",
help="Remove a specific model from the local HuggingFace cache (e.g., stabilityai/stable-video-diffusion-img2vid-xt-1-1)")
parser.add_argument("--allow-bigger-models", action="store_true",
help="Allow models larger than available VRAM by using system RAM for offloading (implies --offload_strategy sequential)")
parser.add_argument("--clear-cache", action="store_true",
help="Clear the entire local HuggingFace cache")
parser.add_argument("--update-models", action="store_true",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.