Add --allow-bigger-models option to allow models larger than VRAM using system RAM

parent 1774f810
......@@ -164,6 +164,16 @@ def clear_memory(clear_cuda=True, aggressive=False):
gc.collect()
def get_available_ram_gb():
    """Return the amount of currently available system RAM, in GiB.

    Falls back to 0.0 when psutil is missing or the query fails, so
    callers can safely treat the result as "no spare RAM known".
    """
    try:
        import psutil
    except Exception:
        # psutil is an optional dependency; without it we report no RAM.
        return 0.0
    try:
        available_bytes = psutil.virtual_memory().available
    except Exception:
        # Best-effort: any platform-specific failure degrades to 0.0.
        return 0.0
    # bytes -> GiB
    return available_bytes / (1024 ** 3)
def get_memory_usage():
"""Get current memory usage statistics
......@@ -3661,7 +3671,7 @@ def detect_generation_type(prompt, prompt_image=None, prompt_animation=None, arg
return result
def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_all=False, offload_strategy=None):
def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_all=False, offload_strategy=None, allow_bigger_models=False):
"""Select the best model based on generation type and constraints
Args:
......@@ -3671,6 +3681,7 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
prefer_quality: Prefer quality over speed
return_all: If True, return all candidates sorted by score
offload_strategy: If an offload strategy is specified, allow larger models
allow_bigger_models: If True, allow models larger than VRAM by using system RAM for offloading
Returns: (model_name, model_info, reason) or [(model_name, model_info, reason), ...] if return_all=True
......@@ -3754,7 +3765,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
# Check VRAM compatibility using base model requirements
# LoRAs add a small overhead (~1-2GB)
vram_est = parse_vram_estimate(base_model_info.get("vram", "~10 GB")) + 2
if offload_strategy:
if allow_bigger_models:
# If allowing bigger models, check if VRAM + 75% of available RAM is sufficient
available_ram = get_available_ram_gb()
total_available = vram_gb + (available_ram * 0.75)
if vram_est > total_available:
continue
elif offload_strategy:
# If using offload, allow models up to full VRAM
if vram_est > vram_gb:
continue
......@@ -3912,7 +3929,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
# Non-LoRA model handling (original logic)
# Check VRAM compatibility
vram_est = parse_vram_estimate(info.get("vram", "~10 GB"))
if offload_strategy:
if allow_bigger_models:
# If allowing bigger models, check if VRAM + 75% of available RAM is sufficient
available_ram = get_available_ram_gb()
total_available = vram_gb + (available_ram * 0.75)
if vram_est > total_available:
continue
elif offload_strategy:
# If using offload, allow models up to full VRAM
if vram_est > vram_gb:
continue
......@@ -4255,6 +4278,11 @@ def run_auto_mode(args, models):
print("🤖 AUTO MODE - Analyzing prompts and selecting models")
print("=" * 60)
# If --allow-bigger-models is specified, enable sequential offload strategy
if args.allow_bigger_models and args.offload_strategy == "model":
args.offload_strategy = "sequential"
print(f" 📦 --allow-bigger-models enabled, using sequential offload strategy")
# Track which settings were explicitly provided by user
# These are settings that have non-default values
user_provided = {
......@@ -4278,6 +4306,7 @@ def run_auto_mode(args, models):
'prompt_image': getattr(args, 'prompt_image', None) is not None,
'prompt_animation': getattr(args, 'prompt_animation', None) is not None,
'image': getattr(args, 'image', None) is not None,
'allow_bigger_models': args.allow_bigger_models,
}
# Store alternative models for retry in auto mode
......@@ -4315,7 +4344,7 @@ def run_auto_mode(args, models):
if not user_provided['model']:
# Get all candidate models for retry support
all_candidates = select_best_model(gen_type, models, vram_gb, prefer_quality, return_all=True, offload_strategy=args.offload_strategy)
all_candidates = select_best_model(gen_type, models, vram_gb, prefer_quality, return_all=True, offload_strategy=args.offload_strategy, allow_bigger_models=args.allow_bigger_models)
if not all_candidates:
print(" Could not find a suitable model!")
......@@ -4368,7 +4397,7 @@ def run_auto_mode(args, models):
# Get all image model candidates
all_img_candidates = select_best_model(
img_gen_type, models, vram_gb, prefer_quality=True, return_all=True, offload_strategy=args.offload_strategy
img_gen_type, models, vram_gb, prefer_quality=True, return_all=True, offload_strategy=args.offload_strategy, allow_bigger_models=args.allow_bigger_models
)
if all_img_candidates:
......@@ -9585,6 +9614,8 @@ List TTS voices:
parser.add_argument("--remove-cached-model", type=str, default=None,
metavar="MODEL_ID",
help="Remove a specific model from the local HuggingFace cache (e.g., stabilityai/stable-video-diffusion-img2vid-xt-1-1)")
parser.add_argument("--allow-bigger-models", action="store_true",
help="Allow models larger than available VRAM by using system RAM for offloading (implies --offload_strategy sequential)")
parser.add_argument("--clear-cache", action="store_true",
help="Clear the entire local HuggingFace cache")
parser.add_argument("--update-models", action="store_true",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.