Adjust VRAM checking to allow models up to 10% less than available VRAM, or full VRAM with offload strategy
parent d3d67441
@@ -3661,7 +3661,7 @@ def detect_generation_type(prompt, prompt_image=None, prompt_animation=None, arg
    return result
def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_all=False, offload_strategy=None):
    """Select the best model based on generation type and constraints
    Args:
@@ -3670,6 +3670,7 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
        vram_gb: Available VRAM in GB
        prefer_quality: Prefer quality over speed
        return_all: If True, return all candidates sorted by score
        offload_strategy: If an offload strategy is specified, allow larger models
    Returns: (model_name, model_info, reason) or [(model_name, model_info, reason), ...] if return_all=True
...@@ -3753,7 +3754,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_ ...@@ -3753,7 +3754,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
# Check VRAM compatibility using base model requirements # Check VRAM compatibility using base model requirements
# LoRAs add a small overhead (~1-2GB) # LoRAs add a small overhead (~1-2GB)
vram_est = parse_vram_estimate(base_model_info.get("vram", "~10 GB")) + 2 vram_est = parse_vram_estimate(base_model_info.get("vram", "~10 GB")) + 2
if vram_est > vram_gb: # No margin - only allow models that fit within available VRAM if offload_strategy:
# If using offload, allow models up to full VRAM
if vram_est > vram_gb:
continue
else:
# If no offload, only allow models that are up to 10% less than available VRAM to ensure comfort
if vram_est > vram_gb * 0.9:
continue continue
            # Get capabilities from base model
@@ -3905,7 +3912,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
            # Non-LoRA model handling (original logic)
            # Check VRAM compatibility
            vram_est = parse_vram_estimate(info.get("vram", "~10 GB"))
            if offload_strategy:
                # If using offload, allow models up to full VRAM
                if vram_est > vram_gb:
                    continue
            else:
                # If no offload, only allow models that are up to 10% less than available VRAM to ensure comfort
                if vram_est > vram_gb * 0.9:
                    continue
            # Check model capabilities
...@@ -4302,7 +4315,7 @@ def run_auto_mode(args, models): ...@@ -4302,7 +4315,7 @@ def run_auto_mode(args, models):
if not user_provided['model']: if not user_provided['model']:
# Get all candidate models for retry support # Get all candidate models for retry support
all_candidates = select_best_model(gen_type, models, vram_gb, prefer_quality, return_all=True) all_candidates = select_best_model(gen_type, models, vram_gb, prefer_quality, return_all=True, offload_strategy=args.offload_strategy)
if not all_candidates: if not all_candidates:
print(" Could not find a suitable model!") print(" Could not find a suitable model!")
...@@ -4355,7 +4368,7 @@ def run_auto_mode(args, models): ...@@ -4355,7 +4368,7 @@ def run_auto_mode(args, models):
# Get all image model candidates # Get all image model candidates
all_img_candidates = select_best_model( all_img_candidates = select_best_model(
img_gen_type, models, vram_gb, prefer_quality=True, return_all=True img_gen_type, models, vram_gb, prefer_quality=True, return_all=True, offload_strategy=args.offload_strategy
) )
if all_img_candidates: if all_img_candidates:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment