Adjust VRAM checking to allow models up to 10% less than available VRAM, or...

Adjust VRAM checking: without an offload strategy, only allow models requiring at most 90% of available VRAM (10% headroom); with an offload strategy, allow models up to the full available VRAM
parent d3d67441
......@@ -3661,7 +3661,7 @@ def detect_generation_type(prompt, prompt_image=None, prompt_animation=None, arg
return result
def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_all=False):
def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_all=False, offload_strategy=None):
"""Select the best model based on generation type and constraints
Args:
......@@ -3670,6 +3670,7 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
vram_gb: Available VRAM in GB
prefer_quality: Prefer quality over speed
return_all: If True, return all candidates sorted by score
offload_strategy: If an offload strategy is specified, allow larger models
Returns: (model_name, model_info, reason) or [(model_name, model_info, reason), ...] if return_all=True
......@@ -3753,7 +3754,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
# Check VRAM compatibility using base model requirements
# LoRAs add a small overhead (~1-2GB)
vram_est = parse_vram_estimate(base_model_info.get("vram", "~10 GB")) + 2
if vram_est > vram_gb: # No margin - only allow models that fit within available VRAM
if offload_strategy:
# If using offload, allow models up to full VRAM
if vram_est > vram_gb:
continue
else:
# If no offload, only allow models that are up to 10% less than available VRAM to ensure comfort
if vram_est > vram_gb * 0.9:
continue
# Get capabilities from base model
......@@ -3905,7 +3912,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
# Non-LoRA model handling (original logic)
# Check VRAM compatibility
vram_est = parse_vram_estimate(info.get("vram", "~10 GB"))
if vram_est > vram_gb: # No margin - only allow models that fit within available VRAM
if offload_strategy:
# If using offload, allow models up to full VRAM
if vram_est > vram_gb:
continue
else:
# If no offload, only allow models that are up to 10% less than available VRAM to ensure comfort
if vram_est > vram_gb * 0.9:
continue
# Check model capabilities
......@@ -4302,7 +4315,7 @@ def run_auto_mode(args, models):
if not user_provided['model']:
# Get all candidate models for retry support
all_candidates = select_best_model(gen_type, models, vram_gb, prefer_quality, return_all=True)
all_candidates = select_best_model(gen_type, models, vram_gb, prefer_quality, return_all=True, offload_strategy=args.offload_strategy)
if not all_candidates:
print(" Could not find a suitable model!")
......@@ -4355,7 +4368,7 @@ def run_auto_mode(args, models):
# Get all image model candidates
all_img_candidates = select_best_model(
img_gen_type, models, vram_gb, prefer_quality=True, return_all=True
img_gen_type, models, vram_gb, prefer_quality=True, return_all=True, offload_strategy=args.offload_strategy
)
if all_img_candidates:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment