Adjust VRAM checking to allow models up to 10% less than available VRAM, or full VRAM with offload strategy
parent d3d67441
@@ -3661,7 +3661,7 @@ def detect_generation_type(prompt, prompt_image=None, prompt_animation=None, arg
    return result
def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_all=False, offload_strategy=None):
    """Select the best model based on generation type and constraints
    Args:
@@ -3670,6 +3670,7 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
        vram_gb: Available VRAM in GB
        prefer_quality: Prefer quality over speed
        return_all: If True, return all candidates sorted by score
        offload_strategy: If an offload strategy is specified, allow larger models
    Returns: (model_name, model_info, reason) or [(model_name, model_info, reason), ...] if return_all=True
...@@ -3753,7 +3754,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_ ...@@ -3753,7 +3754,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
# Check VRAM compatibility using base model requirements # Check VRAM compatibility using base model requirements
# LoRAs add a small overhead (~1-2GB) # LoRAs add a small overhead (~1-2GB)
vram_est = parse_vram_estimate(base_model_info.get("vram", "~10 GB")) + 2 vram_est = parse_vram_estimate(base_model_info.get("vram", "~10 GB")) + 2
if vram_est > vram_gb: # No margin - only allow models that fit within available VRAM if offload_strategy:
# If using offload, allow models up to full VRAM
if vram_est > vram_gb:
continue
else:
# If no offload, only allow models that are up to 10% less than available VRAM to ensure comfort
if vram_est > vram_gb * 0.9:
continue continue
            # Get capabilities from base model
@@ -3905,7 +3912,13 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
            # Non-LoRA model handling (original logic)
            # Check VRAM compatibility
            vram_est = parse_vram_estimate(info.get("vram", "~10 GB"))
            if offload_strategy:
                # If using offload, allow models up to full VRAM
                if vram_est > vram_gb:
                    continue
            else:
                # If no offload, only allow models that are up to 10% less than available VRAM to ensure comfort
                if vram_est > vram_gb * 0.9:
                    continue
            # Check model capabilities
...@@ -4302,7 +4315,7 @@ def run_auto_mode(args, models): ...@@ -4302,7 +4315,7 @@ def run_auto_mode(args, models):
if not user_provided['model']: if not user_provided['model']:
# Get all candidate models for retry support # Get all candidate models for retry support
all_candidates = select_best_model(gen_type, models, vram_gb, prefer_quality, return_all=True) all_candidates = select_best_model(gen_type, models, vram_gb, prefer_quality, return_all=True, offload_strategy=args.offload_strategy)
if not all_candidates: if not all_candidates:
print(" Could not find a suitable model!") print(" Could not find a suitable model!")
...@@ -4355,7 +4368,7 @@ def run_auto_mode(args, models): ...@@ -4355,7 +4368,7 @@ def run_auto_mode(args, models):
# Get all image model candidates # Get all image model candidates
all_img_candidates = select_best_model( all_img_candidates = select_best_model(
img_gen_type, models, vram_gb, prefer_quality=True, return_all=True img_gen_type, models, vram_gb, prefer_quality=True, return_all=True, offload_strategy=args.offload_strategy
) )
if all_img_candidates: if all_img_candidates:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment