Fix component-only model loading for LTX-Video fine-tunes

- Set pipeline_loaded_successfully=True when component loading succeeds
- Fix indentation for LoRA and offloading code blocks
- Define 'off' variable inside the correct scope
- This fixes loading models like Muinez/ltxvideo-2b-nsfw which are
  fine-tuned transformer weights without a full pipeline
parent 4bdfc1ed
...@@ -8576,6 +8576,7 @@ def main(args): ...@@ -8576,6 +8576,7 @@ def main(args):
) )
print(f" ✅ Fine-tuned transformer loaded successfully!") print(f" ✅ Fine-tuned transformer loaded successfully!")
loaded_with_base = True loaded_with_base = True
pipeline_loaded_successfully = True
elif class_name == "AutoencoderKLLTXVideo": elif class_name == "AutoencoderKLLTXVideo":
from diffusers import AutoencoderKLLTXVideo from diffusers import AutoencoderKLLTXVideo
print(f" Loading fine-tuned VAE...") print(f" Loading fine-tuned VAE...")
...@@ -8586,6 +8587,7 @@ def main(args): ...@@ -8586,6 +8587,7 @@ def main(args):
) )
print(f" ✅ Fine-tuned VAE loaded successfully!") print(f" ✅ Fine-tuned VAE loaded successfully!")
loaded_with_base = True loaded_with_base = True
pipeline_loaded_successfully = True
except Exception as component_e: except Exception as component_e:
if debug: if debug:
print(f" [DEBUG] Component detection failed: {component_e}") print(f" [DEBUG] Component detection failed: {component_e}")
...@@ -8692,16 +8694,19 @@ def main(args): ...@@ -8692,16 +8694,19 @@ def main(args):
if components_loaded: if components_loaded:
print(f" ✅ Loaded components: {components_loaded}") print(f" ✅ Loaded components: {components_loaded}")
loaded_with_base = True loaded_with_base = True
pipeline_loaded_successfully = True
else: else:
print(f" ⚠️ No components could be loaded from fine-tuned model") print(f" ⚠️ No components could be loaded from fine-tuned model")
print(f" Using base model: {base_model}") print(f" Using base model: {base_model}")
loaded_with_base = True # Still use base model loaded_with_base = True # Still use base model
pipeline_loaded_successfully = True
except Exception as ft_e: except Exception as ft_e:
if debug: if debug:
print(f" [DEBUG] Fine-tuned loading failed: {ft_e}") print(f" [DEBUG] Fine-tuned loading failed: {ft_e}")
print(f" Using base model: {base_model}") print(f" Using base model: {base_model}")
loaded_with_base = True loaded_with_base = True
pipeline_loaded_successfully = True
break break
except Exception as base_e: except Exception as base_e:
if debug: if debug:
...@@ -8717,6 +8722,7 @@ def main(args): ...@@ -8717,6 +8722,7 @@ def main(args):
print(f" ✅ Successfully loaded I2V model with DiffusionPipeline") print(f" ✅ Successfully loaded I2V model with DiffusionPipeline")
PipelineClass = DiffusionPipeline PipelineClass = DiffusionPipeline
loaded_with_base = True loaded_with_base = True
pipeline_loaded_successfully = True
except Exception as generic_e: except Exception as generic_e:
if debug: if debug:
print(f" [DEBUG] Generic loader also failed: {generic_e}") print(f" [DEBUG] Generic loader also failed: {generic_e}")
...@@ -8728,49 +8734,52 @@ def main(args): ...@@ -8728,49 +8734,52 @@ def main(args):
raise e # Re-raise if all fallbacks failed raise e # Re-raise if all fallbacks failed
else: else:
raise e # Re-raise for non-404 errors raise e # Re-raise for non-404 errors
# Apply LoRA if this is a LoRA model # Apply LoRA if this is a LoRA model
if is_lora and lora_id: if is_lora and lora_id and pipeline_loaded_successfully:
print(f" Loading LoRA adapter: {lora_id}") print(f" Loading LoRA adapter: {lora_id}")
try: try:
pipe.load_lora_weights(lora_id) pipe.load_lora_weights(lora_id)
print(f" ✅ LoRA applied successfully") print(f" ✅ LoRA applied successfully")
except Exception as lora_e: except Exception as lora_e:
print(f" ⚠️ LoRA loading failed: {lora_e}") print(f" ⚠️ LoRA loading failed: {lora_e}")
print(f" Continuing with base model...") print(f" Continuing with base model...")
if args.no_filter and hasattr(pipe, "safety_checker"): # Apply safety checker and offloading only if pipeline loaded successfully
pipe.safety_checker = None if pipeline_loaded_successfully:
if args.no_filter and hasattr(pipe, "safety_checker"):
pipe.safety_checker = None
# Re-apply offloading strategy # Offloading
if off == "auto_map": off = args.offload_strategy
pipe.enable_model_cpu_offload() if off == "auto_map":
elif off == "sequential": pipe.enable_model_cpu_offload()
pipe.enable_sequential_cpu_offload() elif off == "sequential":
elif off == "group": pipe.enable_sequential_cpu_offload()
try: elif off == "group":
pipe.enable_group_offload(group_size=args.offload_group_size) try:
except: pipe.enable_group_offload(group_size=args.offload_group_size)
print("Group offload unavailable → model offload fallback") except:
pipe.enable_model_cpu_offload() print("Group offload unavailable → model offload fallback")
elif off == "model":
pipe.enable_model_cpu_offload() pipe.enable_model_cpu_offload()
else: elif off == "model":
pipe.to("cuda" if torch.cuda.is_available() else "cpu") pipe.enable_model_cpu_offload()
else:
pipe.to("cuda" if torch.cuda.is_available() else "cpu")
pipe.enable_attention_slicing("max") pipe.enable_attention_slicing("max")
try:
pipe.enable_vae_slicing()
pipe.enable_vae_tiling()
except:
pass
if torch.cuda.is_available():
try: try:
pipe.enable_vae_slicing() pipe.enable_xformers_memory_efficient_attention()
pipe.enable_vae_tiling()
except: except:
pass pass
if torch.cuda.is_available():
try:
pipe.enable_xformers_memory_efficient_attention()
except:
pass
if "wan" in args.model and hasattr(pipe, "scheduler"): if "wan" in args.model and hasattr(pipe, "scheduler"):
try: try:
pipe.scheduler = UniPCMultistepScheduler.from_config( pipe.scheduler = UniPCMultistepScheduler.from_config(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment