Fix: Wrap LoRA loading and offloading in defer_i2v_loading check

When defer_i2v_loading=True (I2V mode without a provided image), the code
sets pipe=None but then tries to call pipe.load_lora_weights() and
pipe.enable_model_cpu_offload() on None, causing an AttributeError.

This fix wraps the LoRA loading and offloading configuration blocks
inside an 'if not defer_i2v_loading:' condition so they are skipped
when the I2V model loading is deferred until after image generation.
parent 0ccc1d52
Pipeline #237 failed with stages
...@@ -3938,61 +3938,63 @@ def main(args): ...@@ -3938,61 +3938,63 @@ def main(args):
timing.end_step() # model_loading timing.end_step() # model_loading
# Apply LoRA if this is a LoRA model # Only apply LoRA and offloading if we actually loaded the model (not deferred)
if is_lora and lora_id: if not defer_i2v_loading:
timing.begin_step("lora_loading") # Apply LoRA if this is a LoRA model
print(f" Loading LoRA adapter: {lora_id}") if is_lora and lora_id:
try: timing.begin_step("lora_loading")
# Load LoRA weights print(f" Loading LoRA adapter: {lora_id}")
pipe.load_lora_weights(lora_id) try:
print(f" ✅ LoRA applied successfully") # Load LoRA weights
except Exception as e: pipe.load_lora_weights(lora_id)
print(f" ⚠️ LoRA loading failed: {e}") print(f" ✅ LoRA applied successfully")
print(f" Continuing with base model...") except Exception as e:
timing.end_step() # lora_loading print(f" ⚠️ LoRA loading failed: {e}")
print(f" Continuing with base model...")
timing.end_step() # lora_loading
if args.no_filter and hasattr(pipe, "safety_checker"): if args.no_filter and hasattr(pipe, "safety_checker"):
pipe.safety_checker = None pipe.safety_checker = None
# Offloading # Offloading
off = args.offload_strategy off = args.offload_strategy
if off == "auto_map": if off == "auto_map":
pipe.enable_model_cpu_offload()
elif off == "sequential":
pipe.enable_sequential_cpu_offload()
elif off == "group":
try:
pipe.enable_group_offload(group_size=args.offload_group_size)
except:
print("Group offload unavailable → model offload fallback")
pipe.enable_model_cpu_offload() pipe.enable_model_cpu_offload()
elif off == "model": elif off == "sequential":
pipe.enable_model_cpu_offload() pipe.enable_sequential_cpu_offload()
else: elif off == "group":
pipe.to("cuda" if torch.cuda.is_available() else "cpu") try:
pipe.enable_group_offload(group_size=args.offload_group_size)
pipe.enable_attention_slicing("max") except:
try: print("Group offload unavailable → model offload fallback")
pipe.enable_vae_slicing() pipe.enable_model_cpu_offload()
pipe.enable_vae_tiling() elif off == "model":
except: pipe.enable_model_cpu_offload()
pass else:
pipe.to("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available(): pipe.enable_attention_slicing("max")
try: try:
pipe.enable_xformers_memory_efficient_attention() pipe.enable_vae_slicing()
pipe.enable_vae_tiling()
except: except:
pass pass
if "wan" in args.model and hasattr(pipe, "scheduler"): if torch.cuda.is_available():
try: try:
pipe.scheduler = UniPCMultistepScheduler.from_config( pipe.enable_xformers_memory_efficient_attention()
pipe.scheduler.config, except:
prediction_type="flow_prediction", pass
flow_shift=extra.get("flow_shift", 3.0)
) if "wan" in args.model and hasattr(pipe, "scheduler"):
except: try:
pass pipe.scheduler = UniPCMultistepScheduler.from_config(
pipe.scheduler.config,
prediction_type="flow_prediction",
flow_shift=extra.get("flow_shift", 3.0)
)
except:
pass
# ─── Generation ──────────────────────────────────────────────────────────── # ─── Generation ────────────────────────────────────────────────────────────
seed = args.seed if args.seed >= 0 else random.randint(0, 2**31 - 1) seed = args.seed if args.seed >= 0 else random.randint(0, 2**31 - 1)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment