Add dynamic pipeline class selection for T2V/I2V/V2V and I2I/T2I

- Add WanImageToVideoPipeline and WanVideoToVideoPipeline to PIPELINE_CLASS_MAP
- Add CogVideoXImageToVideoPipeline and CogVideoXVideoToVideoPipeline
- Add AnimateDiffVideoToVideoPipeline
- Add StableDiffusionImg2ImgPipeline for SD 1.5
- Add dynamic pipeline switching logic for Wan, LTX, CogVideoX, AnimateDiff
- The pipeline class is now selected at runtime based on task mode
- Fix detect_pipeline_class to correctly identify Wan models
- Remove duplicate LTX handling code
parent 30e0871f
...@@ -588,30 +588,43 @@ AUTO_DISABLE_FILE = CONFIG_DIR / "auto_disable.json" ...@@ -588,30 +588,43 @@ AUTO_DISABLE_FILE = CONFIG_DIR / "auto_disable.json"
# Pipeline class to model type mapping # Pipeline class to model type mapping
PIPELINE_CLASS_MAP = { PIPELINE_CLASS_MAP = {
"StableVideoDiffusionPipeline": {"type": "i2v", "default_vram": "~14-18 GB"}, # Video pipelines - T2V
"WanPipeline": {"type": "video", "default_vram": "~10-24 GB"}, "WanPipeline": {"type": "t2v", "default_vram": "~10-24 GB"},
"I2VGenXLPipeline": {"type": "i2v", "default_vram": "~18-24 GB"},
"LTXPipeline": {"type": "t2v", "default_vram": "~12-16 GB"}, "LTXPipeline": {"type": "t2v", "default_vram": "~12-16 GB"},
"CogVideoXPipeline": {"type": "t2v", "default_vram": "~20-30 GB"},
"MochiPipeline": {"type": "t2v", "default_vram": "~18-22 GB"},
"AnimateDiffPipeline": {"type": "t2v", "default_vram": "~10-14 GB"},
# Video pipelines - I2V
"StableVideoDiffusionPipeline": {"type": "i2v", "default_vram": "~14-18 GB"},
"WanImageToVideoPipeline": {"type": "i2v", "default_vram": "~10-24 GB"},
"LTXImageToVideoPipeline": {"type": "i2v", "default_vram": "~12-16 GB"}, "LTXImageToVideoPipeline": {"type": "i2v", "default_vram": "~12-16 GB"},
"CogVideoXImageToVideoPipeline": {"type": "i2v", "default_vram": "~20-30 GB"},
"I2VGenXLPipeline": {"type": "i2v", "default_vram": "~18-24 GB"},
# Video pipelines - V2V
"WanVideoToVideoPipeline": {"type": "v2v", "default_vram": "~10-24 GB"},
"CogVideoXVideoToVideoPipeline": {"type": "v2v", "default_vram": "~20-30 GB"},
"AnimateDiffVideoToVideoPipeline": {"type": "v2v", "default_vram": "~10-14 GB"},
# Image pipelines - T2I
"FluxPipeline": {"type": "t2i", "default_vram": "~20-25 GB"},
"StableDiffusionXLPipeline": {"type": "t2i", "default_vram": "~10-16 GB"},
"StableDiffusion3Pipeline": {"type": "t2i", "default_vram": "~15-20 GB"},
"StableDiffusionPipeline": {"type": "t2i", "default_vram": "~6-8 GB"},
# Image pipelines - I2I
"FluxImg2ImgPipeline": {"type": "i2i", "default_vram": "~20-25 GB"},
"StableDiffusionXLImg2ImgPipeline": {"type": "i2i", "default_vram": "~10-16 GB"},
"StableDiffusion3Img2ImgPipeline": {"type": "i2i", "default_vram": "~15-20 GB"},
"StableDiffusionImg2ImgPipeline": {"type": "i2i", "default_vram": "~6-8 GB"},
# Legacy/Other
"LuminaPipeline": {"type": "image", "default_vram": "~20-30 GB"}, "LuminaPipeline": {"type": "image", "default_vram": "~20-30 GB"},
"LuminaText2ImgPipeline": {"type": "image", "default_vram": "~20-30 GB"}, "LuminaText2ImgPipeline": {"type": "image", "default_vram": "~20-30 GB"},
"Lumina2Pipeline": {"type": "image", "default_vram": "~20-30 GB"}, "Lumina2Pipeline": {"type": "image", "default_vram": "~20-30 GB"},
"Lumina2Text2ImgPipeline": {"type": "image", "default_vram": "~20-30 GB"}, "Lumina2Text2ImgPipeline": {"type": "image", "default_vram": "~20-30 GB"},
"AnimateDiffPipeline": {"type": "video", "default_vram": "~10-14 GB"},
"TextToVideoSDPipeline": {"type": "t2v", "default_vram": "~7-9 GB"}, "TextToVideoSDPipeline": {"type": "t2v", "default_vram": "~7-9 GB"},
"TextToVideoZeroPipeline": {"type": "t2v", "default_vram": "~6-8 GB"}, "TextToVideoZeroPipeline": {"type": "t2v", "default_vram": "~6-8 GB"},
"MochiPipeline": {"type": "t2v", "default_vram": "~18-22 GB"},
"StableDiffusionXLPipeline": {"type": "t2i", "default_vram": "~10-16 GB"},
"StableDiffusionXLImg2ImgPipeline": {"type": "i2i", "default_vram": "~10-16 GB"},
"StableDiffusion3Pipeline": {"type": "t2i", "default_vram": "~15-20 GB"},
"StableDiffusion3Img2ImgPipeline": {"type": "i2i", "default_vram": "~15-20 GB"},
"FluxPipeline": {"type": "t2i", "default_vram": "~20-25 GB"},
"FluxImg2ImgPipeline": {"type": "i2i", "default_vram": "~20-25 GB"},
"AllegroPipeline": {"type": "t2v", "default_vram": "~35-45 GB"}, "AllegroPipeline": {"type": "t2v", "default_vram": "~35-45 GB"},
"HunyuanDiTPipeline": {"type": "t2v", "default_vram": "~40-55 GB"}, "HunyuanDiTPipeline": {"type": "t2v", "default_vram": "~40-55 GB"},
"OpenSoraPipeline": {"type": "video", "default_vram": "~45-65 GB"}, "OpenSoraPipeline": {"type": "video", "default_vram": "~45-65 GB"},
"StepVideoPipeline": {"type": "t2v", "default_vram": "~90-140 GB"}, "StepVideoPipeline": {"type": "t2v", "default_vram": "~90-140 GB"},
"CogVideoXPipeline": {"type": "t2v", "default_vram": "~20-30 GB"},
"HotshotXLPipeline": {"type": "video", "default_vram": "~8-12 GB"}, "HotshotXLPipeline": {"type": "video", "default_vram": "~8-12 GB"},
"LattePipeline": {"type": "t2v", "default_vram": "~20-30 GB"}, "LattePipeline": {"type": "t2v", "default_vram": "~20-30 GB"},
# Generic pipeline - auto-detects model type from loaded model # Generic pipeline - auto-detects model type from loaded model
...@@ -1092,6 +1105,9 @@ def detect_pipeline_class(model_info): ...@@ -1092,6 +1105,9 @@ def detect_pipeline_class(model_info):
if pipeline_tag: if pipeline_tag:
pipeline_tag_lower = pipeline_tag.lower() pipeline_tag_lower = pipeline_tag.lower()
if pipeline_tag_lower == "text-to-video": if pipeline_tag_lower == "text-to-video":
# Wan models can handle both I2V and T2V with WanPipeline
if "wan" in model_id:
return "WanPipeline"
# Check if it's I2V or T2V # Check if it's I2V or T2V
if "image-to-video" in tags or "i2v" in model_id: if "image-to-video" in tags or "i2v" in model_id:
return "StableVideoDiffusionPipeline" return "StableVideoDiffusionPipeline"
...@@ -1118,12 +1134,10 @@ def detect_pipeline_class(model_info): ...@@ -1118,12 +1134,10 @@ def detect_pipeline_class(model_info):
return "StableDiffusionXLImg2ImgPipeline" return "StableDiffusionXLImg2ImgPipeline"
# 3. Check model ID patterns (specific models first) # 3. Check model ID patterns (specific models first)
# Wan models (check for version patterns) # Wan models - these can handle both I2V and T2V with the same pipeline
if "wan2.1" in model_id or "wan2.2" in model_id: # Check for Wan first since it's more specific
return "WanPipeline"
if "wan2" in model_id:
return "WanPipeline"
if "wan" in model_id: if "wan" in model_id:
# Wan models have I2V and T2V variants but both use WanPipeline
return "WanPipeline" return "WanPipeline"
# Stable Video Diffusion # Stable Video Diffusion
...@@ -7799,15 +7813,6 @@ def main(args): ...@@ -7799,15 +7813,6 @@ def main(args):
m_info = MODELS[args.model] m_info = MODELS[args.model]
PipelineClass = get_pipeline_class(m_info["class"]) PipelineClass = get_pipeline_class(m_info["class"])
# Switch to I2V variant for LTX if in I2V mode
if m_info["class"] == "LTXPipeline" and (args.image_to_video or args.image):
try:
from diffusers import LTXImageToVideoPipeline
PipelineClass = LTXImageToVideoPipeline
print(f" 🔄 Switched to LTXImageToVideoPipeline for I2V mode")
except ImportError:
print(f" ⚠️ LTXImageToVideoPipeline not available, using LTXPipeline (T2V only)")
if not PipelineClass: if not PipelineClass:
pipeline_class = m_info['class'] pipeline_class = m_info['class']
print(f"❌ Pipeline class '{pipeline_class}' not found in your diffusers installation.") print(f"❌ Pipeline class '{pipeline_class}' not found in your diffusers installation.")
...@@ -7874,6 +7879,94 @@ def main(args): ...@@ -7874,6 +7879,94 @@ def main(args):
if variant := extra.get("variant"): if variant := extra.get("variant"):
pipe_kwargs["variant"] = variant pipe_kwargs["variant"] = variant
# ─── DYNAMIC PIPELINE CLASS SELECTION ───────────────────────────────────────
# Switch the pipeline class at runtime based on task mode (I2V vs T2V,
# I2I vs T2I, V2V). Each base pipeline class maps to its mode-specific
# diffusers variants; when a variant is not exported by the installed
# diffusers version we fall back to the base class instead of failing.
def _import_pipeline_variant(variant_name):
    """Return the named pipeline class from diffusers, or None if unavailable."""
    try:
        import diffusers
        return getattr(diffusers, variant_name)
    except (ImportError, AttributeError):
        return None

# Base pipeline class -> {task mode: variant class name}.
# NOTE(review): StableDiffusionPipeline (SD 1.5) I2I is listed in
# PIPELINE_CLASS_MAP but was missing from the switching logic — added here.
_PIPELINE_VARIANTS = {
    "WanPipeline": {
        "i2v": "WanImageToVideoPipeline",
        "v2v": "WanVideoToVideoPipeline",
    },
    "LTXPipeline": {"i2v": "LTXImageToVideoPipeline"},
    "CogVideoXPipeline": {
        "i2v": "CogVideoXImageToVideoPipeline",
        "v2v": "CogVideoXVideoToVideoPipeline",
    },
    "AnimateDiffPipeline": {"v2v": "AnimateDiffVideoToVideoPipeline"},
    "FluxPipeline": {"i2i": "FluxImg2ImgPipeline"},
    "StableDiffusionXLPipeline": {"i2i": "StableDiffusionXLImg2ImgPipeline"},
    "StableDiffusion3Pipeline": {"i2i": "StableDiffusion3Img2ImgPipeline"},
    "StableDiffusionPipeline": {"i2i": "StableDiffusionImg2ImgPipeline"},
}

is_i2v_mode = args.image_to_video or args.image
is_i2i_mode = args.image_to_image
is_v2v_mode = args.input_video is not None  # video-to-video if an input video was given

# Pick the first active mode the current base class has a variant for.
# I2V takes priority over V2V, matching the previous per-class if/elif order;
# exact-name matching replaces the earlier mixed `==` / substring checks
# (equivalent for every class in PIPELINE_CLASS_MAP, but not fragile).
_variants = _PIPELINE_VARIANTS.get(m_info["class"], {})
for _mode, _active in (("i2v", is_i2v_mode), ("v2v", is_v2v_mode), ("i2i", is_i2i_mode)):
    if _active and _mode in _variants:
        _variant_name = _variants[_mode]
        _variant_cls = _import_pipeline_variant(_variant_name)
        if _variant_cls is not None:
            PipelineClass = _variant_cls
            print(f" 🔄 Switched to {_variant_name} for {_mode.upper()} mode")
        else:
            print(f" ⚠️ {_variant_name} not available, using {m_info['class']}")
        break
# Handle LoRA models - need to load base model first # Handle LoRA models - need to load base model first
is_lora = m_info.get("is_lora", False) is_lora = m_info.get("is_lora", False)
lora_id = None lora_id = None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment