Fix loading transformer-only fine-tuned models (like Muinez/ltxvideo-2b-nsfw)

Some models on HuggingFace are not full pipelines but just fine-tuned components
(e.g., just the transformer weights). These have a config.json at root level with
_class_name pointing to a component class like 'LTXVideoTransformer3DModel'.

This fix adds:

1. Detection of component-only models:
   - Check for config.json at root level
   - Read _class_name to determine component type
   - Detect if it's a transformer, VAE, or other component

2. Proper loading strategy:
   - Load the base pipeline first (e.g., Lightricks/LTX-Video)
   - Then load the fine-tuned component from the model repo
   - Replace the base component with the fine-tuned one

3. Supported component classes:
   - LTXVideoTransformer3DModel → Lightricks/LTX-Video
   - AutoencoderKLLTXVideo → Lightricks/LTX-Video
   - UNet2DConditionModel, UNet3DConditionModel, AutoencoderKL (detected as components; the base pipeline is inferred from hints in the model ID, e.g. "ltx", "wan", "svd")

This allows loading models like Muinez/ltxvideo-2b-nsfw which are
fine-tuned transformer weights without a full pipeline structure.
parent c5cdb9fd
......@@ -7349,88 +7349,181 @@ def main(args):
# Fallback path (inside main()): reached when the I2V repo has no model_index.json
# at its root, i.e. it is not a full diffusers pipeline. Tries, in order:
#   1. detect a component-only fine-tune via root config.json / _class_name,
#      load the matching base pipeline, then swap in the fine-tuned component;
#   2. guess a base pipeline from keywords in the model id and overlay any
#      component subfolders found in the repo;
#   3. (below this excerpt) fall back to the generic DiffusionPipeline loader.
# NOTE(review): this excerpt is a diff paste — indentation has been stripped and
# two revisions of the same hunk appear interleaved (see NOTEs below). Confirm
# real structure against the repository before acting on ordering observations.
print(f"\n⚠️ I2V model model_index.json not found at root level")
print(f" Attempting alternative loading strategies...")
# Strategy 1: Try loading from base model and then fine-tuned weights
# Keyword -> base-pipeline repo map; matched case-insensitively against the model id.
base_model_fallbacks = {
"ltx": "Lightricks/LTX-Video",
"ltxvideo": "Lightricks/LTX-Video",
"wan": "Wan-AI/Wan2.1-T2V-14B-Diffusers",
"svd": "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
"cogvideo": "THUDM/CogVideoX-5b",
"mochi": "genmo/mochi-1-preview",
}
# Strategy 1: Check if this is a transformer-only fine-tune
# (model has config.json with _class_name pointing to a component, not a pipeline)
loaded_with_base = False
model_id_lower = model_id_to_load.lower()
# NOTE(review): this loop header and the base-model load just below look like the
# OLD revision of this hunk; the NEW revision re-declares the same fallback table
# and loop further down ("Strategy 2" section). Verify which copy survives.
for key, base_model in base_model_fallbacks.items():
if key in model_id_lower:
print(f" Trying to load base model first: {base_model}")
print(f" Then loading fine-tuned weights from: {model_id_to_load}")
try:
# Load base model
pipe = PipelineClass.from_pretrained(base_model, **pipe_kwargs)
print(f" ✅ Base model loaded")
try:
# Inspect the repo file list to classify it: a root config.json with a
# component _class_name means "fine-tuned component", not a pipeline.
from huggingface_hub import hf_hub_download, list_repo_files
import json as json_module
# Check for config.json at root level
repo_files = list_repo_files(model_id_to_load, token=hf_token)
has_root_config = 'config.json' in repo_files
has_safetensors = any(f.endswith('.safetensors') for f in repo_files)
if debug:
print(f" [DEBUG] Root config.json: {has_root_config}")
print(f" [DEBUG] Has safetensors: {has_safetensors}")
if has_root_config:
# Download and read config.json to check what type of model this is
config_path = hf_hub_download(
model_id_to_load,
"config.json",
token=hf_token
)
with open(config_path, 'r') as cf:
model_config = json_module.load(cf)
# diffusers writes the component class into "_class_name";
# empty string if the key is absent.
class_name = model_config.get("_class_name", "")
if debug:
print(f" [DEBUG] Model class name: {class_name}")
# Check if this is a component-only model (transformer, unet, etc.)
component_classes = [
"LTXVideoTransformer3DModel",
"UNet2DConditionModel",
"UNet3DConditionModel",
"AutoencoderKL",
"AutoencoderKLLTXVideo",
]
if class_name in component_classes:
print(f" 📦 Detected component-only model: {class_name}")
print(f" This is a fine-tuned component, not a full pipeline.")
# Now try to load the fine-tuned components
# This works for models that have component folders but no model_index.json
try:
from huggingface_hub import hf_hub_download, list_repo_files
# List files in the repo to see what components exist
repo_files = list_repo_files(model_id_to_load, token=hf_token)
# Check for component folders
# Top-level directory names ("transformer/", "vae/", ...) signal
# diffusers-style component subfolders.
component_folders = set()
for f in repo_files:
parts = f.split('/')
if len(parts) > 1:
component_folders.add(parts[0])
print(f" Found component folders: {component_folders}")
# Determine base model based on component type
base_model_map = {
"LTXVideoTransformer3DModel": "Lightricks/LTX-Video",
"AutoencoderKLLTXVideo": "Lightricks/LTX-Video",
}
base_model = base_model_map.get(class_name)
# Also check model ID for hints
# For component classes not in base_model_map (UNet*, AutoencoderKL),
# fall back to keyword matching on the model id.
if not base_model:
if "ltx" in model_id_lower or "ltxvideo" in model_id_lower:
base_model = "Lightricks/LTX-Video"
elif "wan" in model_id_lower:
base_model = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
elif "svd" in model_id_lower:
base_model = "stabilityai/stable-video-diffusion-img2vid-xt-1-1"
if base_model:
print(f" Loading base pipeline: {base_model}")
print(f" Then loading fine-tuned {class_name} from: {model_id_to_load}")
# Load each component that exists
components_loaded = []
for component in ['transformer', 'unet', 'vae', 'text_encoder', 'text_encoder_2']:
if component in component_folders:
try:
# NOTE(review): as pasted, these pipe.transformer / pipe.vae
# assignments appear BEFORE the base pipeline is (re)created
# at "Load base pipeline" below, which would discard them.
# This looks like diff interleaving — confirm the real order.
if component == 'transformer':
from diffusers import LTXVideoTransformer3DModel
pipe.transformer = LTXVideoTransformer3DModel.from_pretrained(
model_id_to_load, subfolder="transformer",
torch_dtype=pipe_kwargs.get("torch_dtype", torch.float16)
)
components_loaded.append(component)
elif component == 'vae':
from diffusers import AutoencoderKLLTXVideo
pipe.vae = AutoencoderKLLTXVideo.from_pretrained(
model_id_to_load, subfolder="vae",
torch_dtype=pipe_kwargs.get("torch_dtype", torch.float16)
)
components_loaded.append(component)
except Exception as comp_e:
# Best-effort: a component that fails to load is skipped, not fatal.
if debug:
print(f" [DEBUG] Could not load {component}: {comp_e}")
# Load base pipeline
pipe = PipelineClass.from_pretrained(base_model, **pipe_kwargs)
print(f" ✅ Base pipeline loaded")
if components_loaded:
print(f" ✅ Loaded components: {components_loaded}")
# Load the fine-tuned component
# Root-level (no subfolder) component load: the whole repo IS the component.
if class_name == "LTXVideoTransformer3DModel":
from diffusers import LTXVideoTransformer3DModel
print(f" Loading fine-tuned transformer...")
pipe.transformer = LTXVideoTransformer3DModel.from_pretrained(
model_id_to_load,
torch_dtype=pipe_kwargs.get("torch_dtype", torch.float16),
token=hf_token
)
print(f" ✅ Fine-tuned transformer loaded successfully!")
loaded_with_base = True
else:
print(f" ⚠️ No components could be loaded from fine-tuned model")
print(f" Using base model: {base_model}")
loaded_with_base = True # Still use base model
elif class_name == "AutoencoderKLLTXVideo":
from diffusers import AutoencoderKLLTXVideo
print(f" Loading fine-tuned VAE...")
pipe.vae = AutoencoderKLLTXVideo.from_pretrained(
model_id_to_load,
torch_dtype=pipe_kwargs.get("torch_dtype", torch.float16),
token=hf_token
)
print(f" ✅ Fine-tuned VAE loaded successfully!")
loaded_with_base = True
except Exception as component_e:
# Detection is best-effort; fall through to Strategy 2 on any failure.
if debug:
print(f" [DEBUG] Component detection failed: {component_e}")
# Strategy 2: Try loading from base model and then fine-tuned weights (subfolder style)
if not loaded_with_base:
# Same keyword -> base-pipeline table as above (re-declared in this revision).
base_model_fallbacks = {
"ltx": "Lightricks/LTX-Video",
"ltxvideo": "Lightricks/LTX-Video",
"wan": "Wan-AI/Wan2.1-T2V-14B-Diffusers",
"svd": "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
"cogvideo": "THUDM/CogVideoX-5b",
"mochi": "genmo/mochi-1-preview",
}
for key, base_model in base_model_fallbacks.items():
if key in model_id_lower:
print(f" Trying to load base model first: {base_model}")
print(f" Then loading fine-tuned weights from: {model_id_to_load}")
try:
# Load base model
pipe = PipelineClass.from_pretrained(base_model, **pipe_kwargs)
print(f" ✅ Base model loaded")
# Now try to load the fine-tuned components
# This works for models that have component folders but no model_index.json
try:
from huggingface_hub import hf_hub_download, list_repo_files
# List files in the repo to see what components exist
repo_files = list_repo_files(model_id_to_load, token=hf_token)
# Check for component folders
component_folders = set()
for f in repo_files:
parts = f.split('/')
if len(parts) > 1:
component_folders.add(parts[0])
print(f" Found component folders: {component_folders}")
# NOTE(review): this "except" directly after the folder scan, with the
# component-loading loop repeated inside it, is almost certainly a
# diff-merge artifact (old and new hunk lines interleaved). As written
# it would be a syntax/flow error; reconcile against the repository.
except Exception as ft_e:
# Load each component that exists
components_loaded = []
for component in ['transformer', 'unet', 'vae', 'text_encoder', 'text_encoder_2']:
if component in component_folders:
try:
if component == 'transformer':
from diffusers import LTXVideoTransformer3DModel
pipe.transformer = LTXVideoTransformer3DModel.from_pretrained(
model_id_to_load, subfolder="transformer",
torch_dtype=pipe_kwargs.get("torch_dtype", torch.float16)
)
components_loaded.append(component)
elif component == 'vae':
from diffusers import AutoencoderKLLTXVideo
pipe.vae = AutoencoderKLLTXVideo.from_pretrained(
model_id_to_load, subfolder="vae",
torch_dtype=pipe_kwargs.get("torch_dtype", torch.float16)
)
components_loaded.append(component)
except Exception as comp_e:
if debug:
print(f" [DEBUG] Could not load {component}: {comp_e}")
if components_loaded:
print(f" ✅ Loaded components: {components_loaded}")
loaded_with_base = True
else:
print(f" ⚠️ No components could be loaded from fine-tuned model")
print(f" Using base model: {base_model}")
loaded_with_base = True # Still use base model
except Exception as ft_e:
if debug:
print(f" [DEBUG] Fine-tuned loading failed: {ft_e}")
print(f" Using base model: {base_model}")
loaded_with_base = True
break
# NOTE(review): duplicated handler block follows — the inner print references
# ft_e inside an "except ... as base_e" handler, where ft_e may be unbound.
# Another interleaving artifact; only one of these handler chains is real.
except Exception as base_e:
if debug:
print(f" [DEBUG] Fine-tuned loading failed: {ft_e}")
print(f" Using base model: {base_model}")
loaded_with_base = True
break
except Exception as base_e:
if debug:
print(f" [DEBUG] Base model loading failed: {base_e}")
continue
print(f" [DEBUG] Base model loading failed: {base_e}")
continue
# Strategy 2: Try with DiffusionPipeline (generic loader)
# Strategy 3: Try with DiffusionPipeline (generic loader)
if not loaded_with_base:
try:
print(f" Trying generic DiffusionPipeline for I2V model...")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment