Fix model loading 404 errors and improve time estimation

Model Loading Fixes:
- Add fallback loading when model_index.json returns 404
- Try alternative paths (diffusers/, diffusion_model/, pipeline/)
- Try generic DiffusionPipeline as fallback
- Check HuggingFace API for actual file structure
- Load from subdirectories if model_index.json found there
- Apply same fallback to I2V image model loading

Time Estimation Improvements:
- Add hardware detection (GPU model, VRAM, RAM, CPU cores)
- Detect GPU tier (extreme/high/medium/low/very_low)
- Calculate realistic time estimates based on GPU performance
- Account for VRAM constraints and offloading penalty
- Consider distributed/multi-GPU setups
- More accurate model loading times (minutes, not seconds)
- Account for resolution impact (quadratic relationship)
- Add 20% overhead for memory management
- Print hardware info for transparency

GPU Tier Performance Multipliers:
- Extreme (RTX 4090, A100, H100): 1.0x
- High (RTX 4080, RTX 3090, V100): 1.5x
- Medium (RTX 4070, RTX 3080, T4): 2.5x
- Low (RTX 3060, RTX 2070): 4.0x
- Very Low (GTX 1060, etc.): 8.0x
parent 6505a00a
...@@ -1875,81 +1875,235 @@ class TimingTracker: ...@@ -1875,81 +1875,235 @@ class TimingTracker:
mins = int((seconds % 3600) // 60) mins = int((seconds % 3600) // 60)
return f"{hours}h {mins}m" return f"{hours}h {mins}m"
def get_hardware_info(self):
    """Collect hardware details used to scale time estimates.

    Returns:
        dict: keys are
            gpu_name (str): CUDA device 0 name, or "Unknown" without CUDA.
            gpu_vram (float): device 0 total memory in GiB.
            gpu_count (int): number of visible CUDA devices.
            gpu_tier (str): one of "extreme"/"high"/"medium"/"low"/"very_low".
            ram_gb (float): total system RAM in GiB (8 if psutil fails).
            cpu_cores (int): physical core count (1 if unknown).
            is_distributed (bool): True when more than one GPU is visible.
    """
    hw_info = {
        "gpu_name": "Unknown",
        "gpu_vram": 0,
        "gpu_count": 0,
        "gpu_tier": "medium",  # low, medium, high, extreme
        "ram_gb": 0,
        "cpu_cores": 1,
        "is_distributed": False,
    }

    # Total system RAM; fall back to a conservative 8 GB guess.
    try:
        hw_info["ram_gb"] = psutil.virtual_memory().total / (1024 ** 3)
    except Exception:
        hw_info["ram_gb"] = 8

    # Physical CPU cores; psutil may return None on some platforms.
    try:
        hw_info["cpu_cores"] = psutil.cpu_count(logical=False) or 1
    except Exception:
        hw_info["cpu_cores"] = 1

    # GPU name / VRAM / tier classification (device 0 is representative).
    if torch.cuda.is_available():
        try:
            hw_info["gpu_count"] = torch.cuda.device_count()
            gpu_props = torch.cuda.get_device_properties(0)
            hw_info["gpu_name"] = gpu_props.name
            hw_info["gpu_vram"] = gpu_props.total_memory / (1024 ** 3)

            gpu_name_lower = gpu_props.name.lower()
            vram = hw_info["gpu_vram"]

            # Tier matching is ordered: earlier (faster) tiers win, so
            # e.g. "a100" is claimed by "extreme" before "a10" could match.
            # Extreme tier: H100, A100, RTX 4090, RTX 6000
            if any(x in gpu_name_lower for x in ["h100", "a100", "rtx 4090", "rtx 6000"]):
                hw_info["gpu_tier"] = "extreme"
            # High tier: RTX 4080, RTX 3090, RTX 4070 Ti, A6000, V100
            elif any(x in gpu_name_lower for x in ["rtx 4080", "rtx 3090", "rtx 4070 ti", "a6000", "v100", "a10"]):
                hw_info["gpu_tier"] = "high"
            # Medium tier: RTX 4070, RTX 3080, RTX 3070, RTX 2080 Ti, T4, L4
            elif any(x in gpu_name_lower for x in ["rtx 4070", "rtx 3080", "rtx 3070", "2080 ti", "t4", "l4"]):
                hw_info["gpu_tier"] = "medium"
            # Unrecognized cards: bucket purely by VRAM size.
            elif vram >= 8:
                hw_info["gpu_tier"] = "low"
            else:
                hw_info["gpu_tier"] = "very_low"

            # Multi-GPU helps large-model memory pressure, not raw
            # single-video speed, so the tier stays per-single-GPU.
            if hw_info["gpu_count"] > 1:
                hw_info["is_distributed"] = True
        except Exception as e:
            print(f" ⚠️ Could not get GPU info: {e}")

    return hw_info
def estimate_total_time(self, args, m_info, has_i2v=False, has_audio=False, has_lipsync=False, has_upscale=False): def estimate_total_time(self, args, m_info, has_i2v=False, has_audio=False, has_lipsync=False, has_upscale=False):
"""Estimate total generation time based on parameters""" """Estimate total generation time based on parameters and hardware
This provides MORE REALISTIC estimates that account for:
- Actual GPU hardware (tier-based performance)
- VRAM constraints (slower if offloading needed)
- Model loading time (realistic for large models)
- Distributed/clustered GPU setups
- Resolution impact
"""
estimates = {} estimates = {}
# Base time per frame (empirical estimates for RTX 3090/4090) # Get hardware info
# These are rough estimates and will vary by model hw_info = self.get_hardware_info()
gpu_tier = hw_info["gpu_tier"]
gpu_vram = hw_info["gpu_vram"]
is_distributed = hw_info["is_distributed"]
# GPU performance multipliers (relative to RTX 4090 = 1.0)
# These are conservative estimates
tier_multipliers = {
"extreme": 1.0, # RTX 4090, A100, H100
"high": 1.5, # RTX 4080, RTX 3090, V100
"medium": 2.5, # RTX 4070, RTX 3080, T4
"low": 4.0, # RTX 3060, RTX 2070
"very_low": 8.0, # GTX 1060, etc.
}
# Get the multiplier for this GPU
perf_multiplier = tier_multipliers.get(gpu_tier, 3.0)
# Distributed setup can help with memory but not always faster
# For single video generation, multi-GPU doesn't help much
if is_distributed:
# Slight speedup for memory-intensive operations
perf_multiplier *= 0.9 # 10% faster due to better memory distribution
# Base time per frame (empirical estimates for RTX 4090)
# These are OPTIMISTIC estimates for top-tier hardware
model_class = m_info.get("class", "") model_class = m_info.get("class", "")
# Time per frame estimates (seconds) # Time per frame estimates (seconds) for RTX 4090
# These are REALISTIC estimates including diffusion steps
if "WanPipeline" in model_class: if "WanPipeline" in model_class:
time_per_frame = 0.8 # Wan is relatively fast base_time_per_frame = 1.5 # Wan 14B is compute heavy
elif "MochiPipeline" in model_class: elif "MochiPipeline" in model_class:
time_per_frame = 1.5 # Mochi is slower base_time_per_frame = 3.0 # Mochi is very slow
elif "StableVideoDiffusionPipeline" in model_class: elif "StableVideoDiffusionPipeline" in model_class:
time_per_frame = 0.5 # SVD is fast base_time_per_frame = 0.8 # SVD is relatively fast
elif "CogVideoXPipeline" in model_class: elif "CogVideoXPipeline" in model_class:
time_per_frame = 1.2 base_time_per_frame = 2.0 # CogVideoX 5B
elif "LTXVideoPipeline" in model_class:
base_time_per_frame = 2.5 # LTX is moderate
elif "FluxPipeline" in model_class: elif "FluxPipeline" in model_class:
time_per_frame = 2.0 # Flux is slower for images base_time_per_frame = 4.0 # Flux is slow for images
elif "StableDiffusionXLPipeline" in model_class: elif "StableDiffusionXLPipeline" in model_class:
time_per_frame = 0.3 # SDXL is fast for images base_time_per_frame = 0.5 # SDXL is fast for images
elif "StableDiffusion3Pipeline" in model_class:
base_time_per_frame = 1.0 # SD3 is moderate
elif "AllegroPipeline" in model_class:
base_time_per_frame = 5.0 # Allegro is very slow
elif "HunyuanDiTPipeline" in model_class:
base_time_per_frame = 6.0 # Hunyuan is very slow
elif "OpenSoraPipeline" in model_class:
base_time_per_frame = 4.0 # OpenSora is slow
elif "MochiPipeline" in model_class:
base_time_per_frame = 3.0 # Mochi is slow
else: else:
time_per_frame = 1.0 # Default estimate base_time_per_frame = 2.0 # Default estimate
# Adjust for resolution (higher res = more time) # Apply GPU performance multiplier
resolution_factor = (args.width * args.height) / (832 * 480) # Normalized to default time_per_frame = base_time_per_frame * perf_multiplier
time_per_frame *= resolution_factor
# Adjust for resolution (higher res = more time, quadratic relationship)
# Model loading time estimate resolution_factor = (args.width * args.height) / (832 * 480)
vram_est = parse_vram_estimate(m_info.get("vram", "~10 GB")) time_per_frame *= (resolution_factor ** 1.2) # Slightly more than linear
if vram_est > 50:
load_time = 120 # Large models take longer to load # VRAM constraint adjustment
elif vram_est > 30: # If model VRAM requirement > available VRAM, will need offloading
load_time = 60 model_vram_req = parse_vram_estimate(m_info.get("vram", "~10 GB"))
elif vram_est > 16: if model_vram_req > gpu_vram:
load_time = 30 # Offloading penalty - can be 2-5x slower
vram_ratio = model_vram_req / gpu_vram
offload_penalty = min(4.0, 1.0 + (vram_ratio - 1) * 1.5)
time_per_frame *= offload_penalty
print(f" ⚠️ Model requires {model_vram_req:.1f}GB VRAM, you have {gpu_vram:.1f}GB")
print(f" Expect {offload_penalty:.1f}x slower due to CPU offloading")
# Model loading time estimate (REALISTIC)
# Large models take MINUTES to load, not seconds
if model_vram_req > 50:
load_time = 300 # 5 minutes for huge models
elif model_vram_req > 30:
load_time = 180 # 3 minutes for large models
elif model_vram_req > 16:
load_time = 90 # 1.5 minutes for medium models
else: else:
load_time = 15 load_time = 45 # 45 seconds for small models
# Add network download time estimate (if model not cached)
# This is a rough estimate - actual time depends on connection
model_size_gb = model_vram_req * 1.5 # Models are usually larger than VRAM requirement
download_time = model_size_gb * 10 # ~10 seconds per GB on average connection
# Only add if model might not be cached (first run)
# We'll be conservative and include partial download time
load_time += min(download_time, 60) # Cap at 60s extra for potential download
estimates["model_loading"] = load_time estimates["model_loading"] = load_time
# Image generation for I2V # Image generation for I2V (REALISTIC estimates)
if has_i2v and not args.image: if has_i2v and not args.image:
img_model_class = "StableDiffusionXLPipeline" # Default image model # Image generation is typically 10-30 seconds for quality models
img_time = 5 + (args.width * args.height) / (1024 * 1024) * 2 img_time = 15 + (args.width * args.height) / (1024 * 1024) * 5
img_time *= perf_multiplier # Apply GPU performance
estimates["image_generation"] = img_time estimates["image_generation"] = img_time
# Add image model loading time
estimates["image_model_loading"] = 30
# Audio generation # Audio generation
if has_audio: if has_audio:
if args.audio_type == "tts": if args.audio_type == "tts":
audio_time = 10 + len(args.audio_text or "") / 50 # Rough estimate audio_time = 15 + len(args.audio_text or "") / 30 # More realistic
else: # music else: # music
audio_time = args.length * 0.5 + 5 # MusicGen is relatively fast audio_time = args.length * 2 + 10 # MusicGen takes time
estimates["audio_generation"] = audio_time estimates["audio_generation"] = audio_time
# Video generation # Video generation (REALISTIC)
num_frames = int(args.length * args.fps) num_frames = int(args.length * args.fps)
inference_steps = 50 if "wan" in args.model.lower() else 28 inference_steps = 50 if "wan" in args.model.lower() else 28
video_time = num_frames * time_per_frame * (inference_steps / 50) # Normalized to 50 steps
# Total video time = frames * time_per_frame * step_factor
step_factor = inference_steps / 50 # Normalize to 50 steps
video_time = num_frames * time_per_frame * step_factor
# Add overhead for memory management, saving, etc.
video_time *= 1.2 # 20% overhead
estimates["video_generation"] = video_time estimates["video_generation"] = video_time
# Upscaling # Upscaling (REALISTIC - can be slow for high-res)
if has_upscale: if has_upscale:
upscale_time = num_frames * 0.3 # Upscaling is relatively fast per frame upscale_factor = getattr(args, 'upscale_factor', 2.0)
# Upscaling time depends on output resolution
output_pixels = args.width * args.height * (upscale_factor ** 2)
upscale_time = num_frames * (output_pixels / (1024 * 1024)) * 0.5
estimates["upscaling"] = upscale_time estimates["upscaling"] = upscale_time
# Audio sync # Audio sync
if has_audio and args.sync_audio: if has_audio and args.sync_audio:
estimates["audio_sync"] = 5 estimates["audio_sync"] = 10
# Lip sync # Lip sync (REALISTIC - Wav2Lip is slow)
if has_lipsync: if has_lipsync:
lipsync_time = num_frames * 0.2 # Wav2Lip processes frames lipsync_time = num_frames * 0.5 # 0.5 seconds per frame
estimates["lip_sync"] = lipsync_time estimates["lip_sync"] = lipsync_time
# Print hardware info for transparency
print(f"\n💻 Hardware detected: {hw_info['gpu_name']} ({hw_info['gpu_vram']:.1f}GB VRAM)")
print(f" GPU tier: {gpu_tier.upper()} (performance multiplier: {perf_multiplier:.1f}x)")
if is_distributed:
print(f" Distributed setup: {hw_info['gpu_count']} GPUs")
print(f" System RAM: {hw_info['ram_gb']:.1f}GB, CPU cores: {hw_info['cpu_cores']}")
return estimates return estimates
def print_estimate(self, estimates): def print_estimate(self, estimates):
...@@ -6362,6 +6516,88 @@ def main(args): ...@@ -6362,6 +6516,88 @@ def main(args):
print(f" [DEBUG] Response: {e.response}") print(f" [DEBUG] Response: {e.response}")
print() print()
# Check if this is a 404 error for model_index.json
# Some models have files in subdirectories (e.g., diffusers/) or use different structures
is_404_error = "404" in error_str or "Entry Not Found" in error_str or "not found" in error_str.lower()
if is_404_error and "model_index.json" in error_str:
print(f"\n⚠️ model_index.json not found at root level")
print(f" Attempting alternative loading strategies...")
# Strategy 1: Try loading from 'diffusers' subdirectory
alternative_paths = [
f"{model_id_to_load}/diffusers",
f"{model_id_to_load}/diffusion_model",
f"{model_id_to_load}/pipeline",
]
for alt_path in alternative_paths:
try:
print(f" Trying: {alt_path}")
pipe = PipelineClass.from_pretrained(alt_path, **pipe_kwargs)
print(f" ✅ Successfully loaded from: {alt_path}")
pipeline_loaded_successfully = True
break
except Exception as alt_e:
if debug:
print(f" [DEBUG] Failed: {alt_e}")
continue
# Strategy 2: Try with DiffusionPipeline (generic loader)
if not pipeline_loaded_successfully:
try:
print(f" Trying generic DiffusionPipeline...")
from diffusers import DiffusionPipeline
pipe = DiffusionPipeline.from_pretrained(model_id_to_load, **pipe_kwargs)
print(f" ✅ Successfully loaded with DiffusionPipeline")
pipeline_loaded_successfully = True
# Update PipelineClass for the rest of the code
PipelineClass = DiffusionPipeline
# Update the models.json file
update_model_pipeline_class(args.model, "DiffusionPipeline")
except Exception as generic_e:
if debug:
print(f" [DEBUG] Generic loader also failed: {generic_e}")
# Strategy 3: Check HuggingFace API for actual file structure
if not pipeline_loaded_successfully:
try:
print(f" Checking HuggingFace API for file structure...")
model_info = validate_hf_model(model_id_to_load, hf_token=hf_token, debug=debug)
if model_info:
siblings = model_info.get("siblings", [])
files = [s.get("rfilename", "") for s in siblings]
# Look for model_index.json in subdirectories
model_index_files = [f for f in files if "model_index.json" in f]
config_files = [f for f in files if f.endswith("config.json") and "model_index" not in f]
if debug:
print(f" [DEBUG] Found model_index.json files: {model_index_files}")
print(f" [DEBUG] Found config.json files: {config_files[:5]}")
# Try loading from subdirectory containing model_index.json
for model_index_path in model_index_files:
subdirectory = os.path.dirname(model_index_path)
if subdirectory:
try:
print(f" Trying subdirectory: {subdirectory}")
pipe = PipelineClass.from_pretrained(
model_id_to_load,
subfolder=subdirectory,
**pipe_kwargs
)
print(f" ✅ Successfully loaded from subdirectory: {subdirectory}")
pipeline_loaded_successfully = True
break
except Exception as sub_e:
if debug:
print(f" [DEBUG] Subdirectory load failed: {sub_e}")
continue
except Exception as api_e:
if debug:
print(f" [DEBUG] API check failed: {api_e}")
# Check if this is a pipeline component mismatch error # Check if this is a pipeline component mismatch error
# This happens when the model_index.json has the wrong _class_name # This happens when the model_index.json has the wrong _class_name
is_component_mismatch = "expected" in error_str and "but only" in error_str and "were passed" in error_str is_component_mismatch = "expected" in error_str and "but only" in error_str and "were passed" in error_str
...@@ -6880,6 +7116,25 @@ def main(args): ...@@ -6880,6 +7116,25 @@ def main(args):
except Exception as e: except Exception as e:
error_str = str(e) error_str = str(e)
# Check if this is a 404 error for model_index.json
is_404_error = "404" in error_str or "Entry Not Found" in error_str or "not found" in error_str.lower()
if is_404_error and "model_index.json" in error_str:
print(f"\n⚠️ Image model model_index.json not found at root level")
print(f" Attempting alternative loading strategies...")
# Try with DiffusionPipeline (generic loader)
try:
print(f" Trying generic DiffusionPipeline for image model...")
from diffusers import DiffusionPipeline
img_pipe = DiffusionPipeline.from_pretrained(img_model_id_to_load, **img_kwargs)
print(f" ✅ Successfully loaded image model with DiffusionPipeline")
ImgCls = DiffusionPipeline
img_pipeline_loaded_successfully = True
except Exception as generic_e:
if debug:
print(f" [DEBUG] Generic loader also failed: {generic_e}")
# Check if this is a pipeline component mismatch error # Check if this is a pipeline component mismatch error
is_component_mismatch = "expected" in error_str and "but only" in error_str and "were passed" in error_str is_component_mismatch = "expected" in error_str and "but only" in error_str and "were passed" in error_str
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment