Fix time estimation to be more realistic

- Increased base time per frame for all models (2–4× higher, for more realistic estimates)
- Added LTXVideoPipeline specific estimate (4.0s/frame)
- Increased model loading times (90s-10min based on model size)
- Added realistic image model loading times for I2V mode
- Added image generation time based on model type (Flux, SDXL, SD3)
- Added 30% overhead for I/O and memory operations
- Added 20% extra time for I2V processing
- Increased resolution scaling exponent to 1.3 (super-linear, slightly more than proportional to pixel count)
- Increased download time estimate to 15s/GB with 2min cap

The previous estimates were too optimistic and didn't account for:
- Full diffusion process (multiple denoising steps)
- Model loading from disk/download
- Memory management overhead
- I2V-specific processing time
- Image model loading for I2V mode
parent 5291deb2
......@@ -1977,45 +1977,51 @@ class TimingTracker:
# Slight speedup for memory-intensive operations
perf_multiplier *= 0.9 # 10% faster due to better memory distribution
# Base time per frame (empirical estimates for RTX 4090)
# These are OPTIMISTIC estimates for top-tier hardware
# Base time per frame (REALISTIC estimates for RTX 4090)
# These account for the FULL diffusion process, not just one step
# Video generation involves multiple denoising steps per frame
model_class = m_info.get("class", "")
model_id = m_info.get("id", "").lower()
# Time per frame estimates (seconds) for RTX 4090
# These are REALISTIC estimates including diffusion steps
# These are REALISTIC estimates for the ENTIRE generation process
# including all diffusion steps, VAE decoding, etc.
if "WanPipeline" in model_class:
base_time_per_frame = 1.5 # Wan 14B is compute heavy
base_time_per_frame = 3.0 # Wan 14B is compute heavy, ~25 steps
elif "MochiPipeline" in model_class:
base_time_per_frame = 3.0 # Mochi is very slow
base_time_per_frame = 5.0 # Mochi is very slow
elif "StableVideoDiffusionPipeline" in model_class:
base_time_per_frame = 0.8 # SVD is relatively fast
base_time_per_frame = 1.5 # SVD is relatively fast but still ~25 steps
elif "CogVideoXPipeline" in model_class:
base_time_per_frame = 2.0 # CogVideoX 5B
elif "LTXVideoPipeline" in model_class:
base_time_per_frame = 2.5 # LTX is moderate
base_time_per_frame = 4.0 # CogVideoX 5B is slow
elif "LTXVideoPipeline" in model_class or "ltx" in model_id:
base_time_per_frame = 4.0 # LTX is moderate-slow, ~25 steps
elif "FluxPipeline" in model_class:
base_time_per_frame = 4.0 # Flux is slow for images
base_time_per_frame = 8.0 # Flux is slow for images (~20-30 steps)
elif "StableDiffusionXLPipeline" in model_class:
base_time_per_frame = 0.5 # SDXL is fast for images
base_time_per_frame = 1.0 # SDXL is fast for images
elif "StableDiffusion3Pipeline" in model_class:
base_time_per_frame = 1.0 # SD3 is moderate
base_time_per_frame = 2.0 # SD3 is moderate
elif "AllegroPipeline" in model_class:
base_time_per_frame = 5.0 # Allegro is very slow
base_time_per_frame = 8.0 # Allegro is very slow
elif "HunyuanDiTPipeline" in model_class:
base_time_per_frame = 6.0 # Hunyuan is very slow
base_time_per_frame = 10.0 # Hunyuan is very slow
elif "OpenSoraPipeline" in model_class:
base_time_per_frame = 4.0 # OpenSora is slow
elif "MochiPipeline" in model_class:
base_time_per_frame = 3.0 # Mochi is slow
base_time_per_frame = 6.0 # OpenSora is slow
elif "I2VGenXLPipeline" in model_class:
base_time_per_frame = 3.0 # I2VGenXL
elif "AnimateDiffPipeline" in model_class:
base_time_per_frame = 2.0 # AnimateDiff
else:
base_time_per_frame = 2.0 # Default estimate
# Default - be conservative for unknown models
base_time_per_frame = 4.0
# Apply GPU performance multiplier
time_per_frame = base_time_per_frame * perf_multiplier
# Adjust for resolution (higher res = more time, quadratic relationship)
resolution_factor = (args.width * args.height) / (832 * 480)
time_per_frame *= (resolution_factor ** 1.2) # Slightly more than linear
time_per_frame *= (resolution_factor ** 1.3) # More than linear - memory bandwidth
# VRAM constraint adjustment
# If model VRAM requirement > available VRAM, will need offloading
......@@ -2030,34 +2036,61 @@ class TimingTracker:
# Model loading time estimate (REALISTIC)
# Large models take MINUTES to load, not seconds
# This includes: download, weight loading, CUDA initialization, warmup
if model_vram_req > 50:
load_time = 300 # 5 minutes for huge models
load_time = 600 # 10 minutes for huge models (100GB+)
elif model_vram_req > 30:
load_time = 180 # 3 minutes for large models
load_time = 300 # 5 minutes for large models
elif model_vram_req > 16:
load_time = 90 # 1.5 minutes for medium models
load_time = 180 # 3 minutes for medium models
else:
load_time = 45 # 45 seconds for small models
load_time = 90 # 1.5 minutes for small models
# Add network download time estimate (if model not cached)
# This is a rough estimate - actual time depends on connection
model_size_gb = model_vram_req * 1.5 # Models are usually larger than VRAM requirement
download_time = model_size_gb * 10 # ~10 seconds per GB on average connection
download_time = model_size_gb * 15 # ~15 seconds per GB on average connection
# Only add if model might not be cached (first run)
# We'll be conservative and include partial download time
load_time += min(download_time, 60) # Cap at 60s extra for potential download
load_time += min(download_time, 120) # Cap at 2min extra for potential download
estimates["model_loading"] = load_time
# Image generation for I2V (REALISTIC estimates)
if has_i2v and not args.image:
# Image generation is typically 10-30 seconds for quality models
img_time = 15 + (args.width * args.height) / (1024 * 1024) * 5
# Image generation for Flux/SDXL takes significant time
# Flux: ~20-40 steps, SDXL: ~20-30 steps
img_model_class = ""
if hasattr(args, 'image_model') and args.image_model:
img_model_info = MODELS.get(args.image_model, {})
img_model_class = img_model_info.get("class", "")
# Base image generation time (seconds)
if "FluxPipeline" in img_model_class:
img_time = 45 # Flux is slow
elif "StableDiffusion3Pipeline" in img_model_class:
img_time = 20 # SD3 is moderate
elif "StableDiffusionXLPipeline" in img_model_class:
img_time = 15 # SDXL is faster
else:
img_time = 30 # Default for unknown models
# Scale by resolution
img_time *= (args.width * args.height) / (1024 * 1024)
img_time *= perf_multiplier # Apply GPU performance
estimates["image_generation"] = img_time
# Add image model loading time
estimates["image_model_loading"] = 30
# Add image model loading time (REALISTIC)
# Image models also need to be loaded from disk/downloaded
img_model_vram = parse_vram_estimate(MODELS.get(args.image_model, {}).get("vram", "~10 GB"))
if img_model_vram > 20:
img_load_time = 180 # 3 minutes for large image models
elif img_model_vram > 10:
img_load_time = 90 # 1.5 minutes for medium
else:
img_load_time = 45 # 45 seconds for small
estimates["image_model_loading"] = img_load_time
# Audio generation
if has_audio:
......@@ -2069,14 +2102,28 @@ class TimingTracker:
# Video generation (REALISTIC)
num_frames = int(args.length * args.fps)
inference_steps = 50 if "wan" in args.model.lower() else 28
# Total video time = frames * time_per_frame * step_factor
step_factor = inference_steps / 50 # Normalize to 50 steps
video_time = num_frames * time_per_frame * step_factor
# Get actual inference steps for the model
# Most video models use 20-50 steps
if "wan" in args.model.lower():
inference_steps = 50
elif "svd" in args.model.lower() or "stable-video" in args.model.lower():
inference_steps = 25
elif "ltx" in args.model.lower():
inference_steps = 25
else:
inference_steps = 30 # Default
# Total video time = frames * time_per_frame
# time_per_frame already accounts for diffusion steps
video_time = num_frames * time_per_frame
# Add overhead for memory management, saving, etc.
video_time *= 1.2 # 20% overhead
video_time *= 1.3 # 30% overhead for I/O, memory ops
# For I2V, add extra time for image encoding and conditioning
if has_i2v:
video_time *= 1.2 # 20% extra for I2V processing
estimates["video_generation"] = video_time
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment