Add system load detection and more conservative time estimates

System Load Detection:
- Added get_system_load() method to detect CPU, memory, and GPU utilization
- CPU load >80% adds 50% slowdown, >50% adds 20% slowdown
- Memory >90% adds 80% slowdown, >75% adds 40% slowdown
- GPU utilization >80% adds 60% slowdown, >50% adds 30% slowdown
- Warning displayed when system is under heavy load

More Conservative Base Estimates:
- WanPipeline: 3.0s → 5.0s/frame
- MochiPipeline: 5.0s → 8.0s/frame
- SVD: 1.5s → 2.5s/frame
- CogVideoX: 4.0s → 6.0s/frame
- LTXVideo: 4.0s → 6.0s/frame
- Flux: 8.0s → 12.0s/frame
- Allegro: 8.0s → 12.0s/frame
- Hunyuan: 10.0s → 15.0s/frame
- OpenSora: 6.0s → 10.0s/frame

More Conservative GPU Tier Multipliers:
- extreme: 1.0 → 1.2x
- high: 1.5 → 2.0x
- medium: 2.5 → 3.5x
- low: 4.0 → 5.0x
- very_low: 8.0 → 10.0x

More Conservative Model Loading Times:
- Huge (>50GB): 10min → 15min
- Large (30-50GB): 5min → 8min
- Medium (16-30GB): 3min → 5min
- Small (<16GB): 1.5min → 3min
- Download estimate: 15s/GB → 30s/GB

Additional Safety Margins:
- Overhead increased from 30% to 50%
- I2V processing overhead increased from 20% to 30%
- Added 20% safety margin for unpredictable factors
- Load factor applied to model loading time as well
parent 8c48cea3
......@@ -1940,6 +1940,49 @@ class TimingTracker:
return hw_info
def get_system_load(self):
    """Estimate current system load as a slowdown multiplier.

    Samples CPU utilization and memory pressure via psutil, plus GPU
    utilization via ``nvidia-smi`` when available, and folds each signal
    into a single multiplicative factor used to scale time estimates.

    Returns:
        float: load factor (1.0 = idle, higher = more loaded). Falls back
        to 1.3 (assume moderate load, to stay conservative) if the psutil
        queries fail entirely.
    """
    load_factor = 1.0
    try:
        # CPU load: a 0.5s sampling window keeps the probe cheap but
        # still representative of sustained load.
        cpu_percent = psutil.cpu_percent(interval=0.5)
        if cpu_percent > 80:
            load_factor += 0.5  # 50% slower if CPU is heavily loaded
        elif cpu_percent > 50:
            load_factor += 0.2  # 20% slower if CPU is moderately loaded

        # Memory pressure
        mem = psutil.virtual_memory()
        if mem.percent > 90:
            load_factor += 0.8  # 80% slower if memory is critical
        elif mem.percent > 75:
            load_factor += 0.4  # 40% slower if memory is high

        # GPU utilization (best-effort; only when nvidia-smi is present)
        try:
            result = subprocess.run(
                ['nvidia-smi', '--query-gpu=utilization.gpu',
                 '--format=csv,noheader,nounits'],
                capture_output=True, text=True, timeout=2
            )
            if result.returncode == 0:
                # First line corresponds to GPU 0; multi-GPU hosts are
                # summarized by their first device here.
                gpu_util = int(result.stdout.strip().split('\n')[0])
                if gpu_util > 80:
                    load_factor += 0.6  # 60% slower if GPU heavily used
                elif gpu_util > 50:
                    load_factor += 0.3  # 30% slower if GPU moderately used
        except (subprocess.SubprocessError, FileNotFoundError,
                ValueError, IndexError, OSError):
            # nvidia-smi missing, timed out, or emitted unparseable
            # output: just skip the GPU contribution. (Previously a bare
            # `except:`, which also swallowed KeyboardInterrupt/SystemExit.)
            pass
    except Exception:
        # If we can't get load info, assume moderate load to be safe.
        load_factor = 1.3
    return load_factor
def estimate_total_time(self, args, m_info, has_i2v=False, has_audio=False, has_lipsync=False, has_upscale=False):
"""Estimate total generation time based on parameters and hardware
......@@ -1949,6 +1992,8 @@ class TimingTracker:
- Model loading time (realistic for large models)
- Distributed/clustered GPU setups
- Resolution impact
- System load (CPU, memory, GPU utilization)
- Safety margin for unpredictable factors
"""
estimates = {}
......@@ -1958,18 +2003,24 @@ class TimingTracker:
gpu_vram = hw_info["gpu_vram"]
is_distributed = hw_info["is_distributed"]
# Get system load factor
load_factor = self.get_system_load()
# GPU performance multipliers (relative to RTX 4090 = 1.0)
# These are conservative estimates
# These are CONSERVATIVE estimates accounting for real-world conditions
tier_multipliers = {
"extreme": 1.0, # RTX 4090, A100, H100
"high": 1.5, # RTX 4080, RTX 3090, V100
"medium": 2.5, # RTX 4070, RTX 3080, T4
"low": 4.0, # RTX 3060, RTX 2070
"very_low": 8.0, # GTX 1060, etc.
"extreme": 1.2, # RTX 4090, A100, H100 (slightly conservative)
"high": 2.0, # RTX 4080, RTX 3090, V100
"medium": 3.5, # RTX 4070, RTX 3080, T4
"low": 5.0, # RTX 3060, RTX 2070
"very_low": 10.0, # GTX 1060, etc.
}
# Get the multiplier for this GPU
perf_multiplier = tier_multipliers.get(gpu_tier, 3.0)
perf_multiplier = tier_multipliers.get(gpu_tier, 4.0)
# Apply system load factor
perf_multiplier *= load_factor
# Distributed setup can help with memory but not always faster
# For single video generation, multi-GPU doesn't help much
......@@ -1987,34 +2038,34 @@ class TimingTracker:
# These are REALISTIC estimates for the ENTIRE generation process
# including all diffusion steps, VAE decoding, etc.
if "WanPipeline" in model_class:
base_time_per_frame = 3.0 # Wan 14B is compute heavy, ~25 steps
base_time_per_frame = 5.0 # Wan 14B is compute heavy, ~25-50 steps
elif "MochiPipeline" in model_class:
base_time_per_frame = 5.0 # Mochi is very slow
base_time_per_frame = 8.0 # Mochi is very slow
elif "StableVideoDiffusionPipeline" in model_class:
base_time_per_frame = 1.5 # SVD is relatively fast but still ~25 steps
base_time_per_frame = 2.5 # SVD is relatively fast but still ~25 steps
elif "CogVideoXPipeline" in model_class:
base_time_per_frame = 4.0 # CogVideoX 5B is slow
base_time_per_frame = 6.0 # CogVideoX 5B is slow
elif "LTXVideoPipeline" in model_class or "ltx" in model_id:
base_time_per_frame = 4.0 # LTX is moderate-slow, ~25 steps
base_time_per_frame = 6.0 # LTX is moderate-slow, ~25 steps
elif "FluxPipeline" in model_class:
base_time_per_frame = 8.0 # Flux is slow for images (~20-30 steps)
base_time_per_frame = 12.0 # Flux is slow for images (~20-30 steps)
elif "StableDiffusionXLPipeline" in model_class:
base_time_per_frame = 1.0 # SDXL is fast for images
base_time_per_frame = 2.0 # SDXL is fast for images
elif "StableDiffusion3Pipeline" in model_class:
base_time_per_frame = 2.0 # SD3 is moderate
base_time_per_frame = 3.0 # SD3 is moderate
elif "AllegroPipeline" in model_class:
base_time_per_frame = 8.0 # Allegro is very slow
base_time_per_frame = 12.0 # Allegro is very slow
elif "HunyuanDiTPipeline" in model_class:
base_time_per_frame = 10.0 # Hunyuan is very slow
base_time_per_frame = 15.0 # Hunyuan is very slow
elif "OpenSoraPipeline" in model_class:
base_time_per_frame = 6.0 # OpenSora is slow
base_time_per_frame = 10.0 # OpenSora is slow
elif "I2VGenXLPipeline" in model_class:
base_time_per_frame = 3.0 # I2VGenXL
base_time_per_frame = 5.0 # I2VGenXL
elif "AnimateDiffPipeline" in model_class:
base_time_per_frame = 2.0 # AnimateDiff
base_time_per_frame = 3.0 # AnimateDiff
else:
# Default - be conservative for unknown models
base_time_per_frame = 4.0
base_time_per_frame = 6.0
# Apply GPU performance multiplier
time_per_frame = base_time_per_frame * perf_multiplier
......@@ -2034,25 +2085,29 @@ class TimingTracker:
print(f" ⚠️ Model requires {model_vram_req:.1f}GB VRAM, you have {gpu_vram:.1f}GB")
print(f" Expect {offload_penalty:.1f}x slower due to CPU offloading")
# Model loading time estimate (REALISTIC)
# Model loading time estimate (CONSERVATIVE)
# Large models take MINUTES to load, not seconds
# This includes: download, weight loading, CUDA initialization, warmup
# Also accounts for slow disk I/O, network issues, etc.
if model_vram_req > 50:
load_time = 600 # 10 minutes for huge models (100GB+)
load_time = 900 # 15 minutes for huge models (100GB+)
elif model_vram_req > 30:
load_time = 300 # 5 minutes for large models
load_time = 480 # 8 minutes for large models
elif model_vram_req > 16:
load_time = 180 # 3 minutes for medium models
load_time = 300 # 5 minutes for medium models
else:
load_time = 90 # 1.5 minutes for small models
load_time = 180 # 3 minutes for small models
# Add network download time estimate (if model not cached)
# This is a rough estimate - actual time depends on connection
model_size_gb = model_vram_req * 1.5 # Models are usually larger than VRAM requirement
download_time = model_size_gb * 15 # ~15 seconds per GB on average connection
download_time = model_size_gb * 30 # ~30 seconds per GB (conservative)
# Only add if model might not be cached (first run)
# We'll be conservative and include partial download time
load_time += min(download_time, 120) # Cap at 2min extra for potential download
load_time += min(download_time, 300) # Cap at 5min extra for potential download
# Apply load factor to loading time too
load_time *= load_factor
estimates["model_loading"] = load_time
......@@ -2119,11 +2174,15 @@ class TimingTracker:
video_time = num_frames * time_per_frame
# Add overhead for memory management, saving, etc.
video_time *= 1.3 # 30% overhead for I/O, memory ops
video_time *= 1.5 # 50% overhead for I/O, memory ops, unexpected delays
# For I2V, add extra time for image encoding and conditioning
if has_i2v:
video_time *= 1.2 # 20% extra for I2V processing
video_time *= 1.3 # 30% extra for I2V processing
# Add safety margin for unpredictable factors
# This accounts for: thermal throttling, other processes, disk I/O, etc.
video_time *= 1.2 # 20% safety margin
estimates["video_generation"] = video_time
......@@ -2147,10 +2206,17 @@ class TimingTracker:
# Print hardware info for transparency
print(f"\n💻 Hardware detected: {hw_info['gpu_name']} ({hw_info['gpu_vram']:.1f}GB VRAM)")
print(f" GPU tier: {gpu_tier.upper()} (performance multiplier: {perf_multiplier:.1f}x)")
print(f" System load factor: {load_factor:.1f}x")
if is_distributed:
print(f" Distributed setup: {hw_info['gpu_count']} GPUs")
print(f" System RAM: {hw_info['ram_gb']:.1f}GB, CPU cores: {hw_info['cpu_cores']}")
# Print warning if system is under heavy load
if load_factor > 1.5:
print(f"\n ⚠️ WARNING: System is under heavy load (factor: {load_factor:.1f}x)")
print(f" Generation will be significantly slower than usual.")
print(f" Consider closing other applications for better performance.")
return estimates
def print_estimate(self, estimates):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment