Fix time estimation for T2I models and suppress log output for JSON

- Move T2I detection before time estimation
- Add has_t2i parameter to estimate_total_time
- T2I models now show image_generation time instead of video_generation
- Add Lumina pipelines to T2I model detection
- Suppress 'Loaded models' message when --json flag is used, so stdout remains valid machine-parseable JSON
parent 6bd2bbd4
......@@ -2140,12 +2140,17 @@ def print_search_results(results, args):
# Initialize MODELS from external config only.
# NOTE(review): this span is a diff extract and the original indentation was
# lost in rendering — the if/else bodies below appear flush-left. Restore the
# real indentation from the source file before applying any change here.
MODELS = {}
# Check if JSON output is requested (for suppressing log messages)
# NOTE(review): sys.argv is scanned directly rather than via argparse —
# presumably because this runs at import time, before argument parsing;
# confirm against the rest of the module.
_json_output = "--json" in sys.argv
# Load external models config
_external_models = load_models_config()
if _external_models:
MODELS = _external_models
if not _json_output:
# Suppressed under --json so stdout stays machine-parseable.
print(f"📁 Loaded {len(_external_models)} models from {MODELS_CONFIG_FILE}")
else:
if not _json_output:
# Same suppression for the "no models configured" hint.
# NOTE(review): whether the second print below is inside this guard is
# ambiguous in this extract — verify against the real source.
print(f"⚠️ No models configured. Run: videogen --update-models")
print(f" Or add a model: videogen --add-model <model_id> --name <name>")
......@@ -2379,7 +2384,7 @@ class TimingTracker:
return load_factor
def estimate_total_time(self, args, m_info, has_i2v=False, has_audio=False, has_lipsync=False, has_upscale=False):
def estimate_total_time(self, args, m_info, has_i2v=False, has_audio=False, has_lipsync=False, has_upscale=False, has_t2i=False):
"""Estimate total generation time based on parameters and hardware
This provides MORE REALISTIC estimates that account for:
......@@ -2551,7 +2556,29 @@ class TimingTracker:
audio_time = args.length * 2 + 10 # MusicGen takes time
estimates["audio_generation"] = audio_time
# Video generation (REALISTIC)
# Video generation (REALISTIC) - only for video models, not T2I
if has_t2i:
# For T2I models, estimate image generation time instead of video
# Get base time per image from model class
model_class = m_info.get("class", "")
if "FluxPipeline" in model_class:
img_time = 45 # Flux is slow (~20-30 steps)
elif "StableDiffusion3Pipeline" in model_class:
img_time = 20 # SD3 is moderate
elif "StableDiffusionXLPipeline" in model_class:
img_time = 15 # SDXL is faster
elif "LuminaText2ImgPipeline" in model_class or "Lumina2Text2ImgPipeline" in model_class:
img_time = 25 # Lumina is moderate
else:
img_time = 30 # Default for unknown models
# Scale by resolution
img_time *= (args.width * args.height) / (1024 * 1024)
img_time *= perf_multiplier # Apply GPU performance
estimates["image_generation"] = img_time
else:
# Video generation
num_frames = int(args.length * args.fps)
# Get actual inference steps for the model
......@@ -8001,29 +8028,33 @@ def main(args):
main_prompt = ", ".join(args.prompt)
init_image = None
# Detect if we should generate a static image (T2I mode)
# Conditions: T2I model, OR output ends with image extension, OR only prompt_image specified
is_t2i_model = m_info.get("class") in ["StableDiffusionXLPipeline", "FluxPipeline",
"StableDiffusion3Pipeline", "LuminaText2ImgPipeline",
"Lumina2Text2ImgPipeline"]
output_ext = os.path.splitext(args.output)[1].lower()
is_image_output = output_ext in [".png", ".jpg", ".jpeg", ".gif", ".webp"]
only_prompt_image = args.prompt_image and not args.prompt
generate_static_image = is_t2i_model or is_image_output or only_prompt_image
# Calculate and print time estimate
has_i2v = args.image_to_video or args.image
has_audio = args.generate_audio or args.audio_file
has_lipsync = args.lip_sync
has_upscale = args.upscale
has_t2i = generate_static_image and not has_i2v # T2I mode, not I2V
estimates = timing.estimate_total_time(
args, m_info,
has_i2v=has_i2v,
has_audio=has_audio,
has_lipsync=has_lipsync,
has_upscale=has_upscale
has_upscale=has_upscale,
has_t2i=has_t2i
)
timing.print_estimate(estimates)
# Detect if we should generate a static image (T2I mode)
# Conditions: T2I model, OR output ends with image extension, OR only prompt_image specified
is_t2i_model = m_info.get("class") in ["StableDiffusionXLPipeline", "FluxPipeline"]
output_ext = os.path.splitext(args.output)[1].lower()
is_image_output = output_ext in [".png", ".jpg", ".jpeg", ".gif", ".webp"]
only_prompt_image = args.prompt_image and not args.prompt
generate_static_image = is_t2i_model or is_image_output or only_prompt_image
# ─── T+I2I (Text + Image-to-Image) Mode ─────────────────────────────────────
# Use existing image with T2I model to create modified image
if args.image_to_image and args.image:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment