Fix time estimation for T2I models and suppress log output for JSON

- Move T2I detection before time estimation
- Add has_t2i parameter to estimate_total_time
- T2I models now show image_generation time instead of video_generation
- Add Lumina pipelines to T2I model detection
- Suppress 'Loaded models' message when --json flag is used, so stdout remains valid machine-parseable JSON
parent 6bd2bbd4
......@@ -2140,12 +2140,17 @@ def print_search_results(results, args):
# Initialize MODELS from external config only.
# NOTE(review): this span is a diff extract and the original indentation was
# lost in rendering — the if/else bodies below appear flush-left. Restore the
# real indentation from the source file before applying any change here.
MODELS = {}
# Check if JSON output is requested (for suppressing log messages)
# NOTE(review): sys.argv is scanned directly rather than via argparse —
# presumably because this runs at import time, before argument parsing;
# confirm against the rest of the module.
_json_output = "--json" in sys.argv
# Load external models config
_external_models = load_models_config()
if _external_models:
MODELS = _external_models
if not _json_output:
# Suppressed under --json so stdout stays machine-parseable.
print(f"📁 Loaded {len(_external_models)} models from {MODELS_CONFIG_FILE}")
else:
if not _json_output:
# Same suppression for the "no models configured" hint.
# NOTE(review): whether the second print below is inside this guard is
# ambiguous in this extract — verify against the real source.
print(f"⚠️ No models configured. Run: videogen --update-models")
print(f" Or add a model: videogen --add-model <model_id> --name <name>")
......@@ -2379,7 +2384,7 @@ class TimingTracker:
return load_factor
def estimate_total_time(self, args, m_info, has_i2v=False, has_audio=False, has_lipsync=False, has_upscale=False):
def estimate_total_time(self, args, m_info, has_i2v=False, has_audio=False, has_lipsync=False, has_upscale=False, has_t2i=False):
"""Estimate total generation time based on parameters and hardware
This provides MORE REALISTIC estimates that account for:
......@@ -2551,7 +2556,29 @@ class TimingTracker:
audio_time = args.length * 2 + 10 # MusicGen takes time
estimates["audio_generation"] = audio_time
# Video generation (REALISTIC)
# Video generation (REALISTIC) - only for video models, not T2I
if has_t2i:
# For T2I models, estimate image generation time instead of video
# Get base time per image from model class
model_class = m_info.get("class", "")
if "FluxPipeline" in model_class:
img_time = 45 # Flux is slow (~20-30 steps)
elif "StableDiffusion3Pipeline" in model_class:
img_time = 20 # SD3 is moderate
elif "StableDiffusionXLPipeline" in model_class:
img_time = 15 # SDXL is faster
elif "LuminaText2ImgPipeline" in model_class or "Lumina2Text2ImgPipeline" in model_class:
img_time = 25 # Lumina is moderate
else:
img_time = 30 # Default for unknown models
# Scale by resolution
img_time *= (args.width * args.height) / (1024 * 1024)
img_time *= perf_multiplier # Apply GPU performance
estimates["image_generation"] = img_time
else:
# Video generation
num_frames = int(args.length * args.fps)
# Get actual inference steps for the model
......@@ -8001,29 +8028,33 @@ def main(args):
main_prompt = ", ".join(args.prompt)
init_image = None
# Detect if we should generate a static image (T2I mode)
# Conditions: T2I model, OR output ends with image extension, OR only prompt_image specified
is_t2i_model = m_info.get("class") in ["StableDiffusionXLPipeline", "FluxPipeline",
"StableDiffusion3Pipeline", "LuminaText2ImgPipeline",
"Lumina2Text2ImgPipeline"]
output_ext = os.path.splitext(args.output)[1].lower()
is_image_output = output_ext in [".png", ".jpg", ".jpeg", ".gif", ".webp"]
only_prompt_image = args.prompt_image and not args.prompt
generate_static_image = is_t2i_model or is_image_output or only_prompt_image
# Calculate and print time estimate
has_i2v = args.image_to_video or args.image
has_audio = args.generate_audio or args.audio_file
has_lipsync = args.lip_sync
has_upscale = args.upscale
has_t2i = generate_static_image and not has_i2v # T2I mode, not I2V
estimates = timing.estimate_total_time(
args, m_info,
has_i2v=has_i2v,
has_audio=has_audio,
has_lipsync=has_lipsync,
has_upscale=has_upscale
has_upscale=has_upscale,
has_t2i=has_t2i
)
timing.print_estimate(estimates)
# Detect if we should generate a static image (T2I mode)
# Conditions: T2I model, OR output ends with image extension, OR only prompt_image specified
is_t2i_model = m_info.get("class") in ["StableDiffusionXLPipeline", "FluxPipeline"]
output_ext = os.path.splitext(args.output)[1].lower()
is_image_output = output_ext in [".png", ".jpg", ".jpeg", ".gif", ".webp"]
only_prompt_image = args.prompt_image and not args.prompt
generate_static_image = is_t2i_model or is_image_output or only_prompt_image
# ─── T+I2I (Text + Image-to-Image) Mode ─────────────────────────────────────
# Use existing image with T2I model to create modified image
if args.image_to_image and args.image:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment