Make colorspace detection safer for low-memory systems

- Check available RAM before detection (skip if <8GB free)
- Reduce test dimensions from 256x256 to 128x128
- Reduce inference steps from 10 to 5
- Lower guidance scale from 5.0 to 3.0
- Clear memory before and after detection
- Catch OOM errors specifically with fallback
- Always clear memory on any error
parent 85e21c0a
...@@ -1221,19 +1221,28 @@ def detect_model_colorspace(pipe, model_name, m_info, args): ...@@ -1221,19 +1221,28 @@ def detect_model_colorspace(pipe, model_name, m_info, args):
if existing_colorspace in ["RGB", "BGR"]: if existing_colorspace in ["RGB", "BGR"]:
return existing_colorspace return existing_colorspace
# Check available system RAM before attempting detection
# Large models can use massive amounts of RAM during inference
available_ram_gb = get_available_ram_gb()
if available_ram_gb < 8.0: # Need at least 8GB free RAM
print(f" ⚠️ Low system RAM ({available_ram_gb:.1f}GB), skipping colorspace detection")
print(f" Defaulting to RGB (use --swap_bgr if colors look wrong)")
return "RGB"
print(f" 🔍 Detecting colorspace for {model_name}...") print(f" 🔍 Detecting colorspace for {model_name}...")
print(f" (This is done once per model by generating a test frame)") print(f" (Using minimal resources - 128x128, 5 steps)")
try: try:
# Create a simple test prompt that should produce red # Use VERY small dimensions and minimal steps to conserve memory
test_prompt = "solid red color, uniform red background, pure red" test_prompt = "solid red"
test_height = 128 # Very small to save memory
test_width = 128
test_frames = 1
# Determine test dimensions (small for speed) # Clear memory before detection
test_height = 256 clear_memory(clear_cuda=True, aggressive=True)
test_width = 256
test_frames = 1 # Just one frame for detection
# Generate with minimal steps for speed # Generate with minimal steps for speed and memory
with torch.no_grad(): with torch.no_grad():
# Prepare kwargs based on pipeline type # Prepare kwargs based on pipeline type
video_kwargs = { video_kwargs = {
...@@ -1241,8 +1250,8 @@ def detect_model_colorspace(pipe, model_name, m_info, args): ...@@ -1241,8 +1250,8 @@ def detect_model_colorspace(pipe, model_name, m_info, args):
"height": test_height, "height": test_height,
"width": test_width, "width": test_width,
"num_frames": test_frames, "num_frames": test_frames,
"num_inference_steps": 10, # Minimal steps for speed "num_inference_steps": 5, # Very minimal steps
"guidance_scale": 5.0, "guidance_scale": 3.0, # Lower guidance to reduce memory
} }
# Check if pipeline supports image input (I2V) - if so, provide red image # Check if pipeline supports image input (I2V) - if so, provide red image
...@@ -1252,11 +1261,19 @@ def detect_model_colorspace(pipe, model_name, m_info, args): ...@@ -1252,11 +1261,19 @@ def detect_model_colorspace(pipe, model_name, m_info, args):
'CogVideoXImageToVideoPipeline'] 'CogVideoXImageToVideoPipeline']
if pipeline_class_name in i2v_pipelines: if pipeline_class_name in i2v_pipelines:
# Create a pure red image for I2V models # Create a pure red image for I2V models - very small
red_image = Image.new('RGB', (test_width, test_height), color=(255, 0, 0)) red_image = Image.new('RGB', (test_width, test_height), color=(255, 0, 0))
video_kwargs["image"] = red_image video_kwargs["image"] = red_image
output = pipe(**video_kwargs) # Run inference with memory protection
try:
output = pipe(**video_kwargs)
except (torch.cuda.OutOfMemoryError, RuntimeError) as oom_error:
if "out of memory" in str(oom_error).lower() or "cuda" in str(oom_error).lower():
print(f" ⚠️ OOM during detection, defaulting to RGB")
clear_memory(clear_cuda=True, aggressive=True)
return "RGB"
raise
# Extract frames from output # Extract frames from output
if hasattr(output, "frames"): if hasattr(output, "frames"):
...@@ -1264,7 +1281,6 @@ def detect_model_colorspace(pipe, model_name, m_info, args): ...@@ -1264,7 +1281,6 @@ def detect_model_colorspace(pipe, model_name, m_info, args):
elif hasattr(output, "videos"): elif hasattr(output, "videos"):
test_frames_data = output.videos[0] test_frames_data = output.videos[0]
else: else:
# Fallback: assume RGB if we can't analyze
print(f" ⚠️ Could not analyze output format, assuming RGB") print(f" ⚠️ Could not analyze output format, assuming RGB")
return "RGB" return "RGB"
...@@ -1307,14 +1323,11 @@ def detect_model_colorspace(pipe, model_name, m_info, args): ...@@ -1307,14 +1323,11 @@ def detect_model_colorspace(pipe, model_name, m_info, args):
print(f" Channel averages - R: {r_avg:.1f}, G: {g_avg:.1f}, B: {b_avg:.1f}") print(f" Channel averages - R: {r_avg:.1f}, G: {g_avg:.1f}, B: {b_avg:.1f}")
# Determine colorspace # Determine colorspace
# If R > B significantly, it's RGB (red appears in red channel)
# If B > R significantly, it's BGR (red appears in blue channel)
if r_avg > b_avg + 20: # Red channel significantly higher if r_avg > b_avg + 20: # Red channel significantly higher
detected_colorspace = "RGB" detected_colorspace = "RGB"
elif b_avg > r_avg + 20: # Blue channel significantly higher elif b_avg > r_avg + 20: # Blue channel significantly higher
detected_colorspace = "BGR" detected_colorspace = "BGR"
else: else:
# Ambiguous - default to RGB (most common)
print(f" ⚠️ Colorspace ambiguous, defaulting to RGB") print(f" ⚠️ Colorspace ambiguous, defaulting to RGB")
detected_colorspace = "RGB" detected_colorspace = "RGB"
...@@ -1326,11 +1339,15 @@ def detect_model_colorspace(pipe, model_name, m_info, args): ...@@ -1326,11 +1339,15 @@ def detect_model_colorspace(pipe, model_name, m_info, args):
save_models_config(MODELS) save_models_config(MODELS)
print(f" 📝 Saved colorspace to model config") print(f" 📝 Saved colorspace to model config")
# Clear memory after detection
clear_memory(clear_cuda=True, aggressive=True)
return detected_colorspace return detected_colorspace
except Exception as e: except Exception as e:
print(f" ⚠️ Colorspace detection failed: {e}") print(f" ⚠️ Colorspace detection failed: {e}")
print(f" Defaulting to RGB") print(f" Defaulting to RGB")
clear_memory(clear_cuda=True, aggressive=True)
return "RGB" return "RGB"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment