Revert: Keep sd.cpp fallback available for all models when diffusers fails

- Removed the GGUF-only restriction on the sd.cpp fallback
- Some HF models may be GGUF even without 'gguf' in the name
- Let sd.cpp attempt loading and fail gracefully if the model is incompatible
- This allows sd.cpp to work as a proper fallback for any model type

Revert: Keep sd.cpp fallback available for all models when diffusers fails
- Removed the GGUF-only restriction on the sd.cpp fallback
- Some HF models may be GGUF even without 'gguf' in the name
- Let sd.cpp attempt loading and fail gracefully if the model is incompatible
- This allows sd.cpp to work as a proper fallback for any model type
2cedd442 · Your Name · f5b9d812 · 2cedd442
Commit 2cedd442 authored Mar 19, 2026 by Your Name
Hide whitespace changes
Inline Side-by-side

Showing with 46 additions and 52 deletions

images.py codai/api/images.py +46 -52

No files found.
--- a/codai/api/images.py
+++ b/codai/api/images.py
@@ -534,64 +534,58 @@ async def create_image_generation(request: ImageGenerationRequest, http_request:
            print(f"Traceback: {traceback.format_exc()}")
            print(f"Trying stable-diffusion-cpp-python...")
-    # Try stable-diffusion-cpp-python (sd.cpp) as fallback - ONLY for GGUF models
+    # Try stable-diffusion-cpp-python (sd.cpp) as fallback when diffusers fails
-    # sd.cpp only works with GGUF models, not diffusers models
+    # sd.cpp works with GGUF models, but some HF models may be GGUF even without "gguf" in name
-    is_potential_gguf_model = (model_to_use.endswith('.gguf') or 'gguf' in model_to_use.lower() or
+    # Let sd.cpp attempt loading and fail gracefully if it's not compatible
-                              (model_to_use.startswith('http') and '.gguf' in model_to_use) or
-                              (not model_to_use.startswith('http') and '/' in model_to_use))  # HF model IDs might be GGUF
-    if not is_potential_gguf_model:
+    # Try stable-diffusion-cpp-python (sd.cpp) as fallback
-        print(f"Model '{model_to_use}' is not a GGUF model (sd.cpp only supports GGUF), skipping sd.cpp fallback")
+    # First, check all available image models to find one loaded via sd.cpp
-        sd_model = None
+    # Always check for cached models - allows dynamically loaded models to be reused across requests
-    else:
+    sd_model = None
-        # Try stable-diffusion-cpp-python (sd.cpp) as fallback for GGUF models
+    for key in multi_model_manager.models:
-        # First, check all available image models to find one loaded via sd.cpp
+        if key.startswith("image:"):
-        # Always check for cached models - allows dynamically loaded models to be reused across requests
+            potential_model = multi_model_manager.get_model(key)
-        sd_model = None
+            if potential_model is not None:
-        for key in multi_model_manager.models:
+                # Check if it's a stable-diffusion-cpp model
-            if key.startswith("image:"):
+                try:
-                potential_model = multi_model_manager.get_model(key)
+                    from stable_diffusion_cpp import StableDiffusion
-                if potential_model is not None:
+                    if isinstance(potential_model, StableDiffusion):
-                    # Check if it's a stable-diffusion-cpp model
+                        sd_model = potential_model
-                    try:
+                        print(f"Found cached stable-diffusion-cpp model with key: {key}")
-                        from stable_diffusion_cpp import StableDiffusion
+                        break
-                        if isinstance(potential_model, StableDiffusion):
+                except ImportError:
-                            sd_model = potential_model
+                    pass
-                            print(f"Found cached stable-diffusion-cpp model with key: {key}")
-                            break
-                    except ImportError:
-                        pass
-        # If no cached image model found, need to load one - first cleanup any existing models
+    # If no cached image model found, need to load one - first cleanup any existing models
-        if sd_model is None:
+    if sd_model is None:
-            # In ondemand mode, check if we need to unload before loading sd.cpp model
+        # In ondemand mode, check if we need to unload before loading sd.cpp model
-            from codai.models.manager import model_manager
+        from codai.models.manager import model_manager
-            has_any_model = len(multi_model_manager.models) > 0 or model_manager.backend is not None
+        has_any_model = len(multi_model_manager.models) > 0 or model_manager.backend is not None
-            if mode == "ondemand" and has_any_model:
+        if mode == "ondemand" and has_any_model:
-                # Resolve both the requested image model and currently loaded model to their canonical names
+            # Resolve both the requested image model and currently loaded model to their canonical names
-                requested_canonical = multi_model_manager.resolve_model_name(f"image:{model_to_use}")
+            requested_canonical = multi_model_manager.resolve_model_name(f"image:{model_to_use}")
-                loaded_canonical = multi_model_manager.get_currently_loaded_model_name()
+            loaded_canonical = multi_model_manager.get_currently_loaded_model_name()
-                # Also check legacy model_manager
+            # Also check legacy model_manager
-                if not loaded_canonical and model_manager.backend is not None:
+            if not loaded_canonical and model_manager.backend is not None:
-                    loaded_canonical = "legacy_model_manager"
+                loaded_canonical = "legacy_model_manager"
-                # Compare: if they're different models, unload first
+            # Compare: if they're different models, unload first
-                already_loaded = (requested_canonical and loaded_canonical and
+            already_loaded = (requested_canonical and loaded_canonical and
-                                requested_canonical == loaded_canonical)
+                            requested_canonical == loaded_canonical)
-                if not already_loaded:
+            if not already_loaded:
-                    print(f"In ondemand mode - model switch detected:")
+                print(f"In ondemand mode - model switch detected:")
-                    print(f"  Requested: 'image:{model_to_use}' (resolved to: '{requested_canonical}')")
+                print(f"  Requested: 'image:{model_to_use}' (resolved to: '{requested_canonical}')")
-                    print(f"  Loaded: '{loaded_canonical}'")
+                print(f"  Loaded: '{loaded_canonical}'")
-                    print(f"  -> Fully unloading current model(s) before loading sd.cpp model...")
+                print(f"  -> Fully unloading current model(s) before loading sd.cpp model...")
-                    multi_model_manager.unload_all_models()
+                multi_model_manager.unload_all_models()
-                    if model_manager.backend is not None:
+                if model_manager.backend is not None:
-                        try:
+                    try:
-                            model_manager.cleanup()
+                        model_manager.cleanup()
-                        except:
+                    except:
-                            pass
+                        pass
    if sd_model is not None:
        # Check if it's a stable-diffusion-cpp model (has generate method from sd.cpp)