Commit 7f5bf82d authored by Your Name's avatar Your Name

Implement image generation fallback chain: try torch/diffusers first, then sd.cpp

- Reordered the image generation backend priority to try torch/diffusers first
- If torch/diffusers fails (ImportError or any other error), fall back to stable-diffusion-cpp-python
- If both backends fail, raise an HTTP 501 error whose message lists each backend's failure and includes installation instructions
- Added dynamic loading of sd.cpp model if not pre-loaded
parent 3b527c5a
...@@ -3122,84 +3122,9 @@ async def create_image_generation(request: ImageGenerationRequest): ...@@ -3122,84 +3122,9 @@ async def create_image_generation(request: ImageGenerationRequest):
if model_to_use.startswith("image:"): if model_to_use.startswith("image:"):
model_to_use = image_model model_to_use = image_model
# First, try to use stable-diffusion-cpp-python (sd.cpp) if available # Track errors for proper fallback chain
# Check all available image models to find one loaded via sd.cpp diffusers_error = None
sd_model = None sd_cpp_error = None
for key in multi_model_manager.models:
if key.startswith("image:"):
potential_model = multi_model_manager.get_model(key)
if potential_model is not None:
# Check if it's a stable-diffusion-cpp model
try:
from stable_diffusion_cpp import StableDiffusion
if isinstance(potential_model, StableDiffusion):
sd_model = potential_model
print(f"Found stable-diffusion-cpp model with key: {key}")
break
except ImportError:
pass
if sd_model is not None:
# Check if it's a stable-diffusion-cpp model (has generate method from sd.cpp)
try:
from stable_diffusion_cpp import StableDiffusion
if isinstance(sd_model, StableDiffusion):
print(f"Using stable-diffusion-cpp-python for image generation")
# Use sd.cpp for generation
# Parse size
width, height = 512, 512
if request.size:
parts = request.size.split("x")
if len(parts) == 2:
try:
width = int(parts[0])
height = int(parts[1])
except ValueError:
pass
# Use default steps for Z-Image Turbo (very fast)
steps = 4 # Default for fast generation
# Generate images using sd.cpp (run in thread to not block event loop)
result = await asyncio.to_thread(
sd_model.generate_image,
prompt=request.prompt,
negative_prompt='',
width=width,
height=height,
cfg_scale=7.0,
sample_steps=steps,
seed=42,
batch_count=request.n if request.n else 1,
)
# Convert results to response format
images = []
import base64
import io
from PIL import Image
for img in result:
# Convert to base64
buffered = io.BytesIO()
if isinstance(img, Image.Image):
img.save(buffered, format="PNG")
else:
# Might be numpy array
Image.fromarray(img).save(buffered, format="PNG")
img_bytes = buffered.getvalue()
img_base64 = base64.b64encode(img_bytes).decode('utf-8')
images.append({"b64_json": img_base64})
return {
"created": int(time.time()),
"data": images
}
except ImportError:
pass # stable-diffusion-cpp not available, continue to diffusers
except Exception as e:
print(f"sd.cpp generation error: {e}")
# Continue to try diffusers
# Parse size (e.g., "1024x1024") # Parse size (e.g., "1024x1024")
width, height = 1024, 1024 width, height = 1024, 1024
...@@ -3212,7 +3137,7 @@ async def create_image_generation(request: ImageGenerationRequest): ...@@ -3212,7 +3137,7 @@ async def create_image_generation(request: ImageGenerationRequest):
except ValueError: except ValueError:
pass pass
# Try to use diffusers if available # Try diffusers first (torch-based, best quality for NVIDIA)
try: try:
import torch import torch
from diffusers import StableDiffusionXLPipeline, DiffusionPipeline from diffusers import StableDiffusionXLPipeline, DiffusionPipeline
...@@ -3296,16 +3221,190 @@ async def create_image_generation(request: ImageGenerationRequest): ...@@ -3296,16 +3221,190 @@ async def create_image_generation(request: ImageGenerationRequest):
} }
except ImportError as e: except ImportError as e:
# diffusers not installed # diffusers/torch not installed - record error and try sd.cpp
raise HTTPException( diffusers_error = str(e)
status_code=501, print(f"diffusers not available: {diffusers_error}, trying stable-diffusion-cpp-python...")
detail=f"Image generation not available. Install diffusers: pip install diffusers torch accelerate safetensors. Error: {str(e)}"
)
except Exception as e: except Exception as e:
print(f"Image generation error: {e}") # Other error with diffusers - record and try sd.cpp
import traceback diffusers_error = str(e)
traceback.print_exc() print(f"diffusers error: {diffusers_error}, trying stable-diffusion-cpp-python...")
raise HTTPException(status_code=500, detail=f"Image generation error: {str(e)}")
# Try stable-diffusion-cpp-python (sd.cpp) as fallback
# First, check all available image models to find one loaded via sd.cpp
sd_model = None
for key in multi_model_manager.models:
if key.startswith("image:"):
potential_model = multi_model_manager.get_model(key)
if potential_model is not None:
# Check if it's a stable-diffusion-cpp model
try:
from stable_diffusion_cpp import StableDiffusion
if isinstance(potential_model, StableDiffusion):
sd_model = potential_model
print(f"Found stable-diffusion-cpp model with key: {key}")
break
except ImportError:
pass
if sd_model is not None:
# Check if it's a stable-diffusion-cpp model (has generate method from sd.cpp)
try:
from stable_diffusion_cpp import StableDiffusion
if isinstance(sd_model, StableDiffusion):
print(f"Using stable-diffusion-cpp-python for image generation")
# Use sd.cpp for generation
# Parse size
width, height = 512, 512
if request.size:
parts = request.size.split("x")
if len(parts) == 2:
try:
width = int(parts[0])
height = int(parts[1])
except ValueError:
pass
# Use default steps for Z-Image Turbo (very fast)
steps = 4 # Default for fast generation
# Generate images using sd.cpp (run in thread to not block event loop)
result = await asyncio.to_thread(
sd_model.generate_image,
prompt=request.prompt,
negative_prompt='',
width=width,
height=height,
cfg_scale=7.0,
sample_steps=steps,
seed=42,
batch_count=request.n if request.n else 1,
)
# Convert results to response format
images = []
import base64
import io
from PIL import Image
for img in result:
# Convert to base64
buffered = io.BytesIO()
if isinstance(img, Image.Image):
img.save(buffered, format="PNG")
else:
# Might be numpy array
Image.fromarray(img).save(buffered, format="PNG")
img_bytes = buffered.getvalue()
img_base64 = base64.b64encode(img_bytes).decode('utf-8')
images.append({"b64_json": img_base64})
return {
"created": int(time.time()),
"data": images
}
except ImportError as e:
# stable-diffusion-cpp not available
sd_cpp_error = str(e)
print(f"stable-diffusion-cpp-python not available: {sd_cpp_error}")
except Exception as e:
print(f"sd.cpp generation error: {e}")
sd_cpp_error = str(e)
else:
# No sd.cpp model pre-loaded, try to load dynamically
print("No pre-loaded sd.cpp model found, trying to load...")
try:
from stable_diffusion_cpp import StableDiffusion
# Check if model_to_use is a URL and get cached path
model_path = None
if model_to_use.startswith('http://') or model_to_use.startswith('https://'):
cached_path = get_cached_model_path(model_to_use)
if cached_path:
model_path = cached_path
print(f"Using cached model: {model_path}")
if model_path is None and os.path.isfile(model_to_use):
model_path = model_to_use
if model_path is None:
print("Warning: Could not resolve sd.cpp model path")
sd_cpp_error = "Could not resolve model path"
else:
# Load sd.cpp model
sd_model = StableDiffusion(
model_path=model_path,
vae_path=None,
n_threads=4,
n_gpu_layers=-1, # All layers to GPU
)
print(f"Using stable-diffusion-cpp-python for image generation")
# Generate images
width, height = 512, 512
if request.size:
parts = request.size.split("x")
if len(parts) == 2:
try:
width = int(parts[0])
height = int(parts[1])
except ValueError:
pass
steps = 4
result = await asyncio.to_thread(
sd_model.generate_image,
prompt=request.prompt,
negative_prompt='',
width=width,
height=height,
cfg_scale=7.0,
sample_steps=steps,
seed=42,
batch_count=request.n if request.n else 1,
)
# Convert results to response format
images = []
import base64
import io
from PIL import Image
for img in result:
buffered = io.BytesIO()
if isinstance(img, Image.Image):
img.save(buffered, format="PNG")
else:
Image.fromarray(img).save(buffered, format="PNG")
img_bytes = buffered.getvalue()
img_base64 = base64.b64encode(img_bytes).decode('utf-8')
images.append({"b64_json": img_base64})
return {
"created": int(time.time()),
"data": images
}
except ImportError as e:
sd_cpp_error = str(e)
print(f"stable-diffusion-cpp-python not available: {sd_cpp_error}")
except Exception as e:
sd_cpp_error = str(e)
print(f"sd.cpp error: {sd_cpp_error}")
# Both backends failed - return error with installation instructions
error_details = []
if diffusers_error:
error_details.append(f"diffusers: {diffusers_error}")
if sd_cpp_error:
error_details.append(f"sd.cpp: {sd_cpp_error}")
raise HTTPException(
status_code=501,
detail=f"Image generation not available. Tried: {', '.join(error_details)}. "
f"Install either: pip install diffusers torch accelerate safetensors (for NVIDIA) "
f"or: pip install stable-diffusion-cpp-python (for Vulkan/AMD)"
)
# ============================================================================= # =============================================================================
# Text-to-Speech Endpoint # Text-to-Speech Endpoint
# ============================================================================= # =============================================================================
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment