Commit 2c0503d9 authored by Your Name's avatar Your Name

feat: add --image-1, --model-1, --audio-1, --tts-1 options to return 409 if model is busy

parent afbf976c
...@@ -34,6 +34,7 @@ from threading import Thread ...@@ -34,6 +34,7 @@ from threading import Thread
# Per-model semaphores for request concurrency control # Per-model semaphores for request concurrency control
model_semaphores: dict = {} model_semaphores: dict = {}
load_mode = {"mode": "ondemand"} # Track load mode globally load_mode = {"mode": "ondemand"} # Track load mode globally
queue_flags = {"model_1": False, "image_1": False, "audio_1": False, "tts_1": False} # Track --X-1 flags
# ============================================================================= # =============================================================================
# Model Cache Directory # Model Cache Directory
# ============================================================================= # =============================================================================
...@@ -3137,10 +3138,21 @@ def get_load_mode(): ...@@ -3137,10 +3138,21 @@ def get_load_mode():
@app.post("/v1/images/generations") @app.post("/v1/images/generations")
async def create_image_generation(request: ImageGenerationRequest): async def create_image_generation(request: ImageGenerationRequest):
"""
Image generation endpoint (OpenAI-compatible).
Supports:
- Stable Diffusion via stable-diffusion-cpp-python (sd.cpp)
- Stable Diffusion XL (via local inference with diffusers)
- Other diffusers models
"""
# Get or create semaphore for this model # Get or create semaphore for this model
model_key = f"image:{request.model}" if request.model else "image" model_key = f"image:{request.model}" if request.model else "image"
mode = get_load_mode() mode = get_load_mode()
# Check if --image-1 is set (no queue, return 409 if busy)
use_1_mode = queue_flags.get("image_1", False)
# In loadall mode, allow 1 concurrent request per model # In loadall mode, allow 1 concurrent request per model
# In ondemand mode, serialize all requests (use global semaphore) # In ondemand mode, serialize all requests (use global semaphore)
if mode == "loadall": if mode == "loadall":
...@@ -3153,16 +3165,16 @@ async def create_image_generation(request: ImageGenerationRequest): ...@@ -3153,16 +3165,16 @@ async def create_image_generation(request: ImageGenerationRequest):
model_semaphores["global_image"] = asyncio.Semaphore(1) model_semaphores["global_image"] = asyncio.Semaphore(1)
semaphore = model_semaphores["global_image"] semaphore = model_semaphores["global_image"]
async with semaphore: # Try to acquire semaphore without blocking
if use_1_mode:
""" acquired = semaphore.locked()
Image generation endpoint (OpenAI-compatible). if acquired:
raise HTTPException(
status_code=409,
detail="Image model is busy. Try again later."
)
Supports: async with semaphore:
- Stable Diffusion via stable-diffusion-cpp-python (sd.cpp)
- Stable Diffusion XL (via local inference with diffusers)
- Other diffusers models
"""
image_model = multi_model_manager.image_model image_model = multi_model_manager.image_model
# If no image model configured, return an error # If no image model configured, return an error
...@@ -4221,6 +4233,11 @@ def parse_args(): ...@@ -4221,6 +4233,11 @@ def parse_args():
default=None, default=None,
help="Model for audio transcription (e.g., whisper-1, base, or path to faster-whisper model). Can be specified multiple times for multiple models.", help="Model for audio transcription (e.g., whisper-1, base, or path to faster-whisper model). Can be specified multiple times for multiple models.",
) )
parser.add_argument(
"--audio-1",
action="store_true",
help="Disable request queue for audio models - return 409 if model is busy",
)
parser.add_argument( parser.add_argument(
"--image-model", "--image-model",
type=str, type=str,
...@@ -4228,6 +4245,11 @@ def parse_args(): ...@@ -4228,6 +4245,11 @@ def parse_args():
default=None, default=None,
help="Model for image generation (e.g., stable-diffusion-xl-base-1.0). Can be specified multiple times for multiple models.", help="Model for image generation (e.g., stable-diffusion-xl-base-1.0). Can be specified multiple times for multiple models.",
) )
parser.add_argument(
"--image-1",
action="store_true",
help="Disable request queue for image models - return 409 if model is busy",
)
parser.add_argument( parser.add_argument(
"--llm-path", "--llm-path",
type=str, type=str,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment