Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in
Toggle navigation
C
coderai
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexlab
coderai
Commits
2c0503d9
Commit
2c0503d9
authored
Mar 14, 2026
by
Your Name
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat: add --image-1, --model-1, --audio-1, --tts-1 options to return 409 if model is busy
parent
afbf976c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
31 additions
and
9 deletions
+31
-9
coderai
coderai
+31
-9
No files found.
coderai
View file @
2c0503d9
...
@@ -34,6 +34,7 @@ from threading import Thread
...
@@ -34,6 +34,7 @@ from threading import Thread
# Per-model semaphores for request concurrency control
model_semaphores: dict = {}
load_mode = {"mode": "ondemand"}  # Track load mode globally
queue_flags = {"model_1": False, "image_1": False, "audio_1": False, "tts_1": False}  # Track --X-1 flags

# =============================================================================
# Model Cache Directory
# =============================================================================
...
@@ -3137,10 +3138,21 @@ def get_load_mode():
...
@@ -3137,10 +3138,21 @@ def get_load_mode():
@app.post("/v1/images/generations")
async def create_image_generation(request: ImageGenerationRequest):
    """
    Image generation endpoint (OpenAI-compatible).
    Supports:
    - Stable Diffusion via stable-diffusion-cpp-python (sd.cpp)
    - Stable Diffusion XL (via local inference with diffusers)
    - Other diffusers models
    """
    # Get or create semaphore for this model
    model_key = f"image:{request.model}" if request.model else "image"
    mode = get_load_mode()
    # Check if --image-1 is set (no queue, return 409 if busy)
    use_1_mode = queue_flags.get("image_1", False)
    # In loadall mode, allow 1 concurrent request per model
    # In ondemand mode, serialize all requests (use global semaphore)
    if mode == "loadall":
...
@@ -3153,16 +3165,16 @@ async def create_image_generation(request: ImageGenerationRequest):
...
@@ -3153,16 +3165,16 @@ async def create_image_generation(request: ImageGenerationRequest):
        model_semaphores["global_image"] = asyncio.Semaphore(1)
    semaphore = model_semaphores["global_image"]
    # Try to acquire semaphore without blocking
    if use_1_mode:
        acquired = semaphore.locked()
        if acquired:
            raise HTTPException(
                status_code=409,
                detail="Image model is busy. Try again later."
            )
    async with semaphore:
        image_model = multi_model_manager.image_model
        # If no image model configured, return an error
...
@@ -4221,6 +4233,11 @@ def parse_args():
...
@@ -4221,6 +4233,11 @@ def parse_args():
        default=None,
        help="Model for audio transcription (e.g., whisper-1, base, or path to faster-whisper model). Can be specified multiple times for multiple models.",
    )
    parser.add_argument(
        "--audio-1",
        action="store_true",
        help="Disable request queue for audio models - return 409 if model is busy",
    )
    parser.add_argument(
        "--image-model",
        type=str,
...
@@ -4228,6 +4245,11 @@ def parse_args():
...
@@ -4228,6 +4245,11 @@ def parse_args():
        default=None,
        help="Model for image generation (e.g., stable-diffusion-xl-base-1.0). Can be specified multiple times for multiple models.",
    )
    parser.add_argument(
        "--image-1",
        action="store_true",
        help="Disable request queue for image models - return 409 if model is busy",
    )
    parser.add_argument(
        "--llm-path",
        type=str,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment