wip: snapshot in-progress platform updates

parent 8fd1c5c2
This diff is collapsed.
......@@ -522,7 +522,14 @@ elif [ "$BACKEND" = "all" ]; then
pip install setproctitle || echo -e "${YELLOW}Warning: setproctitle failed (optional)${NC}"
# Try stable-diffusion-cpp-python (disable WebM to avoid missing libwebm cmake submodule)
CMAKE_ARGS="$SD_CMAKE_ARGS" pip install stable-diffusion-cpp-python || echo -e "${YELLOW}Warning: stable-diffusion-cpp-python failed (optional)${NC}"
# Use CUDA if available (detected later in this block, check nvcc now)
if command -v nvcc &> /dev/null || [ -d "/usr/local/cuda" ]; then
CMAKE_ARGS="$SD_CMAKE_ARGS -DSD_CUDA=ON" pip install stable-diffusion-cpp-python --no-cache-dir || \
CMAKE_ARGS="$SD_CMAKE_ARGS" pip install stable-diffusion-cpp-python || \
echo -e "${YELLOW}Warning: stable-diffusion-cpp-python failed (optional)${NC}"
else
CMAKE_ARGS="$SD_CMAKE_ARGS" pip install stable-diffusion-cpp-python || echo -e "${YELLOW}Warning: stable-diffusion-cpp-python failed (optional)${NC}"
fi
}
# Install PyTorch with CUDA support (for nvidia backend)
......@@ -622,14 +629,28 @@ elif [ "$BACKEND" = "all" ]; then
echo -e "${YELLOW}Warning: Some Vulkan packages failed to install${NC}"
}
# Try to install stable-diffusion-cpp-python with OpenCL
if [ "$OPENCL_AVAILABLE" = true ]; then
echo -e "${YELLOW}Installing stable-diffusion-cpp-python with OpenCL support...${NC}"
CMAKE_ARGS="$SD_CMAKE_ARGS" pip install stable-diffusion-cpp-python || {
echo -e "${YELLOW}Warning: stable-diffusion-cpp-python not available (requires CMake and build tools)${NC}"
# Try to install stable-diffusion-cpp-python with CUDA+Vulkan (preferred) or fallbacks
if [ "$CUDA_AVAILABLE" = true ] && [ "$VULKAN_AVAILABLE" = true ]; then
echo -e "${YELLOW}Installing stable-diffusion-cpp-python with CUDA+Vulkan support...${NC}"
CMAKE_ARGS="$SD_CMAKE_ARGS -DSD_CUDA=ON -DSD_VULKAN=ON" pip install stable-diffusion-cpp-python --no-cache-dir || {
echo -e "${YELLOW}CUDA+Vulkan build failed, trying CUDA only...${NC}"
CMAKE_ARGS="$SD_CMAKE_ARGS -DSD_CUDA=ON" pip install stable-diffusion-cpp-python --no-cache-dir || \
echo -e "${YELLOW}Warning: stable-diffusion-cpp-python not available${NC}"
}
elif [ "$CUDA_AVAILABLE" = true ]; then
echo -e "${YELLOW}Installing stable-diffusion-cpp-python with CUDA support...${NC}"
CMAKE_ARGS="$SD_CMAKE_ARGS -DSD_CUDA=ON" pip install stable-diffusion-cpp-python --no-cache-dir || \
echo -e "${YELLOW}Warning: stable-diffusion-cpp-python not available${NC}"
elif [ "$VULKAN_AVAILABLE" = true ]; then
echo -e "${YELLOW}Installing stable-diffusion-cpp-python with Vulkan support...${NC}"
CMAKE_ARGS="$SD_CMAKE_ARGS -DSD_VULKAN=ON" pip install stable-diffusion-cpp-python --no-cache-dir || \
echo -e "${YELLOW}Warning: stable-diffusion-cpp-python not available${NC}"
elif [ "$OPENCL_AVAILABLE" = true ]; then
echo -e "${YELLOW}Installing stable-diffusion-cpp-python with OpenCL support...${NC}"
CMAKE_ARGS="$SD_CMAKE_ARGS -DSD_OPENCL=ON" pip install stable-diffusion-cpp-python --no-cache-dir || \
echo -e "${YELLOW}Warning: stable-diffusion-cpp-python not available${NC}"
else
echo -e "${YELLOW}Skipping OpenCL (stable-diffusion-cpp-python) - OpenCL not available${NC}"
echo -e "${YELLOW}Skipping GPU-accelerated stable-diffusion-cpp-python - no GPU backend available${NC}"
fi
# Install additional requirements
......@@ -667,8 +688,11 @@ elif [ "$BACKEND" = "all" ]; then
echo "Available backends:"
[ "$CUDA_AVAILABLE" = true ] && echo " ✓ NVIDIA/CUDA (PyTorch)"
[ "$CUDA_AVAILABLE" = true ] && echo " ✓ CUDA (llama-cpp-python)"
[ "$CUDA_AVAILABLE" = true ] && [ "$VULKAN_AVAILABLE" = true ] && echo " ✓ CUDA+Vulkan (stable-diffusion-cpp-python)"
[ "$CUDA_AVAILABLE" = true ] && [ "$VULKAN_AVAILABLE" != true ] && echo " ✓ CUDA (stable-diffusion-cpp-python)"
[ "$CUDA_AVAILABLE" != true ] && [ "$VULKAN_AVAILABLE" = true ] && echo " ✓ Vulkan (stable-diffusion-cpp-python)"
[ "$VULKAN_AVAILABLE" = true ] && echo " ✓ Vulkan (llama-cpp-python)"
[ "$OPENCL_AVAILABLE" = true ] && echo " ✓ OpenCL (stable-diffusion-cpp-python)"
[ "$OPENCL_AVAILABLE" = true ] && [ "$CUDA_AVAILABLE" != true ] && [ "$VULKAN_AVAILABLE" != true ] && echo " ✓ OpenCL (stable-diffusion-cpp-python)"
echo " ✓ CPU (fallback for all)"
if [ "$FLASH" = true ] && [ "$CUDA_AVAILABLE" = true ]; then
echo ""
......
......@@ -15,10 +15,13 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Authentication and session management for admin dashboard."""
import base64
import hashlib
import hmac
import json
import os
import secrets
import threading
import time
from pathlib import Path
from typing import Any, Dict, Optional
......@@ -43,35 +46,62 @@ def get_or_create_secret(config_dir: Path) -> bytes:
def hash_password(password: str) -> str:
"""Hash a password using SHA-256 with salt.
In production, use argon2 or bcrypt. This is a minimal implementation
for environments where those libraries aren't available.
"""Hash a password using argon2 (preferred) or scrypt as fallback.
New hashes are always produced with a proper key-derivation function and
a per-password random salt. The legacy SHA-256/static-salt format is
only retained for *verification* of pre-existing hashes.
"""
# Use SHA-256 with a pepper-like secret for basic hashing
# Real implementation should use argon2 from main.py
salt = b'static_salt_' # In production, use per-user random salt
return hashlib.sha256(salt + password.encode()).hexdigest()
try:
from argon2 import PasswordHasher
ph = PasswordHasher()
return ph.hash(password)
except ImportError:
pass
# scrypt fallback: encode as "scrypt:<b64salt>:<b64key>"
salt = os.urandom(16)
key = hashlib.scrypt(password.encode(), salt=salt, n=2**14, r=8, p=1)
return "scrypt:" + base64.b64encode(salt).decode() + ":" + base64.b64encode(key).decode()
def verify_password(password: str, password_hash: str) -> bool:
"""Verify a password against its hash."""
# Try argon2 first
"""Verify a password against its hash.
Supports argon2, scrypt (new format), and the legacy SHA-256/static-salt
format so that old stored hashes continue to work.
"""
# --- argon2 ---
try:
from argon2 import PasswordHasher
from argon2.exceptions import VerifyMismatchError
from argon2.exceptions import VerifyMismatchError, InvalidHashError
ph = PasswordHasher()
try:
return ph.verify(password_hash, password)
except VerifyMismatchError:
return False
except InvalidHashError:
pass # not an argon2 hash; fall through
except Exception:
pass
except ImportError:
pass
# Fallback to simple hash
return hash_password(password) == password_hash
# --- scrypt ---
if password_hash.startswith("scrypt:"):
try:
parts = password_hash.split(":")
if len(parts) == 3:
salt = base64.b64decode(parts[1])
stored_key = base64.b64decode(parts[2])
new_key = hashlib.scrypt(password.encode(), salt=salt, n=2**14, r=8, p=1)
return hmac.compare_digest(new_key, stored_key)
except Exception:
pass
return False
# --- legacy SHA-256 with static salt (read-only; never written for new passwords) ---
legacy = hashlib.sha256(b'static_salt_' + password.encode()).hexdigest()
return hmac.compare_digest(legacy, password_hash)
class SessionManager:
......@@ -81,7 +111,7 @@ class SessionManager:
self.config_dir = config_dir
self.secret = get_or_create_secret(config_dir)
self.session_timeout = timedelta(minutes=session_timeout_minutes)
self._lock = __import__('threading').Lock()
self._lock = threading.Lock()
def _load_auth_data(self) -> Dict[str, Any]:
"""Load auth.json data."""
......
This diff is collapsed.
......@@ -8,8 +8,8 @@
--border: #1A1D28;
--border-2: #252836;
--text: #DDE1F0;
--text-2: #636880;
--text-3: #2E3145;
--text-2: #8B90A8;
--text-3: #555A72;
--accent: #6366F1;
--accent-s: rgba(99,102,241,.12);
--green: #34D399;
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -28,15 +28,17 @@
<div class="stat-value" id="req-total">0</div>
<div class="stat-sub"><span id="req-active">0</span> active</div>
</div>
<div class="stat">
<div class="stat" id="vram-card" style="display:none">
<div class="stat-label">VRAM</div>
<div class="stat-value" id="vram-pct"></div>
<div class="stat-value" id="vram-pct" style="font-size:2rem"></div>
<div class="progress" style="margin-top:.625rem">
<div class="progress-fill" id="vram-bar" style="width:0%"></div>
</div>
<div class="progress-labels">
<span id="vram-used"></span><span id="vram-total"></span>
<div class="progress-labels" style="color:var(--text-1);font-size:12px;margin-top:.4rem">
<span id="vram-used"></span><span id="vram-free"></span>
</div>
<div style="font-size:11.5px;color:var(--text-2);margin-top:.2rem;font-family:var(--mono)" id="vram-total-line"></div>
<div class="stat-sub" id="vram-gpu" style="margin-top:.25rem"></div>
</div>
</div>
......@@ -85,13 +87,25 @@ async function poll() {
document.getElementById('active-models').innerHTML = html || '<span class="muted small">No models loaded</span>';
if (d.vram) {
const pct = Math.round(d.vram.used / d.vram.total * 100);
document.getElementById('vram-pct').textContent = pct + '%';
document.getElementById('vram-bar').style.width = pct + '%';
document.getElementById('vram-used').textContent = d.vram.used.toFixed(1) + ' GB';
document.getElementById('vram-total').textContent = d.vram.total.toFixed(1) + ' GB';
document.getElementById('vram-card').style.display = '';
if (d.vram.free != null && d.vram.total) {
const usedPct = Math.round(d.vram.used / d.vram.total * 100);
document.getElementById('vram-pct').textContent = usedPct + '%';
document.getElementById('vram-bar').style.width = usedPct + '%';
document.getElementById('vram-used').textContent = d.vram.used.toFixed(1) + ' GB used';
document.getElementById('vram-free').textContent = d.vram.free.toFixed(1) + ' GB free';
document.getElementById('vram-total-line').textContent = d.vram.total.toFixed(1) + ' GB total';
} else {
document.getElementById('vram-pct').textContent = d.vram.total ? d.vram.total.toFixed(1) + ' GB' : '—';
document.getElementById('vram-bar').style.width = '0%';
document.getElementById('vram-used').textContent = '';
document.getElementById('vram-free').textContent = '';
document.getElementById('vram-total-line').textContent = '';
}
const gpuName = d.vram.gpu || '';
document.getElementById('vram-gpu').textContent = gpuName.length > 32 ? gpuName.slice(0, 32) + '…' : gpuName;
} else {
document.getElementById('vram-pct').textContent = 'N/A';
document.getElementById('vram-card').style.display = 'none';
}
if (d.requests) {
......
This diff is collapsed.
......@@ -45,6 +45,11 @@
<input type="text" id="s-cert" class="form-input" placeholder="/path/to/cert.pem">
</div>
</div>
<div class="form-row" style="margin-top:1rem;margin-bottom:0">
<label class="form-label">Request queue max size</label>
<input type="number" id="s-queue-max" class="form-input" placeholder="6" min="1" max="1000" style="max-width:160px">
<span class="form-hint">Maximum number of concurrent queued requests. Authenticated requests arriving when the queue is full receive a 429 response.</span>
</div>
</div>
<!-- Storage -->
......@@ -64,6 +69,48 @@
<span class="form-hint">Models will inherit this as default when configured</span>
</div>
</div>
<!-- Whisper Server -->
<div class="card mb-0" style="margin-top:1rem">
<div style="display:flex;align-items:center;justify-content:space-between;flex-wrap:wrap;gap:.5rem;margin-bottom:1rem">
<div class="card-title" style="margin:0">Whisper Server <span class="muted" style="font-size:11px;font-weight:400">(whisper.cpp native binary — recommended for AMD/Vulkan)</span></div>
<div style="display:flex;align-items:center;gap:.5rem">
<span id="ws-badge" class="muted small"></span>
<button class="btn btn-sm btn-secondary" onclick="wsStart()">Start</button>
<button class="btn btn-sm btn-danger" onclick="wsStop()">Stop</button>
</div>
</div>
<div style="display:grid;grid-template-columns:1fr 160px;gap:1rem;align-items:start">
<div class="form-row" style="margin:0">
<label class="form-label">Model ID <span class="muted">(used in API calls, e.g. whisper-base)</span></label>
<input type="text" id="ws-id" class="form-input" placeholder="whisper-server">
<span class="form-hint">The name clients use in the <code>model</code> field of transcription requests</span>
</div>
<div class="form-row" style="margin:0">
<label class="form-label">Port</label>
<input type="number" id="ws-port" class="form-input" placeholder="8744" min="1024" max="65535">
</div>
</div>
<div style="display:grid;grid-template-columns:1fr 160px;gap:1rem;align-items:start;margin-top:1rem">
<div class="form-row" style="margin:0">
<label class="form-label">whisper-server binary path</label>
<input type="text" id="ws-path" class="form-input" placeholder="/usr/local/bin/whisper-server">
</div>
<div class="form-row" style="margin:0">
<label class="form-label">GPU device index</label>
<input type="number" id="ws-gpu" class="form-input" placeholder="0" min="0">
</div>
</div>
<div class="form-row" style="margin-top:1rem;margin-bottom:0">
<label class="form-label">Model path <span class="muted">(GGUF whisper model, e.g. ggml-base.bin)</span></label>
<input type="text" id="ws-model" class="form-input" placeholder="/path/to/ggml-base.bin">
<span class="form-hint">Configure multiple instances by adding entries to <code>models.json</code> with <code>"backend": "whisper-server"</code></span>
</div>
<p class="form-hint" style="margin-top:.75rem;margin-bottom:0">
When configured, the transcription endpoint uses this subprocess instead of the Python faster-whisper module.
Saves settings to <code>config.json</code> and takes effect immediately (no restart needed).
</p>
</div>
{% endblock %}
{% block scripts %}
......@@ -89,13 +136,69 @@ async function loadSettings(){
document.getElementById('s-https').checked = !!d.server?.https;
document.getElementById('s-key').value = d.server?.https_key_path ?? '';
document.getElementById('s-cert').value = d.server?.https_cert_path ?? '';
document.getElementById('s-queue-max').value = d.server?.queue_max_size ?? 6;
document.getElementById('s-hf-cache').value = d.models?.hf_cache_dir ?? '';
document.getElementById('s-gguf-cache').value = d.models?.gguf_cache_dir ?? '';
document.getElementById('s-offload-dir').value = d.offload?.directory ?? './offload';
document.getElementById('ws-path').value = d.whisper?.server_path ?? '';
document.getElementById('ws-port').value = d.whisper?.server_port ?? 8744;
toggleHttps();
}catch(e){ showAlert('error','Failed to load settings: '+e.message); }
}
async function loadWsStatus(){
try{
const s = await fetch('/admin/api/whisper-server/status').then(r=>r.json());
const badge = document.getElementById('ws-badge');
// s is now a dict of {model_id: {running, model, url}}
const entries = Object.entries(s);
if(!entries.length){
badge.textContent = '○ not configured';
badge.style.color = 'var(--text-2)';
return;
}
const running = entries.filter(([,v])=>v.running);
if(running.length){
badge.textContent = `● ${running.length} running`;
badge.style.color = 'var(--green, #4ade80)';
} else {
badge.textContent = '○ stopped';
badge.style.color = 'var(--text-2)';
}
}catch(e){}
}
async function wsStart(){
const path = document.getElementById('ws-path').value.trim();
if(!path){ showAlert('error','Binary path required'); return; }
try{
const r = await fetch('/admin/api/whisper-server/start',{
method:'POST', headers:{'Content-Type':'application/json'},
body: JSON.stringify({
model_id: document.getElementById('ws-id').value.trim() || 'whisper-server',
server_path: path,
model_path: document.getElementById('ws-model').value.trim() || null,
port: parseInt(document.getElementById('ws-port').value) || 8744,
gpu_device: parseInt(document.getElementById('ws-gpu').value) || 0,
})
});
const d = await r.json();
if(d.success) showAlert('info','whisper-server started');
else showAlert('error','Failed to start whisper-server');
loadWsStatus();
}catch(e){ showAlert('error','Error: '+e.message); }
}
async function wsStop(){
const modelId = document.getElementById('ws-id').value.trim() || 'whisper-server';
await fetch('/admin/api/whisper-server/stop',{
method:'POST', headers:{'Content-Type':'application/json'},
body: JSON.stringify({model_id: modelId})
});
showAlert('info','whisper-server stopped');
loadWsStatus();
}
async function saveSettings(){
const strOrNull = id => document.getElementById(id).value.trim() || null;
const data = {
......@@ -105,6 +208,7 @@ async function saveSettings(){
https: document.getElementById('s-https').checked,
https_key_path: strOrNull('s-key'),
https_cert_path: strOrNull('s-cert'),
queue_max_size: parseInt(document.getElementById('s-queue-max').value) || 6,
},
models:{
hf_cache_dir: strOrNull('s-hf-cache'),
......@@ -112,7 +216,11 @@ async function saveSettings(){
},
offload:{
directory: document.getElementById('s-offload-dir').value.trim() || './offload',
}
},
whisper:{
server_path: document.getElementById('ws-path').value.trim() || null,
server_port: parseInt(document.getElementById('ws-port').value) || 8744,
},
};
try{
const r = await fetch('/admin/api/settings',{
......@@ -125,5 +233,7 @@ async function saveSettings(){
}
loadSettings();
loadWsStatus();
setInterval(loadWsStatus, 5000);
</script>
{% endblock %}
......@@ -19,12 +19,16 @@ FastAPI application module for codai API.
Contains the FastAPI app initialization, lifespan, and core endpoints.
"""
import logging
import os
from contextlib import asynccontextmanager
from typing import List
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import FileResponse, JSONResponse
logger = logging.getLogger(__name__)
# Import from codai modules
from codai.pydantic.textrequest import ModelList
from codai.models.manager import model_manager, multi_model_manager
......@@ -89,11 +93,19 @@ from codai.api.text import router as text_router
from codai.api.video import router as video_router
from codai.api.audio_gen import router as audio_gen_router
from codai.api.embeddings import router as embeddings_router
from codai.api.pipelines import router as pipelines_router
from codai.api.custom_pipelines import router as custom_pipelines_router
from codai.api.voice_clone import router as voice_clone_router
from codai.api.voice_convert import router as voice_convert_router
from codai.api.faceswap import router as faceswap_router
from codai.api.characters import router as characters_router
from codai.admin.routes import router as admin_router
# Import and add middleware
from codai.api.log import log_requests
from codai.api.ratelimit import RateLimitMiddleware
app.middleware("http")(log_requests)
app.add_middleware(RateLimitMiddleware)
# Mount static files for admin dashboard
from fastapi.staticfiles import StaticFiles
......@@ -110,6 +122,12 @@ app.include_router(text_router)
app.include_router(video_router)
app.include_router(audio_gen_router)
app.include_router(embeddings_router)
app.include_router(pipelines_router)
app.include_router(custom_pipelines_router)
app.include_router(voice_clone_router)
app.include_router(voice_convert_router)
app.include_router(faceswap_router)
app.include_router(characters_router)
app.include_router(admin_router)
......@@ -133,11 +151,14 @@ async def list_models():
@app.get("/v1/files/{filename}")
async def get_file(filename: str):
"""Serve uploaded/generated files."""
print(f"DEBUG get_file: filename={filename}, global_file_path={global_file_path}")
if global_file_path:
import os
file_path = os.path.join(global_file_path, filename)
print(f"DEBUG get_file: full path={file_path}, exists={os.path.exists(file_path)}")
if os.path.exists(file_path):
return FileResponse(file_path)
raise HTTPException(status_code=404, detail="File not found")
\ No newline at end of file
if not global_file_path:
raise HTTPException(status_code=404, detail="File not found")
# Prevent path traversal: resolve to real paths and confirm the result
# stays inside the configured directory.
safe_base = os.path.realpath(global_file_path)
candidate = os.path.realpath(os.path.join(global_file_path, filename))
if not (candidate == safe_base or candidate.startswith(safe_base + os.sep)):
raise HTTPException(status_code=403, detail="Access denied")
if not os.path.isfile(candidate):
raise HTTPException(status_code=404, detail="File not found")
return FileResponse(candidate)
\ No newline at end of file
# CoderAI - OpenAI-compatible API server
# Copyright (C) 2026 Stefy Lanza <stefy@nexlab.net>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
Character profile endpoints.
Saved character profiles are named collections of reference images used to
maintain visual consistency of a character across multiple video generations.
POST /v1/characters – save / update a character profile
GET /v1/characters – list all saved profiles (no images)
GET /v1/characters/{name} – get a profile including base64 images
DELETE /v1/characters/{name} – delete a profile
"""
import base64
import json
import os
import time
from typing import List, Optional
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel, ConfigDict
router = APIRouter()
_CHARS_DIR: Optional[str] = None
def set_global_args(args):
global _CHARS_DIR
base = getattr(args, 'file_path', None) or os.path.expanduser('~/.coderai')
root = base if os.path.isdir(base) else (os.path.dirname(base) if base else os.path.expanduser('~/.coderai'))
_CHARS_DIR = os.path.join(root, 'characters')
os.makedirs(_CHARS_DIR, exist_ok=True)
def set_global_file_path(path: str):
pass # not needed for characters
def _chars_dir() -> str:
if _CHARS_DIR:
return _CHARS_DIR
d = os.path.expanduser('~/.coderai/characters')
os.makedirs(d, exist_ok=True)
return d
def _char_dir(name: str) -> str:
return os.path.join(_chars_dir(), name)
# ── Pydantic models ───────────────────────────────────────────────────────────
class CharacterImage(BaseModel):
label: Optional[str] = None # e.g. "front", "side", "close-up"
data: str # base64 image (with or without data: prefix)
model_config = ConfigDict(extra="allow")
class CharacterSaveRequest(BaseModel):
name: str
description: Optional[str] = ""
images: List[CharacterImage] # one or more reference images
model_config = ConfigDict(extra="allow")
class CharacterProfile(BaseModel):
name: str
description: Optional[str] = ""
image_count: int
created_at: int
images: Optional[List[CharacterImage]] = None # only populated on GET /{name}
model_config = ConfigDict(extra="allow")
# ── Helpers ───────────────────────────────────────────────────────────────────
def _save_character(name: str, description: str, images: List[CharacterImage]) -> dict:
cdir = _char_dir(name)
os.makedirs(cdir, exist_ok=True)
img_files = []
for i, img in enumerate(images):
raw = img.data
if raw.startswith('data:'):
_, b64 = raw.split(',', 1)
else:
b64 = raw
img_bytes = base64.b64decode(b64)
# Detect PNG vs JPEG from magic bytes
ext = '.png' if img_bytes[:4] == b'\x89PNG' else '.jpg'
fname = f"ref{i:02d}{ext}"
fpath = os.path.join(cdir, fname)
with open(fpath, 'wb') as f:
f.write(img_bytes)
img_files.append({'file': fname, 'label': img.label or f'ref{i}'})
meta = {
'name': name,
'description': description,
'images': img_files,
'image_count': len(img_files),
'created_at': int(time.time()),
}
with open(os.path.join(cdir, 'meta.json'), 'w') as f:
json.dump(meta, f)
return meta
def _load_character_meta(name: str) -> Optional[dict]:
meta_path = os.path.join(_char_dir(name), 'meta.json')
if not os.path.exists(meta_path):
return None
with open(meta_path) as f:
return json.load(f)
def _load_character_images(name: str) -> List[CharacterImage]:
meta = _load_character_meta(name)
if not meta:
return []
cdir = _char_dir(name)
result = []
for img_info in meta.get('images', []):
fpath = os.path.join(cdir, img_info['file'])
if not os.path.exists(fpath):
continue
with open(fpath, 'rb') as f:
raw = f.read()
ext = img_info['file'].rsplit('.', 1)[-1]
mime = 'image/png' if ext == 'png' else 'image/jpeg'
b64 = base64.b64encode(raw).decode()
result.append(CharacterImage(
label=img_info.get('label'),
data=f"data:{mime};base64,{b64}",
))
return result
def _list_characters() -> list:
d = _chars_dir()
profiles = []
for entry in os.scandir(d):
if entry.is_dir():
meta = _load_character_meta(entry.name)
if meta:
profiles.append({k: v for k, v in meta.items() if k != 'images'})
return sorted(profiles, key=lambda p: p.get('created_at', 0))
def resolve_character_profiles(profile_names: List[str]) -> List[str]:
"""Resolve saved profile names → flat list of base64 image strings."""
out = []
for name in profile_names:
for img in _load_character_images(name):
out.append(img.data)
return out
# ── Endpoints ─────────────────────────────────────────────────────────────────
@router.post("/v1/characters")
async def save_character(req: CharacterSaveRequest):
"""Save or update a named character profile."""
if not req.name or '/' in req.name or '..' in req.name:
raise HTTPException(status_code=400, detail="Invalid character name")
if not req.images:
raise HTTPException(status_code=400, detail="At least one reference image required")
meta = _save_character(req.name, req.description or '', req.images)
return {"ok": True, "name": meta['name'], "image_count": meta['image_count']}
@router.get("/v1/characters")
async def list_characters():
"""List all saved character profiles (metadata only, no images)."""
return {"characters": _list_characters()}
@router.get("/v1/characters/{name}")
async def get_character(name: str):
"""Get a character profile including its reference images as base64."""
meta = _load_character_meta(name)
if not meta:
raise HTTPException(status_code=404, detail=f"Character '{name}' not found")
images = _load_character_images(name)
return {
"name": meta['name'],
"description": meta.get('description', ''),
"image_count": meta['image_count'],
"created_at": meta['created_at'],
"images": [img.model_dump() for img in images],
}
@router.delete("/v1/characters/{name}")
async def delete_character(name: str):
"""Delete a character profile."""
cdir = _char_dir(name)
if not os.path.isdir(cdir):
raise HTTPException(status_code=404, detail=f"Character '{name}' not found")
import shutil
shutil.rmtree(cdir)
return {"ok": True, "name": name}
This diff is collapsed.
"""
Face swap endpoint.
POST /v1/images/faceswap — swap face in image or video frames
"""
import asyncio
import base64
import io
import os
import subprocess
import tempfile
import time
from typing import Optional
import cv2
import numpy as np
from fastapi import APIRouter, HTTPException, Request
from PIL import Image
from pydantic import BaseModel, ConfigDict
from codai.api.images import save_image_response
router = APIRouter()
global_args = None
global_file_path = None
_INSWAPPER_MODEL_PATH = os.path.expanduser('~/.insightface/models/inswapper_128.onnx')
_INSWAPPER_HF_REPO = 'deepinsight/inswapper'
_INSWAPPER_HF_FILE = 'inswapper_128.onnx'
_face_app = None # FaceAnalysis singleton
_swapper = None # INSwapper singleton
def set_global_args(args):
global global_args
global_args = args
def set_global_file_path(path):
global global_file_path
global_file_path = path
def _ensure_model():
"""Download inswapper_128.onnx if not present."""
if os.path.exists(_INSWAPPER_MODEL_PATH):
return
os.makedirs(os.path.dirname(_INSWAPPER_MODEL_PATH), exist_ok=True)
print(f'Downloading inswapper_128.onnx from HuggingFace…')
try:
from huggingface_hub import hf_hub_download
path = hf_hub_download(
repo_id=_INSWAPPER_HF_REPO,
filename=_INSWAPPER_HF_FILE,
local_dir=os.path.dirname(_INSWAPPER_MODEL_PATH),
)
if path != _INSWAPPER_MODEL_PATH:
import shutil
shutil.move(path, _INSWAPPER_MODEL_PATH)
except Exception as e:
raise RuntimeError(f'Failed to download inswapper model: {e}')
def _get_face_app():
global _face_app
if _face_app is None:
from insightface.app import FaceAnalysis
_face_app = FaceAnalysis(name='buffalo_l', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
_face_app.prepare(ctx_id=0, det_size=(640, 640))
return _face_app
def _get_swapper():
global _swapper
if _swapper is None:
_ensure_model()
from insightface.model_zoo import get_model
_swapper = get_model(_INSWAPPER_MODEL_PATH, download=False)
_swapper.prepare(ctx_id=0)
return _swapper
def _decode_image(data: str) -> np.ndarray:
"""Decode base64 or data-URI image to BGR numpy array."""
if data.startswith('data:'):
_, b64 = data.split(',', 1)
data = b64
raw = base64.b64decode(data)
arr = np.frombuffer(raw, np.uint8)
img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
if img is None:
raise ValueError('Could not decode image')
return img
def _swap_faces(source_img: np.ndarray, target_img: np.ndarray) -> np.ndarray:
"""Swap all faces in target_img with the face from source_img."""
app = _get_face_app()
swapper = _get_swapper()
src_faces = app.get(source_img)
if not src_faces:
raise ValueError('No face detected in source image')
src_face = src_faces[0]
tgt_faces = app.get(target_img)
if not tgt_faces:
return target_img # no face to swap in target, return as-is
result = target_img.copy()
for tgt_face in tgt_faces:
result = swapper.get(result, tgt_face, src_face, paste_back=True)
return result
def _decode_b64_or_url(data: str) -> bytes:
if data.startswith('data:'):
_, b64 = data.split(',', 1)
return base64.b64decode(b64)
if data.startswith('http'):
import urllib.request
with urllib.request.urlopen(data, timeout=30) as r:
return r.read()
return base64.b64decode(data)
# ---------------------------------------------------------------------------
# Request model
# ---------------------------------------------------------------------------
class FaceSwapRequest(BaseModel):
source_face: str # base64/data-URI image containing the source face
target: str # base64/data-URI image OR video to swap into
target_type: Optional[str] = 'image' # 'image' or 'video'
response_format: Optional[str] = 'url'
model_config = ConfigDict(extra='allow')
# ---------------------------------------------------------------------------
# Endpoint
# ---------------------------------------------------------------------------
@router.post('/v1/images/faceswap')
async def faceswap(request: FaceSwapRequest, http_request: Request = None):
"""
Swap the face from source_face into every face found in target.
target_type: 'image' (default) or 'video'.
"""
try:
_ensure_model()
except RuntimeError as e:
raise HTTPException(status_code=503, detail=str(e))
try:
src_img = _decode_image(request.source_face)
except Exception as e:
raise HTTPException(status_code=400, detail=f'Invalid source_face: {e}')
if request.target_type == 'video':
return await _faceswap_video(src_img, request, http_request)
else:
return await _faceswap_image(src_img, request, http_request)
async def _faceswap_image(src_img, request, http_request):
try:
tgt_img = _decode_image(request.target)
except Exception as e:
raise HTTPException(status_code=400, detail=f'Invalid target: {e}')
try:
result = await asyncio.get_event_loop().run_in_executor(
None, _swap_faces, src_img, tgt_img)
except ValueError as e:
raise HTTPException(status_code=422, detail=str(e))
except Exception as e:
raise HTTPException(status_code=500, detail=f'Face swap failed: {e}')
pil_img = Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
img_data = save_image_response(pil_img, request.response_format, http_request)
return {'created': int(time.time()), 'data': [img_data]}
async def _faceswap_video(src_img, request, http_request):
raw = _decode_b64_or_url(request.target)
temps = []
try:
# Write input video
in_tmp = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
in_tmp.write(raw); in_tmp.close()
in_path = in_tmp.name
temps.append(in_path)
# Extract frames
frames_dir = tempfile.mkdtemp()
temps.append(frames_dir)
subprocess.run(
['ffmpeg', '-y', '-i', in_path, f'{frames_dir}/%08d.png'],
capture_output=True, check=True)
# Get FPS for reassembly
probe = subprocess.run(
['ffprobe', '-v', 'error', '-select_streams', 'v:0',
'-show_entries', 'stream=r_frame_rate', '-of', 'default=nw=1:nk=1', in_path],
capture_output=True, text=True)
fps_str = probe.stdout.strip() or '25/1'
num, den = fps_str.split('/')
fps = float(num) / float(den)
# Swap faces in each frame
frame_files = sorted(os.listdir(frames_dir))
def _process_frames():
app = _get_face_app()
swapper = _get_swapper()
src_faces = app.get(src_img)
if not src_faces:
raise ValueError('No face detected in source image')
src_face = src_faces[0]
for fname in frame_files:
fpath = os.path.join(frames_dir, fname)
frame = cv2.imread(fpath)
if frame is None:
continue
tgt_faces = app.get(frame)
for tgt_face in tgt_faces:
frame = swapper.get(frame, tgt_face, src_face, paste_back=True)
cv2.imwrite(fpath, frame)
await asyncio.get_event_loop().run_in_executor(None, _process_frames)
# Reassemble video (copy original audio)
out_path = tempfile.mktemp(suffix='_swapped.mp4')
temps.append(out_path)
subprocess.run(
['ffmpeg', '-y', '-framerate', str(fps), '-i', f'{frames_dir}/%08d.png',
'-i', in_path, '-map', '0:v', '-map', '1:a?',
'-c:v', 'libx264', '-c:a', 'copy', '-shortest', out_path],
capture_output=True, check=True)
with open(out_path, 'rb') as f:
out_bytes = f.read()
if global_file_path:
import uuid
fname = f'{uuid.uuid4().hex}_swapped.mp4'
fpath = os.path.join(global_file_path, fname)
os.makedirs(global_file_path, exist_ok=True)
with open(fpath, 'wb') as f:
f.write(out_bytes)
host = http_request.headers.get('host', '127.0.0.1') if http_request else '127.0.0.1'
if ':' in host:
parts = host.split(':')
if len(parts) == 2 and parts[1].isdigit():
host = parts[0]
proto = 'https' if getattr(global_args, 'https', False) else 'http'
port = getattr(global_args, 'port', 8000) if global_args else 8000
data = [{'url': f'{proto}://{host}:{port}/v1/files/{fname}'}]
else:
data = [{'b64_mp4': base64.b64encode(out_bytes).decode()}]
return {'created': int(time.time()), 'data': data}
except subprocess.CalledProcessError as e:
raise HTTPException(status_code=500, detail=f'ffmpeg error: {e.stderr.decode()[:200]}')
except ValueError as e:
raise HTTPException(status_code=422, detail=str(e))
except Exception as e:
raise HTTPException(status_code=500, detail=f'Video face swap failed: {e}')
finally:
import shutil
for t in temps:
try:
if os.path.isdir(t):
shutil.rmtree(t)
else:
os.unlink(t)
except Exception:
pass
This diff is collapsed.
This diff is collapsed.
# CoderAI - OpenAI-compatible API server
# Copyright (C) 2026 Stefy Lanza <stefy@nexlab.net>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
"""Simple in-process token-bucket rate limiter middleware.
Each distinct (client-IP, route-prefix) pair gets its own bucket.
Limits are configured via RateLimitConfig. The defaults below are
intentionally generous; tighten them through the config file or CLI.
Endpoints covered:
/v1/chat/completions — expensive LLM inference
/v1/images/ — image generation
/v1/audio/ — TTS / STT / audio generation
/v1/video/ — video generation
/v1/embeddings — embedding
/v1/completions — legacy completions
"""
import time
import threading
from collections import defaultdict
from typing import Dict, Tuple
from fastapi import Request, Response
from fastapi.responses import JSONResponse
from starlette.middleware.base import BaseHTTPMiddleware
# Per-route-prefix defaults: (max_requests, window_seconds)
_DEFAULT_LIMITS: Dict[str, Tuple[int, int]] = {
"/v1/chat/completions": (60, 60),
"/v1/completions": (60, 60),
"/v1/images/": (30, 60),
"/v1/audio/": (60, 60),
"/v1/video/": (10, 60),
"/v1/embeddings": (120, 60),
}
# API prefixes that count against the request queue
_QUEUED_PREFIXES = ("/v1/",)
# Global toggle — set to False to disable rate limiting entirely.
RATE_LIMITING_ENABLED = True
class _Bucket:
"""Fixed-window counter."""
__slots__ = ("count", "window_start")
def __init__(self, now: float):
self.count = 0
self.window_start = now
class RateLimitMiddleware(BaseHTTPMiddleware):
"""Apply per-IP, per-route-prefix rate limiting to API endpoints."""
def __init__(self, app, limits: Dict[str, Tuple[int, int]] = None):
super().__init__(app)
self._limits = limits or _DEFAULT_LIMITS
# (client_ip, prefix) → _Bucket
self._buckets: Dict[Tuple[str, str], _Bucket] = defaultdict(lambda: _Bucket(time.monotonic()))
self._lock = threading.Lock()
def _get_prefix(self, path: str) -> str:
for prefix in self._limits:
if path.startswith(prefix):
return prefix
return ""
async def dispatch(self, request: Request, call_next):
if not RATE_LIMITING_ENABLED:
return await call_next(request)
path = request.url.path
# Queue-size enforcement for authenticated API requests
if any(path.startswith(p) for p in _QUEUED_PREFIXES):
from codai.queue.manager import queue_manager
if await queue_manager.is_full():
return JSONResponse(
status_code=429,
content={
"error": {
"message": "Server queue is full. Please retry later.",
"type": "rate_limit_error",
"code": 429,
}
},
headers={"Retry-After": "5"},
)
prefix = self._get_prefix(path)
if not prefix:
return await call_next(request)
max_req, window = self._limits[prefix]
client_ip = (
request.headers.get("x-forwarded-for", "").split(",")[0].strip()
or (request.client.host if request.client else "unknown")
)
key = (client_ip, prefix)
now = time.monotonic()
with self._lock:
bucket = self._buckets[key]
if now - bucket.window_start >= window:
bucket.count = 0
bucket.window_start = now
bucket.count += 1
count = bucket.count
remaining = max(0, max_req - count)
reset_at = int(time.time() + (window - (now - self._buckets[key].window_start)))
if count > max_req:
return JSONResponse(
status_code=429,
content={
"error": {
"message": "Rate limit exceeded. Please slow down.",
"type": "rate_limit_error",
"code": 429,
}
},
headers={
"X-RateLimit-Limit": str(max_req),
"X-RateLimit-Remaining": "0",
"X-RateLimit-Reset": str(reset_at),
"Retry-After": str(window),
},
)
response = await call_next(request)
response.headers["X-RateLimit-Limit"] = str(max_req)
response.headers["X-RateLimit-Remaining"] = str(remaining)
response.headers["X-RateLimit-Reset"] = str(reset_at)
return response
This diff is collapsed.
This diff is collapsed.
......@@ -263,7 +263,39 @@ def _apply_camera_motion(kw: dict, camera_motion: str):
kw['camera_motion'] = camera_motion
def _apply_character_refs(kw: dict, character_references: List[str], strength: float):
def _resolve_character_inputs(request) -> tuple[List[str], List[str]]:
"""Return (flat_image_list, name_list) from any combination of request fields."""
images: List[str] = []
names: List[str] = []
# 1. Expand named saved profiles
if request.character_profiles:
try:
from codai.api.characters import resolve_character_profiles
images += resolve_character_profiles(request.character_profiles)
names += list(request.character_profiles)
except Exception:
pass
# 2. Named character slots [{name, images:[...]}, ...]
if request.characters:
for slot in request.characters:
slot_imgs = slot.get('images') or []
images += slot_imgs
if slot.get('name'):
names.append(slot['name'])
# 3. Legacy flat list
if request.character_references:
images += list(request.character_references)
if request.character_names:
names += list(request.character_names)
return images, names
def _apply_character_refs(kw: dict, character_references: List[str], strength: float,
names: Optional[List[str]] = None):
"""Apply character reference images to pipeline kwargs."""
if not character_references:
return
......@@ -291,8 +323,13 @@ def _generate_video(pipe, request: VideoGenerationRequest):
_apply_camera_motion(kw, request.camera_motion)
if request.character_references:
_apply_character_refs(kw, request.character_references, request.character_strength or 0.8)
char_images, char_names = _resolve_character_inputs(request)
if char_images:
_apply_character_refs(kw, char_images, request.character_strength or 0.8, char_names)
# Prepend character names to prompt for better conditioning
if char_names and kw.get('prompt'):
names_hint = ', '.join(char_names)
kw['prompt'] = f"{names_hint}. {kw['prompt']}"
init_src = request.init_image or request.image
......@@ -359,35 +396,49 @@ def _ffmpeg_upscale(path: str, factor: int, temps: list) -> str:
scale = f"scale=iw*{factor}:ih*{factor}:flags=lanczos"
cmd = ['ffmpeg', '-y', '-i', path, '-vf', scale, '-c:a', 'copy', out]
r = subprocess.run(cmd, capture_output=True)
if r.returncode == 0:
return out
return path # fallback to original if ffmpeg fails
if r.returncode != 0:
import logging
logging.getLogger(__name__).warning(
"ffmpeg upscale failed (rc=%d): %s", r.returncode, r.stderr.decode(errors='replace')
)
return path # fallback to original if ffmpeg fails
return out
def _rife_interpolate(path: str, multiplier: int, temps: list) -> str:
out = tempfile.mktemp(suffix='_rife.mp4')
temps.append(out)
# Try rife-ncnn-vulkan binary if available
import shutil
import logging, shutil
_log = logging.getLogger(__name__)
if shutil.which('rife-ncnn-vulkan'):
frames_dir = tempfile.mkdtemp()
out_dir = tempfile.mkdtemp()
temps += [frames_dir, out_dir]
subprocess.run(['ffmpeg', '-y', '-i', path, f'{frames_dir}/%08d.png'],
capture_output=True)
subprocess.run(['rife-ncnn-vulkan', '-i', frames_dir, '-o', out_dir,
'-m', f'rife-v4'], capture_output=True)
subprocess.run(['ffmpeg', '-y', '-r', str(multiplier * 8), '-i',
f'{out_dir}/%08d.png', '-c:v', 'libx264', out],
capture_output=True)
if os.path.exists(out):
return out
r = subprocess.run(['ffmpeg', '-y', '-i', path, f'{frames_dir}/%08d.png'],
capture_output=True)
if r.returncode != 0:
_log.warning("ffmpeg frame extraction failed: %s", r.stderr.decode(errors='replace'))
else:
r = subprocess.run(['rife-ncnn-vulkan', '-i', frames_dir, '-o', out_dir,
'-m', 'rife-v4'], capture_output=True)
if r.returncode != 0:
_log.warning("rife-ncnn-vulkan failed: %s", r.stderr.decode(errors='replace'))
else:
r = subprocess.run(['ffmpeg', '-y', '-r', str(multiplier * 8), '-i',
f'{out_dir}/%08d.png', '-c:v', 'libx264', out],
capture_output=True)
if r.returncode != 0:
_log.warning("ffmpeg reassembly failed: %s", r.stderr.decode(errors='replace'))
elif os.path.exists(out):
return out
# Simple ffmpeg minterpolate fallback
fps_expr = f"fps=fps={multiplier}*source_fps"
cmd = ['ffmpeg', '-y', '-i', path, '-filter:v',
f'minterpolate=fps={multiplier * 8}', '-c:a', 'copy', out]
r = subprocess.run(cmd, capture_output=True)
return out if r.returncode == 0 else path
if r.returncode != 0:
_log.warning("ffmpeg minterpolate failed: %s", r.stderr.decode(errors='replace'))
return path
return out
def _add_audio_to_video(path: str, request: VideoGenerationRequest,
......
This diff is collapsed.
"""
Voice conversion endpoint — converts timbre while preserving pitch, melody and expression.
Unlike TTS-based dubbing, this works correctly for singing and music.
POST /v1/audio/convert — convert voice timbre in audio (speech or singing)
"""
import asyncio
import base64
import io
import os
import tempfile
import time
from typing import Optional
import numpy as np
import soundfile as sf
from fastapi import APIRouter, HTTPException, Request
from pydantic import BaseModel, ConfigDict
router = APIRouter()
global_args = None
global_file_path = None
_wrapper = None # SeedVCWrapper singleton
def set_global_args(args):
global global_args
global_args = args
def set_global_file_path(path):
global global_file_path
global_file_path = path
def _get_wrapper():
global _wrapper
if _wrapper is None:
from seed_vc.seed_vc_wrapper import SeedVCWrapper
_wrapper = SeedVCWrapper()
return _wrapper
def _decode_audio_to_file(data: str, suffix: str = '.wav') -> str:
if data.startswith('data:'):
_, b64 = data.split(',', 1)
raw = base64.b64decode(b64)
else:
raw = base64.b64decode(data)
tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
tmp.write(raw)
tmp.close()
return tmp.name
def _save_response(audio_np: np.ndarray, sr: int, http_request) -> dict:
buf = io.BytesIO()
sf.write(buf, audio_np, sr, format='WAV')
wav_bytes = buf.getvalue()
import uuid
filename = f'{uuid.uuid4().hex}_converted.wav'
if global_file_path:
os.makedirs(global_file_path, exist_ok=True)
fpath = os.path.join(global_file_path, filename)
with open(fpath, 'wb') as f:
f.write(wav_bytes)
host = http_request.headers.get('host', '127.0.0.1') if http_request else '127.0.0.1'
if ':' in host:
parts = host.split(':')
if len(parts) == 2 and parts[1].isdigit():
host = parts[0]
proto = 'https' if getattr(global_args, 'https', False) else 'http'
port = getattr(global_args, 'port', 8000) if global_args else 8000
return {'url': f'{proto}://{host}:{port}/v1/files/{filename}'}
return {'b64_wav': base64.b64encode(wav_bytes).decode()}
class VoiceConvertRequest(BaseModel):
"""
Convert the timbre of source_audio to match target_voice,
while preserving pitch, melody, rhythm and expression.
Use f0_condition=True for singing/music (slower but pitch-accurate).
Use f0_condition=False for speech (faster).
"""
source_audio: str # base64 audio to convert (the performance)
target_voice: Optional[str] = None # base64 reference audio for target timbre
voice_name: Optional[str] = None # saved voice profile name
f0_condition: Optional[bool] = False # True = singing/music mode (preserves pitch)
pitch_shift: Optional[int] = 0 # semitones to shift after conversion
diffusion_steps: Optional[int] = 10 # quality vs speed (10–30)
length_adjust: Optional[float] = 1.0
inference_cfg_rate: Optional[float] = 0.7
response_format: Optional[str] = 'url'
model_config = ConfigDict(extra='allow')
@router.post('/v1/audio/convert')
async def convert_voice(request: VoiceConvertRequest, http_request: Request = None):
"""
Voice conversion: preserves pitch/melody/expression, changes only timbre.
Set f0_condition=True for singing and music.
"""
target_path = None
temps = []
try:
if request.voice_name:
from codai.api.voice_clone import _load_voice
meta = _load_voice(request.voice_name)
if not meta:
raise HTTPException(status_code=404, detail=f"Voice '{request.voice_name}' not found")
target_path = meta['audio_file']
elif request.target_voice:
target_path = _decode_audio_to_file(request.target_voice)
temps.append(target_path)
else:
raise HTTPException(status_code=400, detail='Provide voice_name or target_voice')
source_path = _decode_audio_to_file(request.source_audio)
temps.append(source_path)
try:
wrapper = _get_wrapper()
except ImportError:
raise HTTPException(status_code=501,
detail='seed-vc not installed. Run: pip install seed-vc')
def _run():
return wrapper.convert_voice(
source=source_path,
target=target_path,
diffusion_steps=request.diffusion_steps or 10,
length_adjust=request.length_adjust or 1.0,
inference_cfg_rate=request.inference_cfg_rate or 0.7,
f0_condition=bool(request.f0_condition),
pitch_shift=request.pitch_shift or 0,
stream_output=False,
)
try:
audio_out = await asyncio.get_event_loop().run_in_executor(None, _run)
except Exception as e:
raise HTTPException(status_code=500, detail=f'Voice conversion failed: {e}')
sr = 44100 if request.f0_condition else 22050
if isinstance(audio_out, tuple):
audio_out = audio_out[0]
result = _save_response(np.array(audio_out).flatten(), sr, http_request)
return {'created': int(time.time()), 'data': [result]}
finally:
for t in temps:
try:
os.unlink(t)
except Exception:
pass
This diff is collapsed.
......@@ -368,7 +368,21 @@ def main():
audio_models = models_config.get("audio_models", [])
for m in audio_models:
mid = _model_id(m)
if mid:
if not mid:
continue
backend = m.get("backend", "") if isinstance(m, dict) else ""
if backend == "whisper-server":
# Register as a whisper-server instance
cfg = _model_cfg(m, "audio")
multi_model_manager.register_whisper_server(
model_id=mid,
server_path=m.get("server_path", config.whisper.server_path or ""),
model_path=m.get("model_path") or None,
port=int(m.get("port", config.whisper.server_port)),
gpu_device=int(m.get("gpu_device", config.vulkan.device_id)),
config=cfg,
)
else:
multi_model_manager.set_audio_model(mid, config=_model_cfg(m, "audio"))
# Image models
......@@ -446,7 +460,18 @@ def main():
print(f" Loaded: {mid}")
else:
print(f" Warning: {mid} failed to load")
# image/audio/vision/tts pre-loading is handled by their respective
elif mtype == "audio" and mid in multi_model_manager.whisper_servers:
wsm = multi_model_manager.whisper_servers[mid]
result = wsm.start(wsm._model_path, gpu_device=wsm._gpu_device)
if wsm.is_running():
ws_key = f"audio:{mid}"
multi_model_manager.models[ws_key] = wsm
multi_model_manager.active_in_vram = ws_key
multi_model_manager.models_in_vram.add(ws_key)
print(f" whisper-server started: {mid}")
else:
print(f" Warning: whisper-server '{mid}' failed to start")
# image/vision/tts pre-loading is handled by their respective
# API modules on first request; we just log intent here.
else:
print(f" Note: pre-loading for {mtype} models happens on first request")
......@@ -550,6 +575,27 @@ def main():
if global_file_path:
set_audiogen_file_path(global_file_path)
# Set voice clone module global args
from codai.api.voice_clone import set_global_args as set_vc_global_args, set_global_file_path as set_vc_file_path
set_vc_global_args(global_args)
if global_file_path:
set_vc_file_path(global_file_path)
from codai.api.voice_convert import set_global_args as set_vconv_global_args, set_global_file_path as set_vconv_file_path
set_vconv_global_args(global_args)
if global_file_path:
set_vconv_file_path(global_file_path)
# Set faceswap module global args
from codai.api.faceswap import set_global_args as set_fs_global_args, set_global_file_path as set_fs_file_path
set_fs_global_args(global_args)
if global_file_path:
set_fs_file_path(global_file_path)
# Set character profiles module global args
from codai.api.characters import set_global_args as set_chars_global_args
set_chars_global_args(global_args)
# Set embeddings module global args
from codai.api.embeddings import set_global_args as set_embed_global_args
set_embed_global_args(global_args)
......@@ -585,6 +631,10 @@ def main():
# Apply queue max size from config
from codai.queue.manager import queue_manager
queue_manager.max_size = config.server.queue_max_size
# Start the server
import uvicorn
print(f"\nStarting server on http://{config.server.host}:{config.server.port}")
......
This diff is collapsed.
# codai.openai — optional LiteLLM integration layer
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
videogen @ 04778e17
Subproject commit 04778e172a9a83d0778f566045f995828c6c3556
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment