Almost all ready

parent c741ff5b
......@@ -21,6 +21,18 @@ township_output
dist
dist-package
*.log
tmp
debug.log
CoderAI.gif
# Produced artifacts and tool session/output dirs (mounted as volumes at runtime,
# never baked into the image)
video_editor/sessions
video_editor.config.json
tools/videogen_output
tools/township_output
tools/coderai_media
samples
# Build outputs
build
......
......@@ -4,7 +4,7 @@
![CoderAI](CoderAI.gif)
An OpenAI-compatible API server to run models on your local GPU with web administration dashboard, supporting multiple GPU backends: NVIDIA (CUDA), AMD (Vulkan), and Intel (Vulkan). Configuration-driven architecture with per-model settings and full multi-modal support.
A multimodal and multi-backend local model orchestrator with an OpenAI-compatible API server to run models on local GPUs, supporting multiple GPU backends: NVIDIA (CUDA), AMD (Vulkan), and Intel (Vulkan). Configuration-driven architecture with per-model settings and full multi-modal support.
## Features
......
......@@ -1420,6 +1420,37 @@ def _scan_caches() -> dict:
"configs": all_configs.get(path, []),
})
# Add configured non-GGUF HF models whose files have been evicted from disk
# (e.g. via "Free disk"). They are absent from the HF cache scan above, so
# surface them here as missing so they keep a Re-download button.
from codai.models.cache import is_huggingface_model_id
existing_hf_ids = {m["id"] for m in result["hf"]}
for path, (settings, mtype) in configured_settings.items():
if path in existing_hf_ids:
continue
s = settings if isinstance(settings, dict) else {}
if s.get("backend") == "whisper-server":
continue
# Only HF-style repo IDs (owner/repo) — skip local paths and GGUF files
if os.path.isabs(path) or path.endswith('.gguf') or not is_huggingface_model_id(path):
continue
# A real local relative path that still exists isn't an evicted model
if os.path.exists(path):
continue
caps = s.get("capabilities") or detect_model_capabilities(path).to_list()
result["hf"].append({
"id": path,
"size_gb": 0, "size_bytes": 0, "revision_count": 0,
"files": [], "file_count": 0,
"in_config": True, "missing": True,
"source_repo": path,
"model_type": mtype if mtype and mtype != "gguf_models" else "text_models",
"settings": s,
"capabilities": caps,
"incomplete": False,
"configs": all_configs.get(path, []),
})
return result
......@@ -1613,6 +1644,96 @@ async def api_delete_cached_model(
return await asyncio.to_thread(_do_delete_model, model_id, cache_type)
@router.post("/admin/api/model-free-disk", summary="Delete a model's files but keep its config")
async def api_model_free_disk(request: Request, username: str = Depends(require_admin)):
    """Reclaim disk space by deleting a model's files while keeping its config.

    The models.json entry is left in place so the model can be re-downloaded
    on demand. When the request carries a ``source_repo`` that differs from
    the model path, it is persisted onto the matching config entries *before*
    the files are deleted, so the Re-download button has a target once the
    file is gone.

    Request JSON body (must be an object):
        path / model_id: the model to free (required).
        cache_type: forwarded to ``_do_delete_model`` (default ``"gguf"``).
        source_repo: optional repo id to remember on the config entry.

    Returns:
        Whatever ``_do_delete_model`` returns for the path / cache type.

    Raises:
        HTTPException: 503 when the config manager is not initialized; 400
            when the body is not a JSON object or no path was supplied.
    """
    if config_manager is None:
        raise HTTPException(status_code=503, detail="Config manager not initialized")
    import asyncio
    import os as _os

    data = await request.json()
    # A JSON list/string/number has no .get(); reject it as a client error
    # instead of letting an AttributeError surface as a 500.
    if not isinstance(data, dict):
        raise HTTPException(status_code=400, detail="JSON object body is required")
    path = (data.get("path") or data.get("model_id") or "").strip()
    cache_type = data.get("cache_type", "gguf")
    source_repo = (data.get("source_repo") or "").strip()
    if not path:
        raise HTTPException(status_code=400, detail="path is required")

    # Persist source_repo onto the matching config entries so re-download works
    # after the file is deleted (flat GGUF files retain no HF repo info on disk).
    # Skip when the entry key already IS the repo id (HF models re-download by id).
    if source_repo and source_repo != path:
        # Only fall back to basename matching when the path has a directory part.
        fname = _os.path.basename(path) if ("/" in path or _os.sep in path) else ""
        changed = False
        for cat in ("text_models", "image_models", "audio_models",
                    "gguf_models", "tts_models", "vision_models", "video_models",
                    "audio_gen_models", "embedding_models", "spatial_models"):
            lst = config_manager.models_data.get(cat, [])
            for i, m in enumerate(lst):
                key = m if isinstance(m, str) else (m.get("path") or m.get("id") or "")
                if key == path or (fname and _os.path.basename(key) == fname):
                    if isinstance(m, str):
                        # Promote a bare path string to a dict so it can carry the repo.
                        lst[i] = {"path": m, "source_repo": source_repo}
                        changed = True
                    elif not m.get("source_repo"):
                        # Never overwrite a source_repo that is already recorded.
                        m["source_repo"] = source_repo
                        changed = True
        if changed:
            config_manager.save_models()

    # File deletion runs in a worker thread so the event loop is not blocked.
    result = await asyncio.to_thread(_do_delete_model, path, cache_type)
    _broker_notify_models_updated(request)
    return result
@router.post("/admin/api/model-add-known", summary="Register a model in config without downloading")
async def api_model_add_known(request: Request, username: str = Depends(require_admin)):
    """Add a model to models.json as a known-but-not-downloaded reference.

    The model then appears in the model list as "missing" with a working
    Re-download button, without fetching any files now — the same end state as
    "Free disk", but reached without ever having the files locally.

    Request JSON body (must be an object):
        model_id / path: identifier of the model to register (required).
        source_repo: repo to re-download from (defaults to the model id).
        model_type: target category; unknown values fall back to
            ``"text_models"``.
        is_gguf: force the entry into ``"gguf_models"``.

    Returns:
        ``{"success": True}`` when the entry was added, or
        ``{"success": True, "already": True}`` when a matching entry exists.

    Raises:
        HTTPException: 503 when the config manager is not initialized; 400
            when the body is not a JSON object or no model id was supplied.
    """
    if config_manager is None:
        raise HTTPException(status_code=503, detail="Config manager not initialized")
    import os as _os

    data = await request.json()
    # A JSON list/string/number has no .get(); reject it as a client error
    # instead of letting an AttributeError surface as a 500.
    if not isinstance(data, dict):
        raise HTTPException(status_code=400, detail="JSON object body is required")
    model_id = (data.get("model_id") or data.get("path") or "").strip()
    if not model_id:
        raise HTTPException(status_code=400, detail="model_id is required")
    source_repo = (data.get("source_repo") or model_id).strip()
    model_type = (data.get("model_type") or "").strip()
    is_gguf = (bool(data.get("is_gguf")) or model_type == "gguf_models"
               or "gguf" in model_id.lower())
    valid = {"text_models", "image_models", "audio_models", "gguf_models", "tts_models",
             "vision_models", "video_models", "audio_gen_models", "embedding_models",
             "spatial_models"}
    if is_gguf:
        model_type = "gguf_models"
    if model_type not in valid:
        model_type = "text_models"

    # GGUF entries must persist source_repo so Re-download has a target (flat GGUF
    # files keep no repo info on disk). Plain HF repos re-download by id, so a bare
    # path string is enough and surfaces as a missing HF model.
    if is_gguf:
        entry = {"path": model_id, "source_repo": source_repo}
    else:
        entry = model_id

    # Dedupe across all categories by path / basename so we don't double-add.
    fname = _os.path.basename(model_id) if ("/" in model_id or _os.sep in model_id) else model_id
    for cat in valid:
        for m in config_manager.models_data.get(cat, []):
            key = m if isinstance(m, str) else (m.get("path") or m.get("id") or "")
            if key == model_id or (fname and _os.path.basename(key) == fname):
                return {"success": True, "already": True}

    config_manager.models_data.setdefault(model_type, []).append(entry)
    config_manager.save_models()
    _broker_notify_models_updated(request)
    return {"success": True}
@router.post("/admin/api/model-enable", summary="Enable a model")
async def api_model_enable(request: Request, username: str = Depends(require_admin)):
"""Register a cached model in models.json so CoderAI can use it."""
......
This diff is collapsed.
......@@ -67,6 +67,30 @@ def _make_llama_thermal_criteria():
except Exception:
return None
# Tri-state cache: None = not probed yet, otherwise the probed boolean.
_CHAT_SUPPORTS_STOPPING_CRITERIA = None


def _chat_supports_stopping_criteria() -> bool:
    """Report whether ``Llama.create_chat_completion`` takes ``stopping_criteria``.

    llama-cpp-python builds differ: ``create_completion`` always accepts the
    argument, but several ``create_chat_completion`` builds reject it with
    'unexpected keyword argument'. The signature is inspected once and the
    answer is cached in ``_CHAT_SUPPORTS_STOPPING_CRITERIA``. Any failure while
    probing (import error, uninspectable signature, ...) counts as "no".
    """
    global _CHAT_SUPPORTS_STOPPING_CRITERIA
    if _CHAT_SUPPORTS_STOPPING_CRITERIA is not None:
        return _CHAT_SUPPORTS_STOPPING_CRITERIA

    verdict = False
    try:
        import inspect
        from llama_cpp import Llama as _L
        params = inspect.signature(_L.create_chat_completion).parameters
        if "stopping_criteria" in params:
            verdict = True
        else:
            # A **kwargs catch-all would accept the argument as well.
            verdict = any(p.kind == inspect.Parameter.VAR_KEYWORD
                          for p in params.values())
    except Exception:
        verdict = False

    _CHAT_SUPPORTS_STOPPING_CRITERIA = verdict
    return verdict
try:
from llama_cpp import Llama
from llama_cpp.llama_chat_format import ChatFormatterResponse
......@@ -696,7 +720,11 @@ class VulkanBackend(ModelBackend):
self.n_ctx = 0 # 0 means use model's built-in default in llama.cpp
print("DEBUG: --no-ram mode: ignoring --n-ctx, using model default context size")
else:
n_ctx = kwargs.get('n_ctx', 2048)
# Accept either 'n_ctx' (models.json / GGUF) or 'ctx' (CLI / older
# configs); the manager passes both, but be robust to either alone.
n_ctx = kwargs.get('n_ctx')
if n_ctx is None:
n_ctx = kwargs.get('ctx', 2048)
self.n_ctx = n_ctx
# Set verbose
......@@ -775,13 +803,22 @@ class VulkanBackend(ModelBackend):
print(f"Error loading GGUF model: {e}")
raise
finally:
# Restore llama.cpp's default (quiet) logging after load
# Quiet logging after load — but DO NOT drop to NULL + GC the callback.
# ggml keeps the log-callback pointer and may still invoke it during
# generation (e.g. gemma's iSWA hybrid cache logs every step), so a
# garbage-collected ctypes callback becomes a use-after-free → SIGSEGV
# in libffi. Install a persistent no-op callback and keep a strong
# reference on self for the model's lifetime.
if _llama_cpp:
try:
_llama_cpp.llama_log_set(None, None)
@_llama_cpp.llama_log_callback
def _quiet_log_cb(level, text, user_data):
pass
_llama_cpp.llama_log_set(_quiet_log_cb, None)
self._log_cb = _quiet_log_cb # keep alive (prevents GC/UAF)
except Exception:
pass
_log_cb = None # release callback
self._log_cb = None
_log_cb = None # the verbose load-phase callback is no longer referenced
# Post-load layer/buffer summary
try:
......@@ -1278,7 +1315,7 @@ class VulkanBackend(ModelBackend):
if response_format and response_format.get('type') == 'json_object':
kwargs['response_format'] = {'type': 'json_object'}
_tc = _make_llama_thermal_criteria()
if _tc is not None:
if _tc is not None and _chat_supports_stopping_criteria():
kwargs['stopping_criteria'] = _tc
with self._gen_lock:
......@@ -1307,7 +1344,7 @@ class VulkanBackend(ModelBackend):
if stop:
kwargs['stop'] = stop
_tc = _make_llama_thermal_criteria()
if _tc is not None:
if _tc is not None and _chat_supports_stopping_criteria():
kwargs['stopping_criteria'] = _tc
prompt_tokens = 0
......
......@@ -264,6 +264,14 @@ class Config:
# a large-capacity volume when /tmp is small — 4× upscaling extracts many large
# frames and can exhaust a small /tmp ("No space left on device").
tmp_dir: Optional[str] = None
# Periodic cleanup of the temporary-working dir (above). A background janitor
# deletes entries older than tmp_cleanup_max_age_hours every
# tmp_cleanup_interval_minutes. Guards against runaway tmp growth from
# delete=False temp files left by interrupted generations. Only runs when a
# dedicated tmp_dir is configured (never prunes a bare system /tmp).
tmp_cleanup_enabled: bool = True
tmp_cleanup_max_age_hours: float = 24.0
tmp_cleanup_interval_minutes: float = 60.0
hf_chat_templates: list = field(default_factory=list)
reasoning_options: list = field(default_factory=list)
parser: str = "auto"
......@@ -422,6 +430,9 @@ class ConfigManager:
grammar_guided=config_data.get("grammar_guided", False),
file_path=config_data.get("file_path"),
tmp_dir=config_data.get("tmp_dir"),
tmp_cleanup_enabled=config_data.get("tmp_cleanup_enabled", True),
tmp_cleanup_max_age_hours=config_data.get("tmp_cleanup_max_age_hours", 24.0),
tmp_cleanup_interval_minutes=config_data.get("tmp_cleanup_interval_minutes", 60.0),
hf_chat_templates=config_data.get("hf_chat_templates", []),
reasoning_options=config_data.get("reasoning_options", []),
parser=config_data.get("parser", "auto")
......@@ -597,6 +608,9 @@ class ConfigManager:
"grammar_guided": self.config.grammar_guided,
"file_path": self.config.file_path,
"tmp_dir": self.config.tmp_dir,
"tmp_cleanup_enabled": self.config.tmp_cleanup_enabled,
"tmp_cleanup_max_age_hours": self.config.tmp_cleanup_max_age_hours,
"tmp_cleanup_interval_minutes": self.config.tmp_cleanup_interval_minutes,
"hf_chat_templates": self.config.hf_chat_templates,
"reasoning_options": self.config.reasoning_options,
"parser": self.config.parser
......
......@@ -339,6 +339,21 @@ def main():
except Exception as _e:
print(f"WARNING: could not use tmp dir '{_tmp_dir}': {_e} — using OS default")
# Periodically reclaim the dedicated tmp dir (abandoned delete=False scratch
# from interrupted generations). Only runs against a configured tmp_dir, never
# a bare system /tmp. Same mechanism works locally and inside the container.
if _tmp_dir and getattr(config, "tmp_cleanup_enabled", True):
try:
from codai.models.tmp_janitor import start as _start_tmp_janitor
_start_tmp_janitor(
_tmp_dir,
enabled=config.tmp_cleanup_enabled,
max_age_hours=getattr(config, "tmp_cleanup_max_age_hours", 24.0),
interval_minutes=getattr(config, "tmp_cleanup_interval_minutes", 60.0),
)
except Exception as _e:
print(f"WARNING: tmp janitor failed to start: {_e}")
# Apply cache directory overrides from config before any cache module is used.
# We set env vars AND patch huggingface_hub.constants in case the library was
# already imported (constants are computed once at import time from env vars).
......@@ -973,7 +988,9 @@ def main():
global_args.enhance_allow_ffmpeg = config.enhance.allow_ffmpeg
global_args.enhance_allow_rife_ncnn = config.enhance.allow_rife_ncnn
global_args.n_gpu_layers = config.vulkan.n_gpu_layers
global_args.n_ctx = [config.vulkan.n_ctx]
# The global fallback context window. Must be a plain int — it flows into the
# llama.cpp backend's n_ctx, which a list would break ('<' int vs list).
global_args.n_ctx = config.vulkan.n_ctx
global_args.vulkan_device = config.vulkan.device_id
global_args.vulkan_single_gpu = config.vulkan.single_gpu
global_args.image_sample_method = config.image.sample_method
......
......@@ -875,7 +875,7 @@ class MultiModelManager:
return self._get_least_busy_instance(self.default_model)
self._pending_new_instance.discard(self.default_model)
config = self.config.get(self.default_model, {})
config = self._config_for_model(self.default_model)
backend_type = self.model_backend_types.get(self.default_model, "auto")
try:
......@@ -902,8 +902,30 @@ class MultiModelManager:
return v
return default
ctx = _cfg_or_global('ctx', 'n_ctx')
# Context window. models.json stores it on the entry as 'n_ctx', and
# build_runtime_kwargs maps that to 'ctx' in the per-model runtime cfg
# (older configs/CLI also use 'ctx'), so accept either key. The
# PER-MODEL value must win over the global vulkan.n_ctx fallback, so
# check the config keys first. BOTH kwarg names are passed downstream:
# the GGUF/llama.cpp backend reads 'n_ctx', the transformers backend
# reads 'ctx'.
ctx = config.get('ctx')
if ctx is None:
ctx = config.get('n_ctx')
if ctx is None and _ga is not None:
ctx = getattr(_ga, 'n_ctx', None)
# Coerce to a positive int: a stray list/str (e.g. an old global
# default wrapped in a list) would otherwise reach llama.cpp and
# raise '<' int-vs-list at load.
if isinstance(ctx, (list, tuple)):
ctx = ctx[0] if ctx else None
try:
ctx = int(ctx) if ctx is not None else None
except (TypeError, ValueError):
ctx = None
if ctx:
kwargs['n_ctx'] = ctx
kwargs['ctx'] = ctx
n_gpu_layers = _cfg_or_global('n_gpu_layers', 'n_gpu_layers')
if n_gpu_layers is not None:
......@@ -974,7 +996,7 @@ class MultiModelManager:
return self._get_least_busy_instance(model_name)
self._pending_new_instance.discard(model_name)
config = self.config.get(model_name, {})
config = self._config_for_model(model_name)
backend_type = self.model_backend_types.get(model_name, "auto")
try:
......@@ -999,8 +1021,30 @@ class MultiModelManager:
return v
return default
ctx = _cfg_or_global('ctx', 'n_ctx')
# Context window. models.json stores it on the entry as 'n_ctx', and
# build_runtime_kwargs maps that to 'ctx' in the per-model runtime cfg
# (older configs/CLI also use 'ctx'), so accept either key. The
# PER-MODEL value must win over the global vulkan.n_ctx fallback, so
# check the config keys first. BOTH kwarg names are passed downstream:
# the GGUF/llama.cpp backend reads 'n_ctx', the transformers backend
# reads 'ctx'.
ctx = config.get('ctx')
if ctx is None:
ctx = config.get('n_ctx')
if ctx is None and _ga is not None:
ctx = getattr(_ga, 'n_ctx', None)
# Coerce to a positive int: a stray list/str (e.g. an old global
# default wrapped in a list) would otherwise reach llama.cpp and
# raise '<' int-vs-list at load.
if isinstance(ctx, (list, tuple)):
ctx = ctx[0] if ctx else None
try:
ctx = int(ctx) if ctx is not None else None
except (TypeError, ValueError):
ctx = None
if ctx:
kwargs['n_ctx'] = ctx
kwargs['ctx'] = ctx
n_gpu_layers = _cfg_or_global('n_gpu_layers', 'n_gpu_layers')
if n_gpu_layers is not None:
......@@ -1043,6 +1087,15 @@ class MultiModelManager:
inst_num = pool.count + 1 if pool else 1
print(f"Loading model on demand: {model_name}"
+ (f" (instance {inst_num})" if inst_num > 1 else ""))
# Evict resident models to make room before loading (idempotent —
# a no-op when request_model already freed enough). Guards the
# direct on-demand path, which otherwise loads on top of the
# current model and OOMs (e.g. switching to a larger model).
if inst_num == 1:
try:
self.ensure_vram_for(model_name)
except Exception as _ev_e:
print(f" (ensure_vram_for warning: {_ev_e})")
_snap = self.vram_before_load()
# Tell the backend how much VRAM this model is expected to need so
# it can decide whether Flash-Attention-2 is safe (FA2 requires the
......@@ -1212,6 +1265,37 @@ class MultiModelManager:
self.model_aliases[alias] = model_name
for model_type in self._registered_types_for(model_name):
self._remember_registered_type(alias, model_type)
def _config_for_model(self, name) -> dict:
    """Look up a model's config dict, whatever id form the caller used.

    ``self.config`` is keyed by the registration id (usually the model's full
    path), while on-demand loads often arrive as a bare basename. A plain
    ``self.config.get(basename)`` would then miss and every per-model setting
    (n_ctx, flash_attn, parser, cache quant, ...) would silently fall back to
    the global defaults.

    Resolution order: exact id, then the alias map, then a match on basename
    or on basename without the ``.gguf`` extension. Empty configs are treated
    as misses at every stage. Returns ``{}`` when nothing matches.
    """
    if not name:
        return {}

    # 1) Exact registration id.
    direct = self.config.get(name)
    if direct:
        return direct

    # 2) Alias pointing at a different registration id.
    alias_target = self.model_aliases.get(name)
    if alias_target and alias_target != name:
        aliased = self.config.get(alias_target)
        if aliased:
            return aliased

    # 3) Basename / extension-less basename comparison against every key.
    import os

    def _tail_and_stem(value):
        tail = os.path.basename(str(value))
        stem = tail[:-5] if tail.endswith(".gguf") else tail
        return tail, stem

    wanted_tail, wanted_stem = _tail_and_stem(name)
    for registered, candidate in self.config.items():
        if not candidate:
            continue
        tail, stem = _tail_and_stem(registered)
        if tail == wanted_tail or stem == wanted_stem:
            return candidate
    return {}
def set_assigned_models(self, keys) -> None:
"""Restrict list_models() to the front-assigned subset (route-keys: alias /
......@@ -2564,7 +2648,12 @@ class MultiModelManager:
4. HuggingFace hub cache size (dense shards or largest GGUF), adjusted.
Returns 0 when the requirement cannot be determined.
"""
cfg = self.config.get(model_key, {})
# Resolve by basename/alias too — a model requested by basename would
# otherwise miss self.config (keyed by full path), return 0, and skip the
# eviction that makes room for it (→ OOM loading on top of a resident model).
cfg = self._config_for_model(model_key)
if not cfg and resolved_name:
cfg = self._config_for_model(resolved_name)
# Unwrap a forwarded `_raw_cfg` so we see the ORIGINAL model entry the
# same way the loaders do (build_kwargs_from_config only copies a few
# keys to the top level — component_quantization lives ONLY in _raw_cfg).
......
......@@ -1058,6 +1058,42 @@ def parse_gemma_native_tool_calls(text: str, tool_names=None):
return out
def parse_xml_wrapped_tool_calls(text: str, tool_names):
    """Parse ``<NAME>…</NAME>`` tool calls where NAME is a declared tool.

    Some clients (Kilo/Cline/Roo-style) describe tools in the system prompt and
    instruct the model to emit XML-tagged calls. Models then produce e.g.
    ``<bash>{"command": "ls"}</bash>`` (JSON args) or
    ``<bash><command>ls</command></bash>`` (nested XML params). Neither matches
    a model's native tool format, so this recovers them. Matching is restricted
    to the given tool names so ordinary tagged prose (``<thinking>`` …) isn't
    misread.

    Args:
        text: Raw model output to scan.
        tool_names: Iterable of declared tool names (typically a set).

    Returns:
        A list of ``(name, args_dict)`` tuples in the order the calls appear in
        ``text``, with exact duplicates (same name and args) removed. Empty
        when ``text`` or ``tool_names`` is empty, or nothing parses to a dict.
    """
    if not text or not tool_names:
        return []

    # Collect every match first. Iterating tool_names (usually a set) directly
    # would return calls grouped by an arbitrary name order; callers need the
    # order in which the model emitted them.
    hits = []
    for name in tool_names:
        tag = re.escape(name)
        for m in re.finditer(rf'<{tag}\s*>(.*?)</{tag}\s*>', text, re.DOTALL):
            hits.append((m.start(), name, m.group(1).strip()))
    hits.sort(key=lambda hit: (hit[0], hit[1]))

    out, seen = [], set()
    for _, name, inner in hits:
        args = None
        # Form 1: JSON object payload.
        if inner.startswith('{'):
            try:
                args = json.loads(inner)
            except Exception:
                args = None
        # Form 2: nested <param>value</param> elements.
        if args is None:
            params = re.findall(r'<(\w+)\s*>(.*?)</\1\s*>', inner, re.DOTALL)
            if params:
                args = {k: v.strip() for k, v in params}
        if not isinstance(args, dict):
            continue
        # Dedupe on a canonical serialization of (name, args).
        key = (name, json.dumps(args, sort_keys=True, default=str))
        if key in seen:
            continue
        seen.add(key)
        out.append((name, args))
    return out
# 7. GEMMA PARSER
class GemmaParser(BaseParser):
@validate_tool_output
......@@ -1082,6 +1118,14 @@ class GemmaParser(BaseParser):
except:
pass
# XML-tagged tool calls (<bash>{…}</bash>) emitted when the client (Kilo/
# Cline-style) prompts for XML tools rather than the model's native format.
if not results and self.tools:
for name, args in parse_xml_wrapped_tool_calls(text, set(self.tools.keys())):
results.append(self._to_oa(name, args))
if results:
return results
# Fallback: if no tool calls found, try using ToolCallParser
if not results:
tool_call_parser = ToolCallParser()
......
......@@ -134,6 +134,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libsndfile1 \
libvulkan1 \
mesa-vulkan-drivers \
pciutils \
openssl \
&& rm -rf /var/lib/apt/lists/*
......
......@@ -4,39 +4,22 @@ ARG UBUNTU_VERSION=22.04
FROM scratch AS build_meta
COPY .packaging-cache/build-manifest.json /build-manifest.json
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime
# ─────────────────────────────────────────────────────────────────────────────
# assembler: stage the local bundle into /opt/coderai. The 27GB COPY of the
# bundle lives ONLY in this stage; the final image copies the assembled
# /opt/coderai once, so the bundle is never stored twice.
# ─────────────────────────────────────────────────────────────────────────────
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS assembler
ARG PYTHON_VERSION=3.13.5
ARG PBS_RELEASE=20250612
ARG VENV_PYTHON_MINOR=3.13
ENV DEBIAN_FRONTEND=noninteractive \
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
PYTHONUNBUFFERED=1 \
HF_HOME=/cache/huggingface \
HUGGINGFACE_HUB_CACHE=/cache/huggingface/hub \
TRANSFORMERS_CACHE=/cache/huggingface/transformers \
DIFFUSERS_CACHE=/cache/diffusers \
CODERAI_CONFIG_DIR=/config \
CODERAI_MODELS_DIR=/models \
CODERAI_CACHE_DIR=/cache \
CODERAI_HOST=0.0.0.0 \
CODERAI_PORT=8776
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
curl \
ffmpeg \
git \
libgomp1 \
libgl1 \
libglib2.0-0 \
libsndfile1 \
libvulkan1 \
mesa-vulkan-drivers \
openssl \
rsync \
&& rm -rf /var/lib/apt/lists/*
ca-certificates curl rsync \
&& apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*.deb
RUN set -eux; \
curl -fsSL -o /tmp/python.tar.gz \
......@@ -46,14 +29,12 @@ RUN set -eux; \
rm /tmp/python.tar.gz; \
/opt/coderai/python/bin/python3 --version
ENV PYTHONHOME=/opt/coderai/python \
PATH=/opt/coderai/python/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# BuildKit named context supplied by packaging/linux/build_oci_image.sh:
# --build-context local_bundle=/path/to/.packaging-cache/oci-venv-context
# The bundle contains the selected venv plus ldd-discovered native libraries from
# the local machine. GPU drivers are intentionally not bundled; NVIDIA Container
# Toolkit / host Vulkan ICDs remain the runtime contract.
# The bundle contains the selected venv plus ldd-discovered native libraries, the
# parler overlay, the isolated lip-sync venvs (+repos/weights), a standalone
# Python 3.10 for them, and the ds4 binary. GPU drivers are intentionally not
# bundled; the NVIDIA Container Toolkit / host Vulkan ICDs remain the contract.
COPY --from=local_bundle / /tmp/local-bundle/
RUN set -eux; \
......@@ -69,20 +50,107 @@ RUN set -eux; \
mkdir -p /opt/coderai/local-libs; \
rsync -a /tmp/local-bundle/local-libs/ /opt/coderai/local-libs/; \
fi; \
if [ -d /tmp/local-bundle/parler-venv/site-packages ]; then \
mkdir -p /opt/coderai/parler-venv/site-packages; \
rsync -a /tmp/local-bundle/parler-venv/site-packages/ /opt/coderai/parler-venv/site-packages/; \
fi; \
if [ -d /tmp/local-bundle/py310 ]; then \
mkdir -p /opt/coderai/py310; \
rsync -a /tmp/local-bundle/py310/ /opt/coderai/py310/; \
fi; \
for d in lipsync_venv Wav2Lip SadTalker ds4; do \
if [ -d "/tmp/local-bundle/$d" ]; then \
mkdir -p "/opt/coderai/$d"; \
rsync -a "/tmp/local-bundle/$d/" "/opt/coderai/$d/"; \
fi; \
done; \
cfg="/opt/coderai/lipsync_venv/pyvenv.cfg"; \
if [ -f "$cfg" ]; then \
sed -i 's|^home *=.*|home = /opt/coderai/py310/bin|; s|^command *=.*|command = /opt/coderai/py310/bin/python3.10|' "$cfg"; \
for p in python python3 python3.10; do ln -sf /opt/coderai/py310/bin/python3.10 "/opt/coderai/lipsync_venv/bin/$p"; done; \
fi; \
if [ -d /tmp/local-bundle/local-bin ]; then \
rsync -a /tmp/local-bundle/local-bin/ /usr/local/bin/; \
find /usr/local/bin -maxdepth 1 -type f -exec chmod +x '{}' \;; \
mkdir -p /opt/coderai/staged-local-bin; \
rsync -a /tmp/local-bundle/local-bin/ /opt/coderai/staged-local-bin/; \
fi; \
rm -rf /tmp/local-bundle; \
find /opt/coderai/python -type d \( -name __pycache__ -o -name tests -o -name test \) -prune -exec rm -rf '{}' +
find /opt/coderai -type d \( -name __pycache__ -o -name tests -o -name test \) -prune -exec rm -rf '{}' + || true
# ─────────────────────────────────────────────────────────────────────────────
# runtime: the shipped image. Copies the assembled tree once (no bundle dup).
# ─────────────────────────────────────────────────────────────────────────────
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime
ARG PYTHON_VERSION=3.13.5
# Note: PYTHONHOME / the python-prefixed PATH are deliberately NOT set here — that
# would hijack the system python3 during apt's python3-minimal post-install (the
# standalone interpreter is only COPYed in below). They're set after apt.
ENV DEBIAN_FRONTEND=noninteractive \
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
PYTHONUNBUFFERED=1 \
HF_HOME=/cache/huggingface \
HUGGINGFACE_HUB_CACHE=/cache/huggingface/hub \
TRANSFORMERS_CACHE=/cache/huggingface/transformers \
DIFFUSERS_CACHE=/cache/diffusers \
CODERAI_CONFIG_DIR=/config \
CODERAI_MODELS_DIR=/models \
CODERAI_CACHE_DIR=/cache \
CODERAI_HOST=0.0.0.0 \
CODERAI_PORT=8776 \
CODERAI_LIPSYNC_VENV=/opt/coderai/lipsync_venv \
CODERAI_WAV2LIP_SRC=/opt/coderai/Wav2Lip \
CODERAI_WAV2LIP_DIR=/cache/lipsync/Wav2Lip \
CODERAI_SADTALKER_SRC=/opt/coderai/SadTalker \
CODERAI_SADTALKER_DIR=/cache/lipsync/SadTalker \
CODERAI_DS4_DIR=/cache/ds4
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
curl \
ffmpeg \
git \
libgomp1 \
libgl1 \
libglib2.0-0 \
libsndfile1 \
libvulkan1 \
mesa-vulkan-drivers \
vulkan-tools \
pciutils \
nginx \
supervisor \
openssl \
rsync \
&& apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*.deb
# The fully assembled CoderAI tree (Python + venvs + tools), copied once.
COPY --from=assembler /opt/coderai /opt/coderai
# Now the standalone interpreter exists, activate it for the app + launchers.
ENV PYTHONHOME=/opt/coderai/python \
PATH=/opt/coderai/python/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
WORKDIR /opt/coderai/app
COPY . /opt/coderai/app
COPY --from=build_meta /build-manifest.json /opt/coderai/BUILD-MANIFEST.json
COPY packaging/linux/launcher/coderai-oci /usr/local/bin/coderai
COPY packaging/linux/launcher/with-env /usr/local/bin/with-env
COPY packaging/linux/launcher/coderai-entrypoint /usr/local/bin/coderai-entrypoint
COPY packaging/linux/launcher/wav2lip /usr/local/bin/wav2lip
COPY packaging/linux/launcher/sadtalker /usr/local/bin/sadtalker
COPY packaging/linux/nginx.conf /etc/nginx/nginx.conf
COPY packaging/linux/supervisord.conf /etc/supervisor/supervisord.conf
COPY packaging/linux/README-RUN.txt /opt/coderai/README-RUN.txt
RUN set -eux; \
chmod +x /usr/local/bin/coderai /opt/coderai/app/coderai; \
if [ -d /opt/coderai/staged-local-bin ]; then \
rsync -a /opt/coderai/staged-local-bin/ /usr/local/bin/; \
find /usr/local/bin -maxdepth 1 -type f -exec chmod +x '{}' \;; \
rm -rf /opt/coderai/staged-local-bin; \
fi; \
chmod +x /usr/local/bin/coderai /usr/local/bin/with-env /usr/local/bin/coderai-entrypoint \
/usr/local/bin/wav2lip /usr/local/bin/sadtalker /opt/coderai/app/coderai; \
mkdir -p /config /models /cache /opt/coderai/app/models; \
rm -rf \
/opt/coderai/app/.git \
......@@ -101,5 +169,7 @@ if missing:
PY
VOLUME ["/config", "/models", "/cache"]
# Single published port: nginx fronts the main server (/) and the tool web UIs
# (/editor/, /videogen/, /township/).
EXPOSE 8776
ENTRYPOINT ["/usr/local/bin/coderai"]
ENTRYPOINT ["/usr/local/bin/coderai-entrypoint"]
......@@ -25,3 +25,127 @@ AMD/Intel Vulkan:
CPU:
No GPU setup is required.
================================================================================
Running the Docker / OCI image
================================================================================
The image publishes ONE port (8776). nginx inside the container fronts:
http://HOST:8776/ CoderAI server + OpenAI-compatible API + admin UI
http://HOST:8776/editor/ Video editor
http://HOST:8776/videogen/ Videogen studio
http://HOST:8776/township/ Township fighters
Three volumes hold all mutable state (everything else in the image is read-only):
/config app config + auth (small)
/models model storage / data path
/cache Hugging Face/diffusers caches + tool outputs (LARGE)
The examples below run the container as YOUR user (recommended). Create and own
the state dirs once, up front:
mkdir -p coderai-config coderai-models coderai-cache
sudo chown -R "$(id -u):$(id -g)" coderai-config coderai-models coderai-cache
Basic run (NVIDIA):
docker run --gpus all --ipc=host -p 8776:8776 \
--user "$(id -u):$(id -g)" \
-v "$PWD/coderai-config:/config" \
-v "$PWD/coderai-models:/models" \
-v "$PWD/coderai-cache:/cache" \
coderai:local
AMD/Intel Vulkan: replace `--gpus all` with `--device /dev/dri`.
CPU only: drop the GPU flag entirely.
Run as container-root instead: just omit the `--user` line (see "Running as a
non-root user" below for rootless / userns-remap alternatives).
External storage for /models and /cache
----------------------------------------
/models and /cache are where the big data lives, so put them on your large
storage and bind-mount them onto the defaults. The in-container paths never
change — only the host side does.
1) Host-mounted big disk / SAN (a path already mounted on the host):
# Make the big-storage dirs owned by the UID you run as:
sudo chown -R "$(id -u):$(id -g)" /srv/coderai/config \
/mnt/bigstorage/coderai/models /mnt/bigstorage/coderai/cache
docker run --gpus all --ipc=host -p 8776:8776 \
--user "$(id -u):$(id -g)" \
-v /srv/coderai/config:/config \
-v /mnt/bigstorage/coderai/models:/models \
-v /mnt/bigstorage/coderai/cache:/cache \
coderai:local
The launcher points HF_HOME at /cache/huggingface and writes tool outputs to
/cache/{videogen_output,township_output}, so /cache on the big disk captures
downloads AND produced artifacts.
2) NFS (shared across machines) — back a Docker volume with the NFS driver:
docker volume create --driver local \
--opt type=nfs --opt o=addr=10.0.0.5,rw,nfsvers=4 \
--opt device=:/export/coderai/models coderai-models
docker volume create --driver local \
--opt type=nfs --opt o=addr=10.0.0.5,rw,nfsvers=4 \
--opt device=:/export/coderai/cache coderai-cache
docker run --gpus all --ipc=host -p 8776:8776 \
--user "$(id -u):$(id -g)" \
-v "$PWD/coderai-config:/config" \
-v coderai-models:/models \
-v coderai-cache:/cache \
coderai:local
(SMB/CIFS works the same way with `--opt type=cifs` and credentials.)
For NFS, the export must let the UID you pass to --user write (e.g. map it,
or set the right ownership on the export); don't rely on no_root_squash.
Performance note: NFS/CIFS are fine as a model LIBRARY, but mmap-heavy weight
loads and KV-cache spill are much faster on local NVMe or a fast SAN. Keep the
active inference weights on fast storage if you can.
Running as a non-root user
--------------------------
The image works as root (PID 1 sets up nginx + the services) AND as an arbitrary
UID. nginx pid/temp and the supervisor socket live under /tmp, logs go to
stdout/stderr, and Python doesn't write .pyc, so no part of the runtime needs to
write outside the mounted volumes.
Option A — run as your own UID/GID (recommended for bind mounts):
# The mounted dirs must be owned by that UID so the app can write to them:
mkdir -p coderai-config coderai-models coderai-cache
sudo chown -R "$(id -u):$(id -g)" coderai-config coderai-models coderai-cache
docker run --gpus all --ipc=host -p 8776:8776 \
--user "$(id -u):$(id -g)" \
-v "$PWD/coderai-config:/config" \
-v "$PWD/coderai-models:/models" \
-v "$PWD/coderai-cache:/cache" \
coderai:local
Caveat: with --user, the in-image standalone Python and app tree stay
root-owned but world-readable, which is all the runtime needs. For NFS, the
export must allow that UID to write (no_root_squash is NOT required when you
pass a real --user; map/allow the UID you run as).
Option B — keep container-root but map it to an unprivileged host UID, with no
image changes. Best when you don't want to manage UIDs/ownership by hand:
* Rootless Docker (run the daemon as a normal user), or
* userns-remap: add { "userns-remap": "default" } to
/etc/docker/daemon.json and restart Docker. Container root (UID 0) then maps
to a high, unprivileged host subordinate UID automatically.
In both cases run the normal command (no --user needed); the container thinks
it is root, but the kernel sees an unprivileged user on the host.
GPU + non-root: NVIDIA Container Toolkit and /dev/dri both work under --user and
under rootless/userns-remap; no extra flags are needed beyond the usual
--gpus all (NVIDIA) or --device /dev/dri (Vulkan).
......@@ -17,6 +17,22 @@ INCLUDE_LOCAL_LIBS=1
AUTO_LOCAL_BINS=1
LOCAL_BINARIES=()
LOCAL_BINARY_DIRS=()
# Optional second venv for Parler-TTS (pinned transformers 4.46). Only its
# site-packages is bundled, as an overlay that is prepended to PYTHONPATH at
# runtime: its pinned packages shadow the main stack, while everything it does
# not provide (torch, numpy, ...) still resolves from the main venv underneath.
# Override the location with CODERAI_PARLER_VENV or --parler-venv; set
# INCLUDE_PARLER=0 via --no-parler to skip it.
PARLER_VENV="${CODERAI_PARLER_VENV:-$HOME/.coderai/parler_venv}"
INCLUDE_PARLER=1
# Isolated lip-sync tools (shared Python 3.10 venv + repo code) and the ds4 native
# engine. Bundled so the image replicates the local install; --no-tools sets
# INCLUDE_TOOLS=0 to skip them. Model weights are NOT bundled: the ds4 DeepSeek-V4
# GGUF weights are multi-GB and are downloaded at runtime into a volume.
INCLUDE_TOOLS=1
# One shared Python 3.10 venv serves both wav2lip and sadtalker (identical torch),
# halving the torch footprint. Repo code is bundled WITHOUT model weights — those
# download on first lip-sync use into the /cache volume.
# Each path below can be overridden through the environment variable named in its
# default expansion.
LIPSYNC_VENV="${CODERAI_LIPSYNC_VENV:-$HOME/.coderai/lipsync_venv}"
WAV2LIP_DIR="${CODERAI_WAV2LIP_SRC:-$HOME/.coderai/Wav2Lip}"
SADTALKER_DIR="${CODERAI_SADTALKER_SRC:-$HOME/.coderai/SadTalker}"
DS4_DIR="${CODERAI_DS4_DIR:-$HOME/.coderai/ds4}"
usage() {
cat <<'EOF'
......@@ -37,6 +53,10 @@ Options:
--include-local-dir PATH
Copy executable files from a local build directory, including ldd libs.
Can be repeated. Useful for local whisper.cpp build/bin directories.
--parler-venv PATH Bundle this Parler-TTS venv as an overlay (default:
$CODERAI_PARLER_VENV or ~/.coderai/parler_venv if present).
--no-parler Do not bundle the Parler-TTS venv overlay.
--no-tools Do not bundle the lip-sync (wav2lip/sadtalker) venvs or ds4.
-t, --tag TAG Image tag to create (default: coderai:local or OCI_IMAGE from versions.env).
-h, --help Show this help.
......@@ -77,6 +97,24 @@ while [[ $# -gt 0 ]]; do
AUTO_LOCAL_BINS=0
shift
;;
--parler-venv)
BUILD_MODE="venv"
if [[ $# -lt 2 ]]; then
echo "Error: --parler-venv requires a path" >&2
exit 2
fi
PARLER_VENV="$2"
INCLUDE_PARLER=1
shift 2
;;
--no-parler)
INCLUDE_PARLER=0
shift
;;
--no-tools)
INCLUDE_TOOLS=0
shift
;;
--include-local-bin)
BUILD_MODE="venv"
if [[ $# -lt 2 ]]; then
......@@ -184,6 +222,8 @@ discover_local_binaries() {
"$HOME/whisper.cpp/build/bin/server"
"/usr/local/bin/ds4-server"
"${CODERAI_DS4_DIR:-$HOME/.coderai/ds4}/ds4-server"
"/usr/local/bin/rife-ncnn-vulkan"
"$HOME/.local/bin/rife-ncnn-vulkan"
)
local path
for path in "${candidates[@]}"; do
......@@ -231,6 +271,50 @@ prepare_venv_bundle() {
printf ' %s\n' "${LOCAL_BINARIES[@]}"
fi
# Parler-TTS overlay venv. Only its site-packages is needed: it was created with
# --system-site-packages, so it holds just the pinned overrides (transformers
# 4.46, parler_tts, tokenizers, ...); torch/numpy resolve from the main venv.
if [[ "$INCLUDE_PARLER" == "1" && -n "$PARLER_VENV" && -d "$PARLER_VENV" ]]; then
local parler_sp
parler_sp="$PARLER_VENV/lib/python${VENV_PYTHON_MINOR}/site-packages"
if [[ -d "$parler_sp" ]]; then
mkdir -p "$bundle/parler-venv/site-packages"
rsync -a --delete "$parler_sp/" "$bundle/parler-venv/site-packages/"
echo "Bundled Parler-TTS overlay from: $parler_sp"
else
echo "Warning: --parler-venv given but no site-packages at $parler_sp (skipping)" >&2
fi
fi
# Isolated lip-sync tools + ds4 engine. The two venvs share one standalone
# Python 3.10 (read from a venv's pyvenv.cfg `home`); it's bundled once and the
# venvs are re-pointed at it during the image build.
if [[ "$INCLUDE_TOOLS" == "1" ]]; then
local py310_dir=""
if [[ -f "$LIPSYNC_VENV/pyvenv.cfg" ]]; then
local home_bin
home_bin="$(sed -n 's/^home *= *//p' "$LIPSYNC_VENV/pyvenv.cfg" | head -1)"
[[ -n "$home_bin" ]] && py310_dir="$(dirname "$home_bin")"
fi
if [[ -n "$py310_dir" && -d "$py310_dir" ]]; then
mkdir -p "$bundle/py310"
rsync -a "$py310_dir/" "$bundle/py310/"
echo "Bundled standalone Python 3.10 from: $py310_dir"
else
echo "Warning: could not locate the py3.10 interpreter for the lip-sync venv" >&2
fi
local _venv_excl=(--exclude '__pycache__' --exclude '*.pyc' --exclude 'pip/' --exclude '*.dist-info/RECORD')
if [[ -d "$LIPSYNC_VENV" ]]; then rsync -a "${_venv_excl[@]}" "$LIPSYNC_VENV/" "$bundle/lipsync_venv/"; echo "Bundled shared lip-sync venv"; fi
# Repo CODE ONLY — checkpoints/weights are excluded and download at runtime.
if [[ -d "$WAV2LIP_DIR" ]]; then rsync -a --exclude 'checkpoints/' --exclude 'face_detection/detection/sfd/*.pth' "$WAV2LIP_DIR/" "$bundle/Wav2Lip/"; echo "Bundled Wav2Lip code (no weights)"; fi
if [[ -d "$SADTALKER_DIR" ]]; then rsync -a --exclude 'checkpoints/*' --exclude 'gfpgan/weights/*' "$SADTALKER_DIR/" "$bundle/SadTalker/"; echo "Bundled SadTalker code (no weights)"; fi
# ds4: binary + scripts, minus any downloaded multi-GB GGUF weights.
if [[ -d "$DS4_DIR" ]]; then
rsync -a --exclude 'gguf/' --exclude '*.gguf' --exclude '*.gguf.*' "$DS4_DIR/" "$bundle/ds4/"
echo "Bundled ds4 (binary + scripts, no weights)"
fi
fi
if [[ "$include_libs" != "1" ]]; then
return 0
fi
......@@ -245,6 +329,7 @@ bundle = Path(os.environ["VENV_BUNDLE"])
venv = Path(os.environ["VENV_PATH_FOR_LDD"])
local_libs = bundle / "local-libs"
local_bin = bundle / "local-bin"
parler_sp = bundle / "parler-venv" / "site-packages"
skip_prefixes = (
"/lib/ld-linux",
......@@ -278,7 +363,7 @@ skip_starts = (
)
candidates = []
for root in (venv / "lib", venv / "bin", local_bin):
for root in (venv / "lib", venv / "bin", local_bin, parler_sp):
if not root.exists():
continue
for path in root.rglob("*"):
......@@ -415,13 +500,27 @@ cat <<EOF
Built $IMAGE_TAG
Run examples:
Run examples (run as your own UID; create+own the dirs first):
mkdir -p coderai-config coderai-models coderai-cache
sudo chown -R "\$(id -u):\$(id -g)" coderai-config coderai-models coderai-cache
NVIDIA:
$DOCKER_BIN run --gpus all --ipc=host -p 8776:8776 -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG
$DOCKER_BIN run --gpus all --ipc=host -p 8776:8776 --user "\$(id -u):\$(id -g)" -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG
AMD/Intel Vulkan:
$DOCKER_BIN run --device /dev/dri --ipc=host -p 8776:8776 -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG
$DOCKER_BIN run --device /dev/dri --ipc=host -p 8776:8776 --user "\$(id -u):\$(id -g)" -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG
CPU:
$DOCKER_BIN run --ipc=host -p 8776:8776 -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG
$DOCKER_BIN run --ipc=host -p 8776:8776 --user "\$(id -u):\$(id -g)" -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG
(Drop --user to run as container-root, or use rootless/userns-remap Docker.)
One published port (8776) fronts everything via nginx:
/ server+API+admin /editor/ video editor /videogen/ studio /township/ fighters
External storage: point /models and /cache at a big disk or NFS volume —
-v /mnt/bigstorage/coderai/models:/models -v /mnt/bigstorage/coderai/cache:/cache
Non-root: add --user "\$(id -u):\$(id -g)" (mounts must be owned by that UID),
or use rootless/userns-remap Docker with no extra flags.
See packaging/linux/README-RUN.txt (also at /opt/coderai/README-RUN.txt in the image).
EOF
......@@ -72,4 +72,8 @@ if changed:
PY
fi
# When a shared dedicated temp dir is configured, hand it to the server via --tmp
# so that the server's janitor prunes it. The option is pushed onto the front of
# the positional parameters, so the final argv is
#   --config "$CONFIG_DIR" [--tmp "$CODERAI_TMP"] <original args>
if [ -n "${CODERAI_TMP:-}" ]; then
    set -- --tmp "$CODERAI_TMP" "$@"
fi
exec /opt/coderai/python/bin/python3 /opt/coderai/app/coderai --config "$CONFIG_DIR" "$@"
......@@ -4709,6 +4709,48 @@ def pick_model(client: CoderAIClient, kind: str, override: str = None) -> str:
# Web UI
# ─────────────────────────────────────────────────────────────────────────────
# App route roots that appear as server-rendered URLs and JS fetch targets. Used
# to make the UI work behind a reverse-proxy sub-path mount (e.g. /township/).
_MOUNT_ROUTES = ("media", "api", "matches", "match", "characters",
                 "environments", "wardrobe", "prompts", "stream", "stop",
                 "job", "favicon.ico")


def _mount_html(html: str, prefix: str) -> str:
    """Rewrite a server-rendered page so it works under a reverse-proxy sub-path.

    Three rewrites are applied:

    1. App-route URLs (see ``_MOUNT_ROUTES``) found in
       href/src/action/poster/value/data-src/data-url attributes get ``prefix``
       prepended.
    2. A bare home link ``href="/"`` becomes ``href="<prefix>/"``.
    3. A small ``<script>`` shim is injected (before ``</head>`` when present,
       otherwise at the very start) that prefixes root-absolute URLs passed to
       ``fetch()`` and ``EventSource()`` at call time.

    Args:
        html: The page markup.
        prefix: Mount prefix such as ``'/township'``; an empty value disables
            the rewrite.

    Returns:
        The rewritten markup. The call is idempotent: a page that already
        carries the shim marker is returned unchanged, so URLs are never
        prefixed twice even when the prefix itself starts with a route name
        (e.g. ``'/api'``).
    """
    import json as _json
    import re as _re
    from html import escape as _escape

    marker = "/*coderai-mount*/"
    # The marker is only ever written by this function, so its presence means
    # the whole page has already been processed.
    if not prefix or marker in html:
        return html

    # The prefix originates from a request header, so it is escaped for each
    # context it is written into (HTML attribute value / JS string literal).
    attr_prefix = _escape(prefix, quote=True)
    js_prefix = _json.dumps(prefix).replace("<", "\\u003c")

    # Escape the route names: 'favicon.ico' contains a regex metacharacter.
    routes = "|".join(_re.escape(route) for route in _MOUNT_ROUTES)

    # 1) Attribute URLs: href/src/action/poster/value/data-* pointing at a route.
    attr_re = _re.compile(
        r'((?:href|src|action|poster|value|data-src|data-url)\s*=\s*["\'])'
        r'(/(?:' + routes + r')\b)')
    # Replacement callables are used so backslashes in the prefix are never
    # interpreted as regex group references.
    html = attr_re.sub(lambda m: m.group(1) + attr_prefix + m.group(2), html)

    # 2) Home/nav link to bare root: href="/" -> href="<prefix>/".
    html = _re.sub(r'(href\s*=\s*(["\']))/\2',
                   lambda m: m.group(1) + attr_prefix + '/' + m.group(2), html)

    # 3) JS shim: prefix root-absolute fetch()/EventSource() URLs at call time.
    shim = (
        "<script>" + marker + "(function(){var P=" + js_prefix + ";if(!P)return;"
        "function fix(u){return (typeof u==='string'&&u.charAt(0)==='/'"
        "&&u.charAt(1)!=='/'&&u.indexOf(P+'/')!==0&&u!==P)?P+u:u;}"
        "var of=window.fetch.bind(window);window.fetch=function(u,o){return of(fix(u),o);};"
        "var OE=window.EventSource;if(OE){var NE=function(u,o){return new OE(fix(u),o);};"
        "NE.prototype=OE.prototype;window.EventSource=NE;}})();</script>"
    )
    if "</head>" in html:
        return html.replace("</head>", shim + "</head>", 1)
    return shim + html
def launch_web_ui(default_args):
"""Launch a local web interface for Township Fighters content generation.
......@@ -9152,8 +9194,27 @@ async function resetPrompts(ev){
except (BrokenPipeError, ConnectionResetError, ConnectionAbortedError):
pass
def _public_prefix(self):
    """Return the reverse-proxy sub-path mount prefix (e.g. '/township'), or ''.

    The value is read from the ``X-Forwarded-Prefix`` request header, falling
    back to ``X-Script-Name``. It is normalised to exactly one leading slash
    and no trailing slash.

    Because the header is client-controllable and the result is spliced into
    generated HTML/JS, a value containing quotes, angle brackets, backslashes,
    whitespace or non-printable characters is ignored (treated as "no prefix").
    Collapsing the leading slashes also prevents a ``//host`` value from
    turning every rewritten link into a protocol-relative URL.
    """
    raw = (self.headers.get("X-Forwarded-Prefix")
           or self.headers.get("X-Script-Name") or "")
    trimmed = raw.strip().strip("/")
    if not trimmed:
        return ""
    if any(ch in "\"'<>\\" or ch.isspace() or not ch.isprintable()
           for ch in trimmed):
        return ""
    return "/" + trimmed
def _route(self, path):
    """Return ``path`` with the forwarded mount prefix removed.

    Internal routing then works identically whether or not the reverse proxy
    already stripped the prefix. A path equal to the prefix maps to '/';
    paths outside the prefix are returned untouched.
    """
    mount = self._public_prefix()
    if not mount:
        return path
    if path == mount:
        return "/"
    if path.startswith(mount + "/"):
        return path[len(mount):]
    return path
def _send(self, code, ctype, body):
if isinstance(body, str): body = body.encode()
if "text/html" in ctype:
pref = self._public_prefix()
if pref:
body = _mount_html(body.decode("utf-8", "replace"), pref).encode("utf-8")
self.send_response(code)
self.send_header("Content-Type", ctype)
self.send_header("Content-Length", str(len(body)))
......@@ -9163,7 +9224,7 @@ async function resetPrompts(ev){
def do_GET(self):
parsed = urllib.parse.urlparse(self.path)
path = parsed.path.rstrip("/") or "/"
path = self._route(parsed.path).rstrip("/") or "/"
if path == "/favicon.ico":
# Bundled icon next to this script (tools/assets/favicon.ico).
......@@ -9343,7 +9404,7 @@ async function resetPrompts(ev){
def do_POST(self):
parsed = urllib.parse.urlparse(self.path)
path = parsed.path
path = self._route(parsed.path)
if path == "/stop":
_state["abort"].set()
......
......@@ -1102,7 +1102,11 @@ HTML_PAGE = r"""
let models=[], profiles={characters:[], environments:[], voices:[], loras:[]};
function $(id){return document.getElementById(id)}
function esc(s){return String(s||'').replace(/[&<>"']/g,m=>({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[m]))}
async function api(path, opts={}){let r=await fetch(path,{headers:{'Content-Type':'application/json'},...opts}); if(!r.ok) throw new Error(await r.text()); return await r.json()}
const PREFIX="__ROOT_PREFIX__";
// Prefix app-local root-absolute paths (/api, /stream, /media) so they resolve under a
// reverse-proxy sub-path mount. Returned untouched: non-string values, relative paths,
// absolute URLs (http...), protocol-relative URLs (//host/...), and paths that already
// sit under PREFIX (so applying U twice never double-prefixes).
function U(p){
  if(typeof p!=='string' || p.charAt(0)!=='/' || p.charAt(1)==='/') return p;
  if(PREFIX && (p===PREFIX || p.indexOf(PREFIX+'/')===0)) return p;
  return PREFIX+p;
}
async function api(path, opts={}){let r=await fetch(U(path),{headers:{'Content-Type':'application/json'},...opts}); if(!r.ok) throw new Error(await r.text()); return await r.json()}
function fillSelect(sel, cap, def){let s=$(sel); s.innerHTML=''; let filtered=models.filter(m=>(m.capabilities||[]).includes(cap)); if(!filtered.length) filtered=models; for(let m of filtered){let o=document.createElement('option'); o.value=m.id; o.textContent=m.id; if(m.id===def) o.selected=true; s.appendChild(o)}}
async function loadModels(){let d=await api('/api/models'); models=d.models||[]; fillSelect('image_model','image_generation',d.defaults.image_model); fillSelect('video_model','video_generation',d.defaults.video_model); fillSelect('audio_model','audio_generation',d.defaults.audio_model); $('conn').textContent=`Connected: ${models.length} model(s)`}
async function loadProfiles(){profiles=await api('/api/profiles'); renderProfiles()}
......@@ -1133,9 +1137,9 @@ function addDialogue(btn){let box=btn.closest('.clip').querySelector('.dialogues
function selected(sel){return [...sel.selectedOptions].map(o=>o.value)}
function collectMovie(){let clips=[...document.querySelectorAll('.clip')].map(c=>({title:c.querySelector('.c_title').value,prompt:c.querySelector('.c_prompt').value,characters:selected(c.querySelector('.c_chars')),environments:selected(c.querySelector('.c_envs')),camera_motion:c.querySelector('.c_camera').value,action:c.querySelector('.c_action').value,speech_text:c.querySelector('.c_speech').value,speech_voice:c.querySelector('.c_voice').value,speech_speed:c.querySelector('.c_speed').value,lip_sync:c.querySelector('.c_lipsync').checked,lip_sync_method:$('lip_sync_method').value,music_prompt:c.querySelector('.c_music').value,sfx_prompt:c.querySelector('.c_sfx').value,dialogues:[...c.querySelectorAll('.dialogue')].map(d=>({character:d.querySelector('.d_char').value,voice:d.querySelector('.d_voice').value,text:d.querySelector('.d_text').value,start_time:d.querySelector('.d_start').value,speed:d.querySelector('.d_speed').value,lip_sync:c.querySelector('.c_lipsync').checked}))})); return {title:$('title').value,style:$('style').value,image_model:$('image_model').value,video_model:$('video_model').value,audio_model:$('audio_model').value,default_voice:$('default_voice').value,lip_sync_method:$('lip_sync_method').value,width:+$('width').value,height:+$('height').value,fps:+$('fps').value,num_frames:+$('num_frames').value,steps:+$('steps').value,guidance_scale:+$('guidance_scale').value,negative_prompt:$('negative_prompt').value,use_keyframes:$('use_keyframes').checked,soundtrack_prompt:$('soundtrack_prompt').value,loras:selected($('movie_loras')).map(n=>({name:n,weight:+$('movie_lora_weight').value})),lora_weight:+$('movie_lora_weight').value,movie_count:+$('movie_count').value,clips}}
async function startMovie(){let d=await api('/api/movie/start',{method:'POST',body:JSON.stringify(collectMovie())}); watchJob(d.job_id)}
async function watchJob(id){$('jobout').innerHTML=`<p>Job <span class="pill">${id}</span></p>`; let timer=setInterval(async()=>{let j=await api('/api/job/'+id); $('jobout').innerHTML=`<p><span class="pill">${esc(j.status)}</span> ${j.progress||0}% ${esc(j.message||'')}</p>`+(j.output_url?`<p><a href="${j.output_url}" target="_blank">Open output</a></p>`:'')+(j.error?`<p style="color:var(--bad)">${esc(j.error)}</p>`:''); if(j.status==='done'||j.status==='error'){clearInterval(timer); loadProfiles(); loadGallery()}},1500)}
async function loadGallery(){let d=await api('/api/gallery'); $('gallery_grid').innerHTML=(d.items||[]).map(it=>`<div class="profile">${it.type==='video'?`<video src="${it.url}" controls style="width:100%;height:130px;background:#000"></video>`:`<img src="${it.url}">`}<div class="p"><b>${esc(it.name)}</b><br><a href="${it.url}" target="_blank">open</a></div></div>`).join('')||'<div class="muted">No media yet.</div>'}
function connectLog(){let es=new EventSource('/stream'); es.onmessage=e=>{let l=$('log'); l.textContent+=e.data+'\n'; l.scrollTop=l.scrollHeight}}
async function watchJob(id){$('jobout').innerHTML=`<p>Job <span class="pill">${id}</span></p>`; let timer=setInterval(async()=>{let j=await api('/api/job/'+id); $('jobout').innerHTML=`<p><span class="pill">${esc(j.status)}</span> ${j.progress||0}% ${esc(j.message||'')}</p>`+(j.output_url?`<p><a href="${U(j.output_url)}" target="_blank">Open output</a></p>`:'')+(j.error?`<p style="color:var(--bad)">${esc(j.error)}</p>`:''); if(j.status==='done'||j.status==='error'){clearInterval(timer); loadProfiles(); loadGallery()}},1500)}
async function loadGallery(){let d=await api('/api/gallery'); $('gallery_grid').innerHTML=(d.items||[]).map(it=>`<div class="profile">${it.type==='video'?`<video src="${U(it.url)}" controls style="width:100%;height:130px;background:#000"></video>`:`<img src="${U(it.url)}">`}<div class="p"><b>${esc(it.name)}</b><br><a href="${U(it.url)}" target="_blank">open</a></div></div>`).join('')||'<div class="muted">No media yet.</div>'}
function connectLog(){let es=new EventSource(U('/stream')); es.onmessage=e=>{let l=$('log'); l.textContent+=e.data+'\n'; l.scrollTop=l.scrollHeight}}
document.querySelectorAll('.tab').forEach(t=>t.onclick=()=>{document.querySelectorAll('.tab,.section').forEach(x=>x.classList.remove('active')); t.classList.add('active'); $(t.dataset.tab).classList.add('active')})
loadModels().then(loadProfiles).then(()=>addClip()); loadGallery(); connectLog();
</script>
......@@ -1171,12 +1175,30 @@ def make_handler(app: VideoGenApp):
return {}
return json.loads(self.rfile.read(n).decode("utf-8"))
# -- reverse-proxy helpers (sub-path mounting) -------------------- #
def _public_prefix(self) -> str:
    """Path prefix this app is mounted under, per reverse-proxy headers.

    Reads ``X-Forwarded-Prefix`` (falling back to ``X-Script-Name``) and
    returns e.g. '/videogen' (exactly one leading slash, no trailing slash)
    or '' at root.

    The header is client-controllable and the result is substituted into the
    served page's JavaScript, so a value containing quotes, angle brackets,
    backslashes, whitespace or non-printable characters is rejected and ''
    is returned instead. Leading slashes are collapsed so that a ``//host``
    value cannot yield protocol-relative URLs.
    """
    header_value = (self.headers.get("X-Forwarded-Prefix")
                    or self.headers.get("X-Script-Name") or "")
    core = header_value.strip().strip("/")
    if not core:
        return ""
    unsafe = any(ch in "\"'<>\\" or ch.isspace() or not ch.isprintable()
                 for ch in core)
    return "" if unsafe else "/" + core
def _route(self, path: str) -> str:
    """Strip the forwarded mount prefix from ``path``.

    Routing stays mount-agnostic whether or not nginx already removed the
    prefix: a request for the prefix itself resolves to '/', a request below
    it loses the prefix, and anything else passes through unchanged.
    """
    base = self._public_prefix()
    under_mount = bool(base) and (path == base or path.startswith(base + "/"))
    if not under_mount:
        return path
    remainder = path[len(base):]
    return remainder if remainder else "/"
def do_GET(self) -> None:
parsed = urllib.parse.urlparse(self.path)
path = parsed.path
path = self._route(parsed.path)
try:
if path == "/":
self._send(200, HTML_PAGE.encode("utf-8"), "text/html; charset=utf-8")
html = HTML_PAGE.replace("__ROOT_PREFIX__", self._public_prefix())
self._send(200, html.encode("utf-8"), "text/html; charset=utf-8")
elif path == "/api/models":
self._json(app.models_payload())
elif path == "/api/profiles":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment