Almost all ready

9d023ec2 · Stefy Lanza (nextime / spora ) · c741ff5b · 9d023ec2 · 9d023ec2 · 9d023ec2
Commit 9d023ec2 authored Jun 18, 2026 by Stefy Lanza (nextime / spora )
16 changed files
--- a/.dockerignore
+++ b/.dockerignore
@@ -21,6 +21,18 @@ township_output
 dist
 dist-package
 *.log
+tmp
+debug.log
+CoderAI.gif
+
+# Produced artifacts and tool session/output dirs (mounted as volumes at runtime,
+# never baked into the image)
+video_editor/sessions
+video_editor.config.json
+tools/videogen_output
+tools/township_output
+tools/coderai_media
+samples

 # Build outputs
 build

--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@

 ![CoderAI](CoderAI.gif)

-An OpenAI-compatible API server to run models on your local GPU with web administration dashboard, supporting multiple GPU backends: NVIDIA (CUDA), AMD (Vulkan), and Intel (Vulkan). Configuration-driven architecture with per-model settings and full multi-modal support.
+A multimodal and multi-backend local model orchestrator with an OpenAI-compatible API server to run models on local GPUs, supporting multiple GPU backends: NVIDIA (CUDA), AMD (Vulkan), and Intel (Vulkan). Configuration-driven architecture with per-model settings and full multi-modal support.

 ## Features


--- a/codai/admin/routes.py
+++ b/codai/admin/routes.py
@@ -1420,6 +1420,37 @@ def _scan_caches() -> dict:
                "configs": all_configs.get(path, []),
            })

+    # Add configured non-GGUF HF models whose files have been evicted from disk
+    # (e.g. via "Free disk"). They are absent from the HF cache scan above, so
+    # surface them here as missing so they keep a Re-download button.
+    from codai.models.cache import is_huggingface_model_id
+    existing_hf_ids = {m["id"] for m in result["hf"]}
+    for path, (settings, mtype) in configured_settings.items():
+        if path in existing_hf_ids:
+            continue
+        s = settings if isinstance(settings, dict) else {}
+        if s.get("backend") == "whisper-server":
+            continue
+        # Only HF-style repo IDs (owner/repo) — skip local paths and GGUF files
+        if os.path.isabs(path) or path.endswith('.gguf') or not is_huggingface_model_id(path):
+            continue
+        # A real local relative path that still exists isn't an evicted model
+        if os.path.exists(path):
+            continue
+        caps = s.get("capabilities") or detect_model_capabilities(path).to_list()
+        result["hf"].append({
+            "id": path,
+            "size_gb": 0, "size_bytes": 0, "revision_count": 0,
+            "files": [], "file_count": 0,
+            "in_config": True, "missing": True,
+            "source_repo": path,
+            "model_type": mtype if mtype and mtype != "gguf_models" else "text_models",
+            "settings": s,
+            "capabilities": caps,
+            "incomplete": False,
+            "configs": all_configs.get(path, []),
+        })
+
    return result


@@ -1613,6 +1644,96 @@ async def api_delete_cached_model(
    return await asyncio.to_thread(_do_delete_model, model_id, cache_type)


+@router.post("/admin/api/model-free-disk", summary="Delete a model's files but keep its config")
+async def api_model_free_disk(request: Request, username: str = Depends(require_admin)):
+    """Delete a model's files to free disk space while keeping its models.json
+    entry. The source repo is first stored on the matching config entries so
+    the Re-download button still has a target once the file is gone."""
+    if config_manager is None:
+        raise HTTPException(status_code=503, detail="Config manager not initialized")
+    import os as _os, asyncio
+    payload = await request.json()
+    path = (payload.get("path") or payload.get("model_id") or "").strip()
+    cache_type = payload.get("cache_type", "gguf")
+    source_repo = (payload.get("source_repo") or "").strip()
+    if not path:
+        raise HTTPException(status_code=400, detail="path is required")
+
+    # Flat GGUF files keep no HF repo info on disk, so record source_repo on the
+    # matching config entries before deleting. Nothing to record when the entry
+    # key already IS the repo id (HF models re-download by id).
+    if source_repo and source_repo != path:
+        fname = _os.path.basename(path) if ("/" in path or _os.sep in path) else ""
+        dirty = False
+        for cat in ("text_models", "image_models", "audio_models",
+                    "gguf_models", "tts_models", "vision_models", "video_models",
+                    "audio_gen_models", "embedding_models", "spatial_models"):
+            entries = config_manager.models_data.get(cat, [])
+            for idx, item in enumerate(entries):
+                key = item if isinstance(item, str) else (item.get("path") or item.get("id") or "")
+                if key != path and not (fname and _os.path.basename(key) == fname):
+                    continue
+                if isinstance(item, str):
+                    entries[idx] = {"path": item, "source_repo": source_repo}
+                    dirty = True
+                elif not item.get("source_repo"):
+                    item["source_repo"] = source_repo
+                    dirty = True
+        if dirty:
+            config_manager.save_models()
+
+    result = await asyncio.to_thread(_do_delete_model, path, cache_type)
+    _broker_notify_models_updated(request)
+    return result
+
+
+@router.post("/admin/api/model-add-known", summary="Register a model in config without downloading")
+async def api_model_add_known(request: Request, username: str = Depends(require_admin)):
+    """Add a model to models.json as a known-but-not-downloaded reference.
+
+    The model then appears in the model list as "missing" with a working
+    Re-download button, without fetching any files now — the same end state as
+    "Free disk", but reached without ever having the files locally.
+
+    JSON body: ``model_id`` (or ``path``; required) plus optional ``source_repo``
+    (blank falls back to ``model_id``), ``model_type`` and ``is_gguf``. Returns
+    ``{"success": True}``, with ``"already": True`` when already configured."""
+    if config_manager is None:
+        raise HTTPException(status_code=503, detail="Config manager not initialized")
+    import os as _os
+    data = await request.json()
+    model_id = (data.get("model_id") or data.get("path") or "").strip()
+    if not model_id:
+        raise HTTPException(status_code=400, detail="model_id is required")
+    source_repo = (data.get("source_repo") or "").strip() or model_id  # whitespace-only → model_id
+    model_type = (data.get("model_type") or "").strip()
+    is_gguf = bool(data.get("is_gguf")) or model_type == "gguf_models" or "gguf" in model_id.lower()
+    valid = {"text_models", "image_models", "audio_models", "gguf_models", "tts_models",
+             "vision_models", "video_models", "audio_gen_models", "embedding_models", "spatial_models"}
+    if is_gguf:
+        model_type = "gguf_models"
+    if model_type not in valid:
+        model_type = "text_models"
+
+    # GGUF entries must persist source_repo so Re-download has a target (flat GGUF
+    # files keep no repo info on disk). Plain HF repos re-download by id, so a bare
+    # path string is enough and surfaces as a missing HF model.
+    entry = {"path": model_id, "source_repo": source_repo} if is_gguf else model_id
+
+    # Dedupe across all categories by path / basename so we don't double-add.
+    fname = _os.path.basename(model_id) if ("/" in model_id or _os.sep in model_id) else model_id
+    for cat in valid:
+        for m in config_manager.models_data.get(cat, []):
+            key = m if isinstance(m, str) else (m.get("path") or m.get("id") or "")
+            if key == model_id or (fname and _os.path.basename(key) == fname):
+                return {"success": True, "already": True}
+
+    config_manager.models_data.setdefault(model_type, []).append(entry)
+    config_manager.save_models()
+    _broker_notify_models_updated(request)
+    return {"success": True}
+
+
 @router.post("/admin/api/model-enable", summary="Enable a model")
 async def api_model_enable(request: Request, username: str = Depends(require_admin)):
    """Register a cached model in models.json so CoderAI can use it."""

--- a/codai/admin/templates/models.html
+++ b/codai/admin/templates/models.html
--- a/codai/backends/vulkan.py
+++ b/codai/backends/vulkan.py
@@ -67,6 +67,30 @@ def _make_llama_thermal_criteria():
    except Exception:
        return None

+
+_CHAT_SUPPORTS_STOPPING_CRITERIA = None
+
+
+def _chat_supports_stopping_criteria() -> bool:
+    """Report whether ``Llama.create_chat_completion`` takes ``stopping_criteria``.
+
+    llama-cpp-python versions differ: create_completion always accepts it, but
+    several create_chat_completion builds reject it with 'unexpected keyword
+    argument'. The signature is inspected once and the answer cached."""
+    global _CHAT_SUPPORTS_STOPPING_CRITERIA
+    if _CHAT_SUPPORTS_STOPPING_CRITERIA is not None:
+        return _CHAT_SUPPORTS_STOPPING_CRITERIA
+    try:
+        import inspect
+        from llama_cpp import Llama as _L
+        params = inspect.signature(_L.create_chat_completion).parameters
+        takes_var_kw = any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params.values())
+        answer = "stopping_criteria" in params or takes_var_kw
+    except Exception:
+        answer = False  # import/introspection failed → treat as unsupported
+    _CHAT_SUPPORTS_STOPPING_CRITERIA = answer
+    return answer
+
 try:
    from llama_cpp import Llama
    from llama_cpp.llama_chat_format import ChatFormatterResponse
@@ -696,7 +720,11 @@ class VulkanBackend(ModelBackend):
            self.n_ctx = 0  # 0 means use model's built-in default in llama.cpp
            print("DEBUG: --no-ram mode: ignoring --n-ctx, using model default context size")
        else:
-            n_ctx = kwargs.get('n_ctx', 2048)
+            # Accept either 'n_ctx' (models.json / GGUF) or 'ctx' (CLI / older
+            # configs); the manager passes both, but be robust to either alone.
+            n_ctx = kwargs.get('n_ctx')
+            if n_ctx is None:
+                n_ctx = kwargs.get('ctx', 2048)
            self.n_ctx = n_ctx
        
        # Set verbose
@@ -775,13 +803,22 @@ class VulkanBackend(ModelBackend):
            print(f"Error loading GGUF model: {e}")
            raise
        finally:
-            # Restore llama.cpp's default (quiet) logging after load
+            # Quiet logging after load — but DO NOT drop to NULL + GC the callback.
+            # ggml keeps the log-callback pointer and may still invoke it during
+            # generation (e.g. gemma's iSWA hybrid cache logs every step), so a
+            # garbage-collected ctypes callback becomes a use-after-free → SIGSEGV
+            # in libffi. Install a persistent no-op callback and keep a strong
+            # reference on self for the model's lifetime.
            if _llama_cpp:
                try:
-                    _llama_cpp.llama_log_set(None, None)
+                    @_llama_cpp.llama_log_callback
+                    def _quiet_log_cb(level, text, user_data):
+                        pass
+                    _llama_cpp.llama_log_set(_quiet_log_cb, None)
+                    self._log_cb = _quiet_log_cb   # keep alive (prevents GC/UAF)
                except Exception:
-                    pass
-            _log_cb = None  # release callback
+                    self._log_cb = None
+            _log_cb = None  # the verbose load-phase callback is no longer referenced

        # Post-load layer/buffer summary
        try:
@@ -1278,7 +1315,7 @@ class VulkanBackend(ModelBackend):
        if response_format and response_format.get('type') == 'json_object':
            kwargs['response_format'] = {'type': 'json_object'}
        _tc = _make_llama_thermal_criteria()
-        if _tc is not None:
+        if _tc is not None and _chat_supports_stopping_criteria():
            kwargs['stopping_criteria'] = _tc

        with self._gen_lock:
@@ -1307,7 +1344,7 @@ class VulkanBackend(ModelBackend):
        if stop:
            kwargs['stop'] = stop
        _tc = _make_llama_thermal_criteria()
-        if _tc is not None:
+        if _tc is not None and _chat_supports_stopping_criteria():
            kwargs['stopping_criteria'] = _tc

        prompt_tokens = 0

--- a/codai/config.py
+++ b/codai/config.py
@@ -264,6 +264,14 @@ class Config:
    # a large-capacity volume when /tmp is small — 4× upscaling extracts many large
    # frames and can exhaust a small /tmp ("No space left on device").
    tmp_dir: Optional[str] = None
+    # Periodic cleanup of the temporary-working dir (above). A background janitor
+    # deletes entries older than tmp_cleanup_max_age_hours every
+    # tmp_cleanup_interval_minutes. Guards against runaway tmp growth from
+    # delete=False temp files left by interrupted generations. Only runs when a
+    # dedicated tmp_dir is configured (never prunes a bare system /tmp).
+    tmp_cleanup_enabled: bool = True
+    tmp_cleanup_max_age_hours: float = 24.0
+    tmp_cleanup_interval_minutes: float = 60.0
    hf_chat_templates: list = field(default_factory=list)
    reasoning_options: list = field(default_factory=list)
    parser: str = "auto"
@@ -422,6 +430,9 @@ class ConfigManager:
                grammar_guided=config_data.get("grammar_guided", False),
                file_path=config_data.get("file_path"),
                tmp_dir=config_data.get("tmp_dir"),
+                tmp_cleanup_enabled=config_data.get("tmp_cleanup_enabled", True),
+                tmp_cleanup_max_age_hours=config_data.get("tmp_cleanup_max_age_hours", 24.0),
+                tmp_cleanup_interval_minutes=config_data.get("tmp_cleanup_interval_minutes", 60.0),
                hf_chat_templates=config_data.get("hf_chat_templates", []),
                reasoning_options=config_data.get("reasoning_options", []),
                parser=config_data.get("parser", "auto")
@@ -597,6 +608,9 @@ class ConfigManager:
            "grammar_guided": self.config.grammar_guided,
            "file_path": self.config.file_path,
            "tmp_dir": self.config.tmp_dir,
+            "tmp_cleanup_enabled": self.config.tmp_cleanup_enabled,
+            "tmp_cleanup_max_age_hours": self.config.tmp_cleanup_max_age_hours,
+            "tmp_cleanup_interval_minutes": self.config.tmp_cleanup_interval_minutes,
            "hf_chat_templates": self.config.hf_chat_templates,
            "reasoning_options": self.config.reasoning_options,
            "parser": self.config.parser

--- a/codai/main.py
+++ b/codai/main.py
@@ -339,6 +339,21 @@ def main():
        except Exception as _e:
            print(f"WARNING: could not use tmp dir '{_tmp_dir}': {_e} — using OS default")

+    # Periodically reclaim the dedicated tmp dir (abandoned delete=False scratch
+    # from interrupted generations). Only runs against a configured tmp_dir, never
+    # a bare system /tmp. Same mechanism works locally and inside the container.
+    if _tmp_dir and getattr(config, "tmp_cleanup_enabled", True):
+        try:
+            from codai.models.tmp_janitor import start as _start_tmp_janitor
+            _start_tmp_janitor(
+                _tmp_dir,
+                enabled=config.tmp_cleanup_enabled,
+                max_age_hours=getattr(config, "tmp_cleanup_max_age_hours", 24.0),
+                interval_minutes=getattr(config, "tmp_cleanup_interval_minutes", 60.0),
+            )
+        except Exception as _e:
+            print(f"WARNING: tmp janitor failed to start: {_e}")
+
    # Apply cache directory overrides from config before any cache module is used.
    # We set env vars AND patch huggingface_hub.constants in case the library was
    # already imported (constants are computed once at import time from env vars).
@@ -973,7 +988,9 @@ def main():
    global_args.enhance_allow_ffmpeg = config.enhance.allow_ffmpeg
    global_args.enhance_allow_rife_ncnn = config.enhance.allow_rife_ncnn
    global_args.n_gpu_layers = config.vulkan.n_gpu_layers
-    global_args.n_ctx = [config.vulkan.n_ctx]
+    # The global fallback context window. Must be a plain int — it flows into the
+    # llama.cpp backend's n_ctx, which a list would break ('<' int vs list).
+    global_args.n_ctx = config.vulkan.n_ctx
    global_args.vulkan_device = config.vulkan.device_id
    global_args.vulkan_single_gpu = config.vulkan.single_gpu
    global_args.image_sample_method = config.image.sample_method

--- a/codai/models/manager.py
+++ b/codai/models/manager.py
@@ -875,7 +875,7 @@ class MultiModelManager:
                return self._get_least_busy_instance(self.default_model)
            self._pending_new_instance.discard(self.default_model)

-            config = self.config.get(self.default_model, {})
+            config = self._config_for_model(self.default_model)
            backend_type = self.model_backend_types.get(self.default_model, "auto")

            try:
@@ -902,8 +902,30 @@ class MultiModelManager:
                            return v
                    return default

-                ctx = _cfg_or_global('ctx', 'n_ctx')
+                # Context window. models.json entries store it as 'n_ctx', while
+                # older configs/CLI use 'ctx'; the per-model runtime cfg carries
+                # it under 'ctx' (build_runtime_kwargs maps the entry's n_ctx →
+                # 'ctx'), and 'n_ctx' is also accepted. Read either key, checking
+                # the config first so the PER-MODEL value wins over the global
+                # vulkan.n_ctx fallback, then pass BOTH kwarg names downstream:
+                # the GGUF/llama.cpp backend reads 'n_ctx', the transformers
+                # backend reads 'ctx'.
+                ctx = config.get('ctx')
+                if ctx is None:
+                    ctx = config.get('n_ctx')
+                if ctx is None and _ga is not None:
+                    ctx = getattr(_ga, 'n_ctx', None)
+                # Coerce to an int (None when unparseable): a stray list/str (e.g.
+                # an old global default wrapped in a list) would otherwise reach
+                # llama.cpp and raise '<' int-vs-list at load.
+                if isinstance(ctx, (list, tuple)):
+                    ctx = ctx[0] if ctx else None
+                try:
+                    ctx = int(ctx) if ctx is not None else None
+                except (TypeError, ValueError):
+                    ctx = None
                if ctx:
+                    kwargs['n_ctx'] = ctx
                    kwargs['ctx'] = ctx
                n_gpu_layers = _cfg_or_global('n_gpu_layers', 'n_gpu_layers')
                if n_gpu_layers is not None:
@@ -974,7 +996,7 @@ class MultiModelManager:
                return self._get_least_busy_instance(model_name)
            self._pending_new_instance.discard(model_name)

-            config = self.config.get(model_name, {})
+            config = self._config_for_model(model_name)
            backend_type = self.model_backend_types.get(model_name, "auto")

            try:
@@ -999,8 +1021,30 @@ class MultiModelManager:
                            return v
                    return default

-                ctx = _cfg_or_global('ctx', 'n_ctx')
+                # Context window. models.json entries store it as 'n_ctx', while
+                # older configs/CLI use 'ctx'; the per-model runtime cfg carries
+                # it under 'ctx' (build_runtime_kwargs maps the entry's n_ctx →
+                # 'ctx'), and 'n_ctx' is also accepted. Read either key, checking
+                # the config first so the PER-MODEL value wins over the global
+                # vulkan.n_ctx fallback, then pass BOTH kwarg names downstream:
+                # the GGUF/llama.cpp backend reads 'n_ctx', the transformers
+                # backend reads 'ctx'.
+                ctx = config.get('ctx')
+                if ctx is None:
+                    ctx = config.get('n_ctx')
+                if ctx is None and _ga is not None:
+                    ctx = getattr(_ga, 'n_ctx', None)
+                # Coerce to an int (None when unparseable): a stray list/str (e.g.
+                # an old global default wrapped in a list) would otherwise reach
+                # llama.cpp and raise '<' int-vs-list at load.
+                if isinstance(ctx, (list, tuple)):
+                    ctx = ctx[0] if ctx else None
+                try:
+                    ctx = int(ctx) if ctx is not None else None
+                except (TypeError, ValueError):
+                    ctx = None
                if ctx:
+                    kwargs['n_ctx'] = ctx
                    kwargs['ctx'] = ctx
                n_gpu_layers = _cfg_or_global('n_gpu_layers', 'n_gpu_layers')
                if n_gpu_layers is not None:
@@ -1043,6 +1087,15 @@ class MultiModelManager:
                inst_num = pool.count + 1 if pool else 1
                print(f"Loading model on demand: {model_name}"
                      + (f" (instance {inst_num})" if inst_num > 1 else ""))
+                # Evict resident models to make room before loading (idempotent —
+                # a no-op when request_model already freed enough). Guards the
+                # direct on-demand path, which otherwise loads on top of the
+                # current model and OOMs (e.g. switching to a larger model).
+                if inst_num == 1:
+                    try:
+                        self.ensure_vram_for(model_name)
+                    except Exception as _ev_e:
+                        print(f"  (ensure_vram_for warning: {_ev_e})")
                _snap = self.vram_before_load()
                # Tell the backend how much VRAM this model is expected to need so
                # it can decide whether Flash-Attention-2 is safe (FA2 requires the
@@ -1212,6 +1265,37 @@ class MultiModelManager:
        self.model_aliases[alias] = model_name
        for model_type in self._registered_types_for(model_name):
            self._remember_registered_type(alias, model_type)
+
+    def _config_for_model(self, name) -> dict:
+        """Look up a model's config dict, whichever id form the caller passed.
+
+        ``self.config`` is keyed by the registration id (usually the model's full
+        path), yet on-demand loads often arrive with just a *basename* (e.g.
+        ``gemma-….gguf``). A plain ``self.config.get(basename)`` then misses and
+        yields ``{}``, silently dropping every per-model setting (n_ctx,
+        flash_attn, parser, cache quant, …). Lookup order: exact id → alias map →
+        basename / basename-without-extension; empty configs count as misses."""
+        if not name:
+            return {}
+        import os
+        def _leaf_and_stem(value):
+            # Basename, and that basename with a trailing ".gguf" removed.
+            leaf = os.path.basename(str(value))
+            return leaf, (leaf[:-5] if leaf.endswith(".gguf") else leaf)
+
+        direct = self.config.get(name)
+        if direct:
+            return direct
+        aliased = self.model_aliases.get(name)
+        via_alias = self.config.get(aliased) if aliased and aliased != name else None
+        if via_alias:
+            return via_alias
+        want_leaf, want_stem = _leaf_and_stem(name)
+        for candidate, candidate_cfg in self.config.items():
+            leaf, stem = _leaf_and_stem(candidate)
+            if candidate_cfg and (leaf == want_leaf or stem == want_stem):
+                return candidate_cfg
+        return {}
    
    def set_assigned_models(self, keys) -> None:
        """Restrict list_models() to the front-assigned subset (route-keys: alias /
@@ -2564,7 +2648,12 @@ class MultiModelManager:
        4. HuggingFace hub cache size (dense shards or largest GGUF), adjusted.
        Returns 0 when the requirement cannot be determined.
        """
-        cfg = self.config.get(model_key, {})
+        # Resolve by basename/alias too — a model requested by basename would
+        # otherwise miss self.config (keyed by full path), return 0, and skip the
+        # eviction that makes room for it (→ OOM loading on top of a resident model).
+        cfg = self._config_for_model(model_key)
+        if not cfg and resolved_name:
+            cfg = self._config_for_model(resolved_name)
        # Unwrap a forwarded `_raw_cfg` so we see the ORIGINAL model entry the
        # same way the loaders do (build_kwargs_from_config only copies a few
        # keys to the top level — component_quantization lives ONLY in _raw_cfg).

--- a/codai/models/parser.py
+++ b/codai/models/parser.py
@@ -1058,6 +1058,42 @@ def parse_gemma_native_tool_calls(text: str, tool_names=None):
    return out


+def parse_xml_wrapped_tool_calls(text: str, tool_names):
+    """Parse ``<NAME>…</NAME>`` tool calls where NAME is a declared tool.
+
+    Some clients (Kilo/Cline/Roo-style) describe tools in the system prompt and
+    instruct the model to emit XML-tagged calls. Models then produce e.g.
+    ``<bash>{"command": "ls"}</bash>`` (JSON args) or ``<bash><command>ls</command>
+    </bash>`` (nested XML params). Neither matches a model's native tool format,
+    so this recovers them into ``(name, args_dict)``. Restricted to real tool
+    names so ordinary tagged prose (``<thinking>`` …) isn't misread. Calls come
+    back in text order (exact duplicates dropped), whatever ``tool_names``' order."""
+    if not text or not tool_names:
+        return []
+    found = []
+    for name in tool_names:
+        tag = re.escape(name)
+        for m in re.finditer(rf'<{tag}\s*>(.*?)</{tag}\s*>', text, re.DOTALL):
+            inner, args = m.group(1).strip(), None
+            if inner.startswith('{'):
+                try:
+                    args = json.loads(inner)
+                except Exception:
+                    args = None  # not valid JSON → try nested XML params below
+            if args is None:
+                params = re.findall(r'<(\w+)\s*>(.*?)</\1\s*>', inner, re.DOTALL)
+                args = {k: v.strip() for k, v in params} if params else None
+            if isinstance(args, dict):
+                found.append((m.start(), name, args))
+    out, seen = [], set()
+    for _, name, args in sorted(found, key=lambda f: f[0]):  # order of appearance
+        key = (name, json.dumps(args, sort_keys=True, default=str))
+        if key not in seen:
+            seen.add(key)
+            out.append((name, args))
+    return out
+
+
 # 7. GEMMA PARSER
 class GemmaParser(BaseParser):
    @validate_tool_output
@@ -1082,6 +1118,14 @@ class GemmaParser(BaseParser):
            except:
                pass

+        # XML-tagged tool calls (<bash>{…}</bash>) emitted when the client (Kilo/
+        # Cline-style) prompts for XML tools rather than the model's native format.
+        if not results and self.tools:
+            for name, args in parse_xml_wrapped_tool_calls(text, set(self.tools.keys())):
+                results.append(self._to_oa(name, args))
+            if results:
+                return results
+
        # Fallback: if no tool calls found, try using ToolCallParser
        if not results:
            tool_call_parser = ToolCallParser()

--- a/packaging/linux/Dockerfile.oci
+++ b/packaging/linux/Dockerfile.oci
@@ -134,6 +134,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
      libsndfile1 \
      libvulkan1 \
      mesa-vulkan-drivers \
+      pciutils \
      openssl \
    && rm -rf /var/lib/apt/lists/*


--- a/packaging/linux/Dockerfile.oci-venv
+++ b/packaging/linux/Dockerfile.oci-venv
@@ -4,39 +4,22 @@ ARG UBUNTU_VERSION=22.04
 FROM scratch AS build_meta
 COPY .packaging-cache/build-manifest.json /build-manifest.json

-FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime
+# ─────────────────────────────────────────────────────────────────────────────
+# assembler: stage the local bundle into /opt/coderai. The 27GB COPY of the
+# bundle lives ONLY in this stage; the final image copies the assembled
+# /opt/coderai once, so the bundle is never stored twice.
+# ─────────────────────────────────────────────────────────────────────────────
+FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS assembler

 ARG PYTHON_VERSION=3.13.5
 ARG PBS_RELEASE=20250612
 ARG VENV_PYTHON_MINOR=3.13

-ENV DEBIAN_FRONTEND=noninteractive \
-    PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
-    PYTHONUNBUFFERED=1 \
-    HF_HOME=/cache/huggingface \
-    HUGGINGFACE_HUB_CACHE=/cache/huggingface/hub \
-    TRANSFORMERS_CACHE=/cache/huggingface/transformers \
-    DIFFUSERS_CACHE=/cache/diffusers \
-    CODERAI_CONFIG_DIR=/config \
-    CODERAI_MODELS_DIR=/models \
-    CODERAI_CACHE_DIR=/cache \
-    CODERAI_HOST=0.0.0.0 \
-    CODERAI_PORT=8776
+ENV DEBIAN_FRONTEND=noninteractive

 RUN apt-get update && apt-get install -y --no-install-recommends \
-      ca-certificates \
-      curl \
-      ffmpeg \
-      git \
-      libgomp1 \
-      libgl1 \
-      libglib2.0-0 \
-      libsndfile1 \
-      libvulkan1 \
-      mesa-vulkan-drivers \
-      openssl \
-      rsync \
-    && rm -rf /var/lib/apt/lists/*
+      ca-certificates curl rsync \
+    && apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*.deb

 RUN set -eux; \
    curl -fsSL -o /tmp/python.tar.gz \
@@ -46,14 +29,12 @@ RUN set -eux; \
    rm /tmp/python.tar.gz; \
    /opt/coderai/python/bin/python3 --version

-ENV PYTHONHOME=/opt/coderai/python \
-    PATH=/opt/coderai/python/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
-
 # BuildKit named context supplied by packaging/linux/build_oci_image.sh:
 #   --build-context local_bundle=/path/to/.packaging-cache/oci-venv-context
-# The bundle contains the selected venv plus ldd-discovered native libraries from
-# the local machine. GPU drivers are intentionally not bundled; NVIDIA Container
-# Toolkit / host Vulkan ICDs remain the runtime contract.
+# The bundle contains the selected venv plus ldd-discovered native libraries, the
+# parler overlay, the isolated lip-sync venvs (+repos/weights), a standalone
+# Python 3.10 for them, and the ds4 binary. GPU drivers are intentionally not
+# bundled; the NVIDIA Container Toolkit / host Vulkan ICDs remain the contract.
 COPY --from=local_bundle / /tmp/local-bundle/

 RUN set -eux; \
@@ -69,20 +50,107 @@ RUN set -eux; \
      mkdir -p /opt/coderai/local-libs; \
      rsync -a /tmp/local-bundle/local-libs/ /opt/coderai/local-libs/; \
    fi; \
+    if [ -d /tmp/local-bundle/parler-venv/site-packages ]; then \
+      mkdir -p /opt/coderai/parler-venv/site-packages; \
+      rsync -a /tmp/local-bundle/parler-venv/site-packages/ /opt/coderai/parler-venv/site-packages/; \
+    fi; \
+    if [ -d /tmp/local-bundle/py310 ]; then \
+      mkdir -p /opt/coderai/py310; \
+      rsync -a /tmp/local-bundle/py310/ /opt/coderai/py310/; \
+    fi; \
+    for d in lipsync_venv Wav2Lip SadTalker ds4; do \
+      if [ -d "/tmp/local-bundle/$d" ]; then \
+        mkdir -p "/opt/coderai/$d"; \
+        rsync -a "/tmp/local-bundle/$d/" "/opt/coderai/$d/"; \
+      fi; \
+    done; \
+    cfg="/opt/coderai/lipsync_venv/pyvenv.cfg"; \
+    if [ -f "$cfg" ]; then \
+      sed -i 's|^home *=.*|home = /opt/coderai/py310/bin|; s|^command *=.*|command = /opt/coderai/py310/bin/python3.10|' "$cfg"; \
+      for p in python python3 python3.10; do ln -sf /opt/coderai/py310/bin/python3.10 "/opt/coderai/lipsync_venv/bin/$p"; done; \
+    fi; \
    if [ -d /tmp/local-bundle/local-bin ]; then \
-      rsync -a /tmp/local-bundle/local-bin/ /usr/local/bin/; \
-      find /usr/local/bin -maxdepth 1 -type f -exec chmod +x '{}' \;; \
+      mkdir -p /opt/coderai/staged-local-bin; \
+      rsync -a /tmp/local-bundle/local-bin/ /opt/coderai/staged-local-bin/; \
    fi; \
    rm -rf /tmp/local-bundle; \
-    find /opt/coderai/python -type d \( -name __pycache__ -o -name tests -o -name test \) -prune -exec rm -rf '{}' +
+    find /opt/coderai -type d \( -name __pycache__ -o -name tests -o -name test \) -prune -exec rm -rf '{}' + || true
+
+# ─────────────────────────────────────────────────────────────────────────────
+# runtime: the shipped image. Copies the assembled tree once (no bundle dup).
+# ─────────────────────────────────────────────────────────────────────────────
+FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime
+
+ARG PYTHON_VERSION=3.13.5
+
+# Note: PYTHONHOME / the python-prefixed PATH are deliberately NOT set here — that
+# would hijack the system python3 during apt's python3-minimal post-install (the
+# standalone interpreter is only COPYed in below). They're set after apt.
+ENV DEBIAN_FRONTEND=noninteractive \
+    PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
+    PYTHONUNBUFFERED=1 \
+    HF_HOME=/cache/huggingface \
+    HUGGINGFACE_HUB_CACHE=/cache/huggingface/hub \
+    TRANSFORMERS_CACHE=/cache/huggingface/transformers \
+    DIFFUSERS_CACHE=/cache/diffusers \
+    CODERAI_CONFIG_DIR=/config \
+    CODERAI_MODELS_DIR=/models \
+    CODERAI_CACHE_DIR=/cache \
+    CODERAI_HOST=0.0.0.0 \
+    CODERAI_PORT=8776 \
+    CODERAI_LIPSYNC_VENV=/opt/coderai/lipsync_venv \
+    CODERAI_WAV2LIP_SRC=/opt/coderai/Wav2Lip \
+    CODERAI_WAV2LIP_DIR=/cache/lipsync/Wav2Lip \
+    CODERAI_SADTALKER_SRC=/opt/coderai/SadTalker \
+    CODERAI_SADTALKER_DIR=/cache/lipsync/SadTalker \
+    CODERAI_DS4_DIR=/cache/ds4
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      ca-certificates \
+      curl \
+      ffmpeg \
+      git \
+      libgomp1 \
+      libgl1 \
+      libglib2.0-0 \
+      libsndfile1 \
+      libvulkan1 \
+      mesa-vulkan-drivers \
+      vulkan-tools \
+      pciutils \
+      nginx \
+      supervisor \
+      openssl \
+      rsync \
+    && apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*.deb
+
+# The fully assembled CoderAI tree (Python + venvs + tools), copied once.
+COPY --from=assembler /opt/coderai /opt/coderai
+
+# Now the standalone interpreter exists, activate it for the app + launchers.
+ENV PYTHONHOME=/opt/coderai/python \
+    PATH=/opt/coderai/python/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin

 WORKDIR /opt/coderai/app
 COPY . /opt/coderai/app
 COPY --from=build_meta /build-manifest.json /opt/coderai/BUILD-MANIFEST.json
 COPY packaging/linux/launcher/coderai-oci /usr/local/bin/coderai
+COPY packaging/linux/launcher/with-env /usr/local/bin/with-env
+COPY packaging/linux/launcher/coderai-entrypoint /usr/local/bin/coderai-entrypoint
+COPY packaging/linux/launcher/wav2lip /usr/local/bin/wav2lip
+COPY packaging/linux/launcher/sadtalker /usr/local/bin/sadtalker
+COPY packaging/linux/nginx.conf /etc/nginx/nginx.conf
+COPY packaging/linux/supervisord.conf /etc/supervisor/supervisord.conf
+COPY packaging/linux/README-RUN.txt /opt/coderai/README-RUN.txt

 RUN set -eux; \
-    chmod +x /usr/local/bin/coderai /opt/coderai/app/coderai; \
+    if [ -d /opt/coderai/staged-local-bin ]; then \
+      rsync -a /opt/coderai/staged-local-bin/ /usr/local/bin/; \
+      find /usr/local/bin -maxdepth 1 -type f -exec chmod +x '{}' \;; \
+      rm -rf /opt/coderai/staged-local-bin; \
+    fi; \
+    chmod +x /usr/local/bin/coderai /usr/local/bin/with-env /usr/local/bin/coderai-entrypoint \
+             /usr/local/bin/wav2lip /usr/local/bin/sadtalker /opt/coderai/app/coderai; \
    mkdir -p /config /models /cache /opt/coderai/app/models; \
    rm -rf \
      /opt/coderai/app/.git \
@@ -101,5 +169,7 @@ if missing:
 PY

 VOLUME ["/config", "/models", "/cache"]
+# Single published port: nginx fronts the main server (/) and the tool web UIs
+# (/editor/, /videogen/, /township/).
 EXPOSE 8776
-ENTRYPOINT ["/usr/local/bin/coderai"]
+ENTRYPOINT ["/usr/local/bin/coderai-entrypoint"]
--- a/packaging/linux/README-RUN.txt
+++ b/packaging/linux/README-RUN.txt
@@ -25,3 +25,127 @@ AMD/Intel Vulkan:

 CPU:
  No GPU setup is required.
+
+
+================================================================================
+Running the Docker / OCI image
+================================================================================
+
+The image publishes ONE port (8776). nginx inside the container fronts:
+  http://HOST:8776/            CoderAI server + OpenAI-compatible API + admin UI
+  http://HOST:8776/editor/     Video editor
+  http://HOST:8776/videogen/   Videogen studio
+  http://HOST:8776/township/   Township fighters
+
+Three volumes hold all mutable state (everything else in the image is read-only):
+  /config   app config + auth (small)
+  /models   model storage / data path
+  /cache    Hugging Face/diffusers caches + tool outputs (LARGE)
+
+The examples below run the container as YOUR user (recommended). Create and own
+the state dirs once, up front:
+
+  mkdir -p coderai-config coderai-models coderai-cache
+  sudo chown -R "$(id -u):$(id -g)" coderai-config coderai-models coderai-cache
+
+Basic run (NVIDIA):
+  docker run --gpus all --ipc=host -p 8776:8776 \
+    --user "$(id -u):$(id -g)" \
+    -v "$PWD/coderai-config:/config" \
+    -v "$PWD/coderai-models:/models" \
+    -v "$PWD/coderai-cache:/cache" \
+    coderai:local
+
+AMD/Intel Vulkan: replace `--gpus all` with `--device /dev/dri`.
+CPU only: drop the GPU flag entirely.
+Run as container-root instead: just omit the `--user` line (see "Running as a
+non-root user" below for rootless / userns-remap alternatives).
+
+
+External storage for /models and /cache
+----------------------------------------
+/models and /cache are where the big data lives, so put them on your large
+storage and bind-mount them onto the defaults. The in-container paths never
+change — only the host side does.
+
+1) Host-mounted big disk / SAN (a path already mounted on the host):
+
+   # Make the big-storage dirs owned by the UID you run as:
+   sudo chown -R "$(id -u):$(id -g)" /srv/coderai/config \
+     /mnt/bigstorage/coderai/models /mnt/bigstorage/coderai/cache
+
+   docker run --gpus all --ipc=host -p 8776:8776 \
+     --user "$(id -u):$(id -g)" \
+     -v /srv/coderai/config:/config \
+     -v /mnt/bigstorage/coderai/models:/models \
+     -v /mnt/bigstorage/coderai/cache:/cache \
+     coderai:local
+
+   The launcher points HF_HOME at /cache/huggingface and writes tool outputs to
+   /cache/{videogen_output,township_output}, so /cache on the big disk captures
+   downloads AND produced artifacts.
+
+2) NFS (shared across machines) — back a Docker volume with the NFS driver:
+
+   docker volume create --driver local \
+     --opt type=nfs --opt o=addr=10.0.0.5,rw,nfsvers=4 \
+     --opt device=:/export/coderai/models  coderai-models
+   docker volume create --driver local \
+     --opt type=nfs --opt o=addr=10.0.0.5,rw,nfsvers=4 \
+     --opt device=:/export/coderai/cache   coderai-cache
+
+   docker run --gpus all --ipc=host -p 8776:8776 \
+     --user "$(id -u):$(id -g)" \
+     -v "$PWD/coderai-config:/config" \
+     -v coderai-models:/models \
+     -v coderai-cache:/cache \
+     coderai:local
+
+   (SMB/CIFS works the same way with `--opt type=cifs` and credentials.)
+   For NFS, the export must let the UID you pass to --user write (e.g. map it,
+   or set the right ownership on the export); don't rely on no_root_squash.
+
+Performance note: NFS/CIFS are fine as a model LIBRARY, but mmap-heavy weight
+loads and KV-cache spill are much faster on local NVMe or a fast SAN. Keep the
+active inference weights on fast storage if you can.
+
+
+Running as a non-root user
+--------------------------
+The image works as root (PID 1 sets up nginx + the services) AND as an arbitrary
+UID. nginx pid/temp and the supervisor socket live under /tmp, logs go to
+stdout/stderr, and Python doesn't write .pyc, so the runtime never needs to
+write anywhere except /tmp and the mounted volumes.
+
+Option A — run as your own UID/GID (recommended for bind mounts):
+
+   # The mounted dirs must be owned by that UID so the app can write to them:
+   mkdir -p coderai-config coderai-models coderai-cache
+   sudo chown -R "$(id -u):$(id -g)" coderai-config coderai-models coderai-cache
+
+   docker run --gpus all --ipc=host -p 8776:8776 \
+     --user "$(id -u):$(id -g)" \
+     -v "$PWD/coderai-config:/config" \
+     -v "$PWD/coderai-models:/models" \
+     -v "$PWD/coderai-cache:/cache" \
+     coderai:local
+
+   Caveat: with --user, the in-image standalone Python and app tree stay
+   root-owned but world-readable, which is all the runtime needs. For NFS, the
+   export must allow that UID to write (no_root_squash is NOT required when you
+   pass a real --user; map/allow the UID you run as).
+
+Option B — keep container-root but map it to an unprivileged host UID, with no
+image changes. Best when you don't want to manage UIDs/ownership by hand:
+
+   * Rootless Docker (run the daemon as a normal user), or
+   * userns-remap: add  { "userns-remap": "default" }  to
+     /etc/docker/daemon.json and restart Docker. Container root (UID 0) then maps
+     to a high, unprivileged host subordinate UID automatically.
+
+   In both cases run the normal command (no --user needed); the container thinks
+   it is root, but the kernel sees an unprivileged user on the host.
+
+GPU + non-root: NVIDIA Container Toolkit and /dev/dri both work under --user and
+under rootless/userns-remap; no extra flags are needed beyond the usual
+--gpus all (NVIDIA) or --device /dev/dri (Vulkan).
--- a/packaging/linux/build_oci_image.sh
+++ b/packaging/linux/build_oci_image.sh
@@ -17,6 +17,22 @@ INCLUDE_LOCAL_LIBS=1
 AUTO_LOCAL_BINS=1
 LOCAL_BINARIES=()
 LOCAL_BINARY_DIRS=()
+# Optional second venv for Parler-TTS (pinned transformers 4.46). Bundled as an
+# overlay whose site-packages is prepended to PYTHONPATH at runtime: its pins shadow
+# the main stack, while torch/etc still resolve from the main stack underneath.
+PARLER_VENV="${CODERAI_PARLER_VENV:-$HOME/.coderai/parler_venv}"
+INCLUDE_PARLER=1
+# Isolated lip-sync tools (shared Python 3.10 venv + repo code, no weights) and the
+# ds4 native engine. Bundled so the image replicates the local install. ds4
+# DeepSeek-V4 GGUF weights are NOT bundled (multi-GB, runtime-downloaded into a volume).
+INCLUDE_TOOLS=1
+# One shared Python 3.10 venv serves both wav2lip and sadtalker (identical torch),
+# halving the torch footprint. Repo code is bundled WITHOUT model weights — those
+# download on first lip-sync use into the /cache volume.
+LIPSYNC_VENV="${CODERAI_LIPSYNC_VENV:-$HOME/.coderai/lipsync_venv}"
+WAV2LIP_DIR="${CODERAI_WAV2LIP_SRC:-$HOME/.coderai/Wav2Lip}"
+SADTALKER_DIR="${CODERAI_SADTALKER_SRC:-$HOME/.coderai/SadTalker}"
+DS4_DIR="${CODERAI_DS4_DIR:-$HOME/.coderai/ds4}"

 usage() {
  cat <<'EOF'
@@ -37,6 +53,10 @@ Options:
  --include-local-dir PATH
                          Copy executable files from a local build directory, including ldd libs.
                          Can be repeated. Useful for local whisper.cpp build/bin directories.
+  --parler-venv PATH      Bundle this Parler-TTS venv as an overlay (default:
+                          $CODERAI_PARLER_VENV or ~/.coderai/parler_venv if present).
+  --no-parler             Do not bundle the Parler-TTS venv overlay.
+  --no-tools              Do not bundle the lip-sync (wav2lip/sadtalker) venvs or ds4.
  -t, --tag TAG           Image tag to create (default: coderai:local or OCI_IMAGE from versions.env).
  -h, --help              Show this help.

@@ -77,6 +97,24 @@ while [[ $# -gt 0 ]]; do
      AUTO_LOCAL_BINS=0
      shift
      ;;
+    --parler-venv)
+      BUILD_MODE="venv"
+      if [[ $# -lt 2 ]]; then
+        echo "Error: --parler-venv requires a path" >&2
+        exit 2
+      fi
+      PARLER_VENV="$2"
+      INCLUDE_PARLER=1
+      shift 2
+      ;;
+    --no-parler)
+      INCLUDE_PARLER=0
+      shift
+      ;;
+    --no-tools)
+      INCLUDE_TOOLS=0
+      shift
+      ;;
    --include-local-bin)
      BUILD_MODE="venv"
      if [[ $# -lt 2 ]]; then
@@ -184,6 +222,8 @@ discover_local_binaries() {
    "$HOME/whisper.cpp/build/bin/server"
    "/usr/local/bin/ds4-server"
    "${CODERAI_DS4_DIR:-$HOME/.coderai/ds4}/ds4-server"
+    "/usr/local/bin/rife-ncnn-vulkan"
+    "$HOME/.local/bin/rife-ncnn-vulkan"
  )
  local path
  for path in "${candidates[@]}"; do
@@ -231,6 +271,50 @@ prepare_venv_bundle() {
    printf '  %s\n' "${LOCAL_BINARIES[@]}"
  fi

+  # Parler-TTS overlay venv. Only its site-packages is needed: it was created with
+  # --system-site-packages, so it holds just the pinned overrides (transformers
+  # 4.46, parler_tts, tokenizers, ...); torch/numpy resolve from the main venv.
+  if [[ "$INCLUDE_PARLER" == "1" && -n "$PARLER_VENV" && -d "$PARLER_VENV" ]]; then
+    local parler_sp
+    parler_sp="$PARLER_VENV/lib/python${VENV_PYTHON_MINOR}/site-packages"
+    if [[ -d "$parler_sp" ]]; then
+      mkdir -p "$bundle/parler-venv/site-packages"
+      rsync -a --delete "$parler_sp/" "$bundle/parler-venv/site-packages/"
+      echo "Bundled Parler-TTS overlay from: $parler_sp"
+    else
+      echo "Warning: --parler-venv given but no site-packages at $parler_sp (skipping)" >&2
+    fi
+  fi
+
+  # Isolated lip-sync tools + ds4 engine. The shared lip-sync venv runs on a
+  # standalone Python 3.10 (located via the venv's pyvenv.cfg `home`); it is bundled
+  # once and the venv is re-pointed at it during the image build.
+  if [[ "$INCLUDE_TOOLS" == "1" ]]; then
+    local py310_dir=""
+    if [[ -f "$LIPSYNC_VENV/pyvenv.cfg" ]]; then
+      local home_bin
+      home_bin="$(sed -n 's/^home *= *//p' "$LIPSYNC_VENV/pyvenv.cfg" | head -1)"
+      [[ -n "$home_bin" ]] && py310_dir="$(dirname "$home_bin")"
+    fi
+    if [[ -n "$py310_dir" && -d "$py310_dir" ]]; then
+      mkdir -p "$bundle/py310"
+      rsync -a "$py310_dir/" "$bundle/py310/"
+      echo "Bundled standalone Python 3.10 from: $py310_dir"
+    else
+      echo "Warning: could not locate the py3.10 interpreter for the lip-sync venv" >&2
+    fi
+    local _venv_excl=(--exclude '__pycache__' --exclude '*.pyc' --exclude 'pip/' --exclude '*.dist-info/RECORD')
+    if [[ -d "$LIPSYNC_VENV" ]]; then rsync -a "${_venv_excl[@]}" "$LIPSYNC_VENV/" "$bundle/lipsync_venv/"; echo "Bundled shared lip-sync venv"; fi
+    # Repo CODE ONLY — checkpoints/weights are excluded and download at runtime.
+    if [[ -d "$WAV2LIP_DIR" ]]; then rsync -a --exclude 'checkpoints/' --exclude 'face_detection/detection/sfd/*.pth' "$WAV2LIP_DIR/" "$bundle/Wav2Lip/"; echo "Bundled Wav2Lip code (no weights)"; fi
+    if [[ -d "$SADTALKER_DIR" ]]; then rsync -a --exclude 'checkpoints/*' --exclude 'gfpgan/weights/*' "$SADTALKER_DIR/" "$bundle/SadTalker/"; echo "Bundled SadTalker code (no weights)"; fi
+    # ds4: binary + scripts, minus any downloaded multi-GB GGUF weights.
+    if [[ -d "$DS4_DIR" ]]; then
+      rsync -a --exclude 'gguf/' --exclude '*.gguf' --exclude '*.gguf.*' "$DS4_DIR/" "$bundle/ds4/"
+      echo "Bundled ds4 (binary + scripts, no weights)"
+    fi
+  fi
+
  if [[ "$include_libs" != "1" ]]; then
    return 0
  fi
@@ -245,6 +329,7 @@ bundle = Path(os.environ["VENV_BUNDLE"])
 venv = Path(os.environ["VENV_PATH_FOR_LDD"])
 local_libs = bundle / "local-libs"
 local_bin = bundle / "local-bin"
+parler_sp = bundle / "parler-venv" / "site-packages"

 skip_prefixes = (
    "/lib/ld-linux",
@@ -278,7 +363,7 @@ skip_starts = (
 )

 candidates = []
-for root in (venv / "lib", venv / "bin", local_bin):
+for root in (venv / "lib", venv / "bin", local_bin, parler_sp):
    if not root.exists():
        continue
    for path in root.rglob("*"):
@@ -415,13 +500,27 @@ cat <<EOF

 Built $IMAGE_TAG

-Run examples:
+Run examples (run as your own UID; create+own the dirs first):
+  mkdir -p coderai-config coderai-models coderai-cache
+  sudo chown -R "\$(id -u):\$(id -g)" coderai-config coderai-models coderai-cache
+
  NVIDIA:
-    $DOCKER_BIN run --gpus all --ipc=host -p 8776:8776 -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG
+    $DOCKER_BIN run --gpus all --ipc=host -p 8776:8776 --user "\$(id -u):\$(id -g)" -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG

  AMD/Intel Vulkan:
-    $DOCKER_BIN run --device /dev/dri --ipc=host -p 8776:8776 -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG
+    $DOCKER_BIN run --device /dev/dri --ipc=host -p 8776:8776 --user "\$(id -u):\$(id -g)" -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG

  CPU:
-    $DOCKER_BIN run --ipc=host -p 8776:8776 -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG
+    $DOCKER_BIN run --ipc=host -p 8776:8776 --user "\$(id -u):\$(id -g)" -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG
+
+(Drop --user to run as container-root, or use rootless/userns-remap Docker.)
+
+One published port (8776) fronts everything via nginx:
+  /  server+API+admin   /editor/  video editor   /videogen/  studio   /township/  fighters
+
+External storage: point /models and /cache at a big disk or NFS volume —
+  -v /mnt/bigstorage/coderai/models:/models -v /mnt/bigstorage/coderai/cache:/cache
+Non-root: add  --user "\$(id -u):\$(id -g)"  (mounts must be owned by that UID),
+  or use rootless/userns-remap Docker with no extra flags.
+See packaging/linux/README-RUN.txt (also at /opt/coderai/README-RUN.txt in the image).
 EOF
--- a/packaging/linux/launcher/coderai-oci
+++ b/packaging/linux/launcher/coderai-oci
@@ -72,4 +72,8 @@ if changed:
 PY
 fi

+# Point the server at the shared dedicated temp dir so its janitor prunes it.
+if [ -n "${CODERAI_TMP:-}" ]; then
+  exec /opt/coderai/python/bin/python3 /opt/coderai/app/coderai --config "$CONFIG_DIR" --tmp "$CODERAI_TMP" "$@"
+fi
 exec /opt/coderai/python/bin/python3 /opt/coderai/app/coderai --config "$CONFIG_DIR" "$@"
--- a/tools/gen_township_fighters.py
+++ b/tools/gen_township_fighters.py
@@ -4709,6 +4709,48 @@ def pick_model(client: CoderAIClient, kind: str, override: str = None) -> str:
 # Web UI
 # ─────────────────────────────────────────────────────────────────────────────

+# App route roots that appear as server-rendered URLs and JS fetch targets. Used
+# to make the UI work behind a reverse-proxy sub-path mount (e.g. /township/).
+_MOUNT_ROUTES = ("media", "api", "matches", "match", "characters",
+                 "environments", "wardrobe", "prompts", "stream", "stop",
+                 "job", "favicon.ico")
+
+
+def _mount_html(html: str, prefix: str) -> str:
+    """Rewrite a server-rendered page so it works under reverse-proxy sub-path
+    ``prefix`` (e.g. '/township'). Prepends the prefix to app-route URLs in HTML
+    attributes and injects a fetch/EventSource shim so JS calls are prefixed too.
+    Idempotent: already-prefixed URLs are not matched again; '' means no-op."""
+    import json as _json
+    import re as _re
+    if not prefix:
+        return html
+    routes = "|".join(map(_re.escape, _MOUNT_ROUTES))  # '.' in favicon.ico is literal
+    # 1) Attribute URLs: href/src/action/poster/value/data-src/data-url -> a route.
+    attr_re = _re.compile(
+        r'((?:href|src|action|poster|value|data-src|data-url)\s*=\s*["\'])'
+        r'(/(?:' + routes + r')\b)')
+    html = attr_re.sub(lambda m: m.group(1) + prefix + m.group(2), html)
+    # 2) Home/nav link to bare root: href="/" -> href="<prefix>/".
+    html = _re.sub(r'(href\s*=\s*(["\']))/\2',
+                   lambda m: m.group(1) + prefix + '/' + m.group(2), html)
+    # 3) JS shim: prefix root-absolute fetch()/EventSource() URLs at call time.
+    if "/*coderai-mount*/" in html:
+        return html
+    js_prefix = _json.dumps(prefix).replace("</", "<\\/")  # valid JS literal, never closes <script>
+    shim = (
+        "<script>/*coderai-mount*/(function(){var P=" + js_prefix + ";if(!P)return;"
+        "function fix(u){return (typeof u==='string'&&u.charAt(0)==='/'"
+        "&&u.charAt(1)!=='/'&&u.indexOf(P+'/')!==0&&u!==P)?P+u:u;}"
+        "var of=window.fetch.bind(window);window.fetch=function(u,o){return of(fix(u),o);};"
+        "var OE=window.EventSource;if(OE){var NE=function(u,o){return new OE(fix(u),o);};"
+        "NE.prototype=OE.prototype;window.EventSource=NE;}})();</script>"
+    )
+    if "</head>" in html:
+        return html.replace("</head>", shim + "</head>", 1)
+    return shim + html
+
+
 def launch_web_ui(default_args):
    """Launch a local web interface for Township Fighters content generation.

@@ -9152,8 +9194,27 @@ async function resetPrompts(ev){
            except (BrokenPipeError, ConnectionResetError, ConnectionAbortedError):
                pass

+        def _public_prefix(self):
+            """Reverse-proxy mount prefix (e.g. '/township'), or '' at root or when
+            the header holds characters that are unsafe to embed in HTML/JS."""
+            p = (self.headers.get("X-Forwarded-Prefix")
+                 or self.headers.get("X-Script-Name") or "").strip().strip("/")
+            return "/" + p if p and all(c.isalnum() or c in "/._~%-" for c in p) else ""
+
+        def _route(self, path):
+            """Return the app-internal route for ``path``: the mount prefix is dropped
+            when the proxy forwarded it intact, else the path is returned as-is."""
+            mount = self._public_prefix()
+            mounted = bool(mount) and (path == mount or path.startswith(mount + "/"))
+            # A bare "<prefix>" request maps to the app root.
+            return (path[len(mount):] or "/") if mounted else path
+
        def _send(self, code, ctype, body):
            if isinstance(body, str): body = body.encode()
+            if "text/html" in ctype:
+                pref = self._public_prefix()
+                if pref:
+                    body = _mount_html(body.decode("utf-8", "replace"), pref).encode("utf-8")
            self.send_response(code)
            self.send_header("Content-Type", ctype)
            self.send_header("Content-Length", str(len(body)))
@@ -9163,7 +9224,7 @@ async function resetPrompts(ev){

        def do_GET(self):
            parsed = urllib.parse.urlparse(self.path)
-            path = parsed.path.rstrip("/") or "/"
+            path = self._route(parsed.path).rstrip("/") or "/"

            if path == "/favicon.ico":
                # Bundled icon next to this script (tools/assets/favicon.ico).
@@ -9343,7 +9404,7 @@ async function resetPrompts(ev){

        def do_POST(self):
            parsed = urllib.parse.urlparse(self.path)
-            path = parsed.path
+            path = self._route(parsed.path)

            if path == "/stop":
                _state["abort"].set()

--- a/tools/videogen.py
+++ b/tools/videogen.py
@@ -1102,7 +1102,11 @@ HTML_PAGE = r"""
 let models=[], profiles={characters:[], environments:[], voices:[], loras:[]};
 function $(id){return document.getElementById(id)}
 function esc(s){return String(s||'').replace(/[&<>"']/g,m=>({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[m]))}
-async function api(path, opts={}){let r=await fetch(path,{headers:{'Content-Type':'application/json'},...opts}); if(!r.ok) throw new Error(await r.text()); return await r.json()}
+const PREFIX="__ROOT_PREFIX__";
+// Prefix app-local absolute paths (/api, /stream, /media) so they resolve under a
+// reverse-proxy sub-path mount; leaves absolute URLs (http...) and others untouched.
+function U(p){return (p && p.charAt(0)==='/') ? PREFIX+p : p}
+async function api(path, opts={}){let r=await fetch(U(path),{headers:{'Content-Type':'application/json'},...opts}); if(!r.ok) throw new Error(await r.text()); return await r.json()}
 function fillSelect(sel, cap, def){let s=$(sel); s.innerHTML=''; let filtered=models.filter(m=>(m.capabilities||[]).includes(cap)); if(!filtered.length) filtered=models; for(let m of filtered){let o=document.createElement('option'); o.value=m.id; o.textContent=m.id; if(m.id===def) o.selected=true; s.appendChild(o)}}
 async function loadModels(){let d=await api('/api/models'); models=d.models||[]; fillSelect('image_model','image_generation',d.defaults.image_model); fillSelect('video_model','video_generation',d.defaults.video_model); fillSelect('audio_model','audio_generation',d.defaults.audio_model); $('conn').textContent=`Connected: ${models.length} model(s)`}
 async function loadProfiles(){profiles=await api('/api/profiles'); renderProfiles()}
@@ -1133,9 +1137,9 @@ function addDialogue(btn){let box=btn.closest('.clip').querySelector('.dialogues
 function selected(sel){return [...sel.selectedOptions].map(o=>o.value)}
 function collectMovie(){let clips=[...document.querySelectorAll('.clip')].map(c=>({title:c.querySelector('.c_title').value,prompt:c.querySelector('.c_prompt').value,characters:selected(c.querySelector('.c_chars')),environments:selected(c.querySelector('.c_envs')),camera_motion:c.querySelector('.c_camera').value,action:c.querySelector('.c_action').value,speech_text:c.querySelector('.c_speech').value,speech_voice:c.querySelector('.c_voice').value,speech_speed:c.querySelector('.c_speed').value,lip_sync:c.querySelector('.c_lipsync').checked,lip_sync_method:$('lip_sync_method').value,music_prompt:c.querySelector('.c_music').value,sfx_prompt:c.querySelector('.c_sfx').value,dialogues:[...c.querySelectorAll('.dialogue')].map(d=>({character:d.querySelector('.d_char').value,voice:d.querySelector('.d_voice').value,text:d.querySelector('.d_text').value,start_time:d.querySelector('.d_start').value,speed:d.querySelector('.d_speed').value,lip_sync:c.querySelector('.c_lipsync').checked}))})); return {title:$('title').value,style:$('style').value,image_model:$('image_model').value,video_model:$('video_model').value,audio_model:$('audio_model').value,default_voice:$('default_voice').value,lip_sync_method:$('lip_sync_method').value,width:+$('width').value,height:+$('height').value,fps:+$('fps').value,num_frames:+$('num_frames').value,steps:+$('steps').value,guidance_scale:+$('guidance_scale').value,negative_prompt:$('negative_prompt').value,use_keyframes:$('use_keyframes').checked,soundtrack_prompt:$('soundtrack_prompt').value,loras:selected($('movie_loras')).map(n=>({name:n,weight:+$('movie_lora_weight').value})),lora_weight:+$('movie_lora_weight').value,movie_count:+$('movie_count').value,clips}}
 async function startMovie(){let d=await api('/api/movie/start',{method:'POST',body:JSON.stringify(collectMovie())}); watchJob(d.job_id)}
-async function watchJob(id){$('jobout').innerHTML=`<p>Job <span class="pill">${id}</span></p>`; let timer=setInterval(async()=>{let j=await api('/api/job/'+id); $('jobout').innerHTML=`<p><span class="pill">${esc(j.status)}</span> ${j.progress||0}% ${esc(j.message||'')}</p>`+(j.output_url?`<p><a href="${j.output_url}" target="_blank">Open output</a></p>`:'')+(j.error?`<p style="color:var(--bad)">${esc(j.error)}</p>`:''); if(j.status==='done'||j.status==='error'){clearInterval(timer); loadProfiles(); loadGallery()}},1500)}
-async function loadGallery(){let d=await api('/api/gallery'); $('gallery_grid').innerHTML=(d.items||[]).map(it=>`<div class="profile">${it.type==='video'?`<video src="${it.url}" controls style="width:100%;height:130px;background:#000"></video>`:`<img src="${it.url}">`}<div class="p"><b>${esc(it.name)}</b><br><a href="${it.url}" target="_blank">open</a></div></div>`).join('')||'<div class="muted">No media yet.</div>'}
-function connectLog(){let es=new EventSource('/stream'); es.onmessage=e=>{let l=$('log'); l.textContent+=e.data+'\n'; l.scrollTop=l.scrollHeight}}
+async function watchJob(id){$('jobout').innerHTML=`<p>Job <span class="pill">${id}</span></p>`; let timer=setInterval(async()=>{let j=await api('/api/job/'+id); $('jobout').innerHTML=`<p><span class="pill">${esc(j.status)}</span> ${j.progress||0}% ${esc(j.message||'')}</p>`+(j.output_url?`<p><a href="${U(j.output_url)}" target="_blank">Open output</a></p>`:'')+(j.error?`<p style="color:var(--bad)">${esc(j.error)}</p>`:''); if(j.status==='done'||j.status==='error'){clearInterval(timer); loadProfiles(); loadGallery()}},1500)}
+async function loadGallery(){let d=await api('/api/gallery'); $('gallery_grid').innerHTML=(d.items||[]).map(it=>`<div class="profile">${it.type==='video'?`<video src="${U(it.url)}" controls style="width:100%;height:130px;background:#000"></video>`:`<img src="${U(it.url)}">`}<div class="p"><b>${esc(it.name)}</b><br><a href="${U(it.url)}" target="_blank">open</a></div></div>`).join('')||'<div class="muted">No media yet.</div>'}
+function connectLog(){let es=new EventSource(U('/stream')); es.onmessage=e=>{let l=$('log'); l.textContent+=e.data+'\n'; l.scrollTop=l.scrollHeight}}
 document.querySelectorAll('.tab').forEach(t=>t.onclick=()=>{document.querySelectorAll('.tab,.section').forEach(x=>x.classList.remove('active')); t.classList.add('active'); $(t.dataset.tab).classList.add('active')})
 loadModels().then(loadProfiles).then(()=>addClip()); loadGallery(); connectLog();
 </script>
@@ -1171,12 +1175,30 @@ def make_handler(app: VideoGenApp):
                return {}
            return json.loads(self.rfile.read(n).decode("utf-8"))

+        # -- reverse-proxy helpers (sub-path mounting) -------------------- #
+        def _public_prefix(self) -> str:
+            """Path prefix this app is mounted under, per reverse-proxy headers.
+            Returns e.g. '/videogen' (no trailing slash), or '' at root or when the
+            header holds characters that are unsafe to embed in HTML/JS."""
+            p = (self.headers.get("X-Forwarded-Prefix")
+                 or self.headers.get("X-Script-Name") or "").strip().strip("/")
+            return "/" + p if p and all(c.isalnum() or c in "/._~%-" for c in p) else ""
+
+        def _route(self, path: str) -> str:
+            """Return the app-internal route for ``path``: the mount prefix is dropped
+            when the proxy forwarded it intact, else the path is returned as-is."""
+            mount = self._public_prefix()
+            mounted = bool(mount) and (path == mount or path.startswith(mount + "/"))
+            # A bare "<prefix>" request maps to the app root.
+            return (path[len(mount):] or "/") if mounted else path
+
        def do_GET(self) -> None:
            parsed = urllib.parse.urlparse(self.path)
-            path = parsed.path
+            path = self._route(parsed.path)
            try:
                if path == "/":
-                    self._send(200, HTML_PAGE.encode("utf-8"), "text/html; charset=utf-8")
+                    html = HTML_PAGE.replace("__ROOT_PREFIX__", self._public_prefix())
+                    self._send(200, html.encode("utf-8"), "text/html; charset=utf-8")
                elif path == "/api/models":
                    self._json(app.models_payload())
                elif path == "/api/profiles":