township: 2-keyframe outcomes, referees, autogen, generation queue; favicons

Township tool (tools/gen_township_fighters.py):
- Outcome videos now generate TWO keyframes per outcome (finish + victory),
  each anchoring its own clip; victory clip uses a dedicated referee shot.
- Referee characters: new role on create form, kept out of fighter pools,
  dressed as officials, attachable per-match and used in victory keyframes.
- Per-match referee selection (new-match form + match editor, persisted).
- Autogenerate buttons on character/referee, environment and new-match forms
  (LLM-filled, editable before create) via /profile/autogen + /matches/autogen.
- Single-worker generation queue: all coderai-bound jobs (create/regen/train/
  match/process) are serialised and surfaced as "queued", with one persistent
  match-detail monitor replacing the competing per-job pollers (fixes the
  blinking progress when two jobs were launched at once).

coderai: favicon.ico served at /favicon.ico + linked in admin/login templates;
bundled township favicon served at /favicon.ico.

Also gitignore large packaging/runtime artifact dirs (.packaging-cache/, tmp/).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
parent 80f8fe22
# Files/directories excluded from Docker build context for local distribution builds.
.git
.gitignore
# Local virtualenvs and caches
venv*
.venv
__pycache__
*.py[cod]
.pytest_cache
.mypy_cache
.ruff_cache
.cache
.packaging-cache/*
!.packaging-cache/build-manifest.json
# Large runtime/generated data
models
offload
township_output
dist
dist-package
*.log
# Build outputs
build
*.egg-info
......@@ -29,3 +29,7 @@ test_*.py
# Generated township fighter outputs
township_output/
# Packaging build cache + runtime temp (large artifacts)
.packaging-cache/
tmp/
#!/usr/bin/env bash
# Thin launcher: forwards every argument to the real OCI image build script that
# lives under packaging/linux/, resolved relative to this file's own location so
# it works from any current working directory.
set -euo pipefail

# Resolve the directory in a separate assignment: under `set -e` a failing
# command substitution in an assignment aborts the script, whereas a failure
# inside the argument list of `exec` would be ignored and exec a bogus
# "/packaging/linux/..." path.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

exec "${script_dir}/packaging/linux/build_oci_image.sh" "$@"
......@@ -316,8 +316,15 @@ async def chat_page(request: Request, username: str = Depends(require_auth)):
# API endpoints for admin operations
@router.get("/admin/api/status", summary="Server and model status")
async def api_status(username: str = Depends(require_auth)):
"""Get system status."""
def api_status(username: str = Depends(require_auth)):
"""Get system status.
Defined as a SYNC handler on purpose: it reads VRAM via sysfs / a blocking
``lspci`` subprocess (AMD/Intel) and scans registry/queue state. The Tasks
and dashboard pages poll it continuously, so running it on the event loop
would freeze the whole web UI while a model loads (llama.cpp's load releases
the GIL, so a threadpool worker can still run). FastAPI runs plain ``def``
path operations in its threadpool, off the event loop."""
from codai.models.manager import multi_model_manager
from codai.api.state import get_load_mode
......@@ -1571,7 +1578,11 @@ async def api_model_load(request: Request, username: str = Depends(require_admin
model_cfg = m if isinstance(m, dict) else {}
break
result = multi_model_manager.request_model(path, model_type if model_type != "text" else None)
# Offload to a thread: request_model may block (thermal wait / busy model /
# actual load) and would otherwise freeze the whole admin web UI event loop.
result = await asyncio.to_thread(
multi_model_manager.request_model,
path, model_type if model_type != "text" else None)
if result.get("already_loaded"):
return {"success": True, "already_loaded": True}
......@@ -1583,17 +1594,20 @@ async def api_model_load(request: Request, username: str = Depends(require_admin
free_gb = multi_model_manager._get_free_vram_gb()
if needed_gb > 0 and free_gb < needed_gb:
print(f"Admin model-load: need {needed_gb:.1f} GB VRAM, have {free_gb:.1f} GB free — evicting models")
multi_model_manager._evict_models_for_vram(needed_gb)
await asyncio.to_thread(multi_model_manager._evict_models_for_vram, needed_gb)
elif needed_gb == 0 and multi_model_manager.models and free_gb < 4.0:
# Unknown model size but VRAM nearly full — evict everything to avoid OOM on first attempt
print(f"Admin model-load: unknown model size, only {free_gb:.1f} GB free — evicting models proactively")
multi_model_manager.unload_all_models()
await asyncio.to_thread(multi_model_manager.unload_all_models)
# Not loaded yet — trigger actual load
try:
if model_type == "text":
# _load_model_by_name already records the VRAM delta internally
mm = multi_model_manager._load_model_by_name(result["model_name"] or path)
# In a thread: the GGUF/llama load is heavy and would block the admin
# event loop (freezing the whole web UI) if run inline.
# _load_model_by_name already records the VRAM delta internally.
mm = await asyncio.to_thread(
multi_model_manager._load_model_by_name, result["model_name"] or path)
if mm is None:
raise RuntimeError("Model failed to load")
multi_model_manager.models[result["model_key"] or path] = mm
......@@ -1618,15 +1632,15 @@ async def api_model_load(request: Request, username: str = Depends(require_admin
model_key = f"image:{path}"
_snap = multi_model_manager.vram_before_load()
if _is_gguf_model(path):
resolved = multi_model_manager.load_model(path)
resolved = await asyncio.to_thread(multi_model_manager.load_model, path)
import os as _os
if resolved and _os.path.isfile(resolved):
sd_model = _load_sdcpp_model(resolved, global_args)
sd_model = await asyncio.to_thread(_load_sdcpp_model, resolved, global_args)
if sd_model:
multi_model_manager.add_model(model_key, sd_model)
multi_model_manager.record_vram_delta(model_key, _snap)
else:
pipeline = _load_diffusers_pipeline(path, global_args)
pipeline = await asyncio.to_thread(_load_diffusers_pipeline, path, global_args)
if pipeline:
multi_model_manager.add_model(model_key, pipeline)
multi_model_manager.record_vram_delta(model_key, _snap)
......@@ -2029,10 +2043,15 @@ async def api_turboquant_info(username: str = Depends(require_admin)):
# --- Task / queue management ---
@router.get("/admin/api/tasks", summary="List active and recent tasks")
async def api_tasks(username: str = Depends(require_admin)):
def api_tasks(username: str = Depends(require_admin)):
"""Unified live view of long-running work: in-flight / recent generations
(image, video, audio, text) from the task registry, durable LoRA training
jobs, and queued requests waiting for a slot. The Tasks page polls this."""
jobs, and queued requests waiting for a slot. The Tasks page polls this.
SYNC handler on purpose (runs in FastAPI's threadpool, not the event loop):
it reads disk job records, queue/registry state and thermal sensors. Keeping
it off the event loop means the Tasks page stays responsive while a model is
loading (the load releases the GIL during its C call)."""
from codai.tasks import task_registry
from codai.api.loras import list_jobs
from codai.queue.manager import queue_manager
......@@ -2193,10 +2212,14 @@ def _read_vram_info() -> Optional[dict]:
@router.get("/admin/api/system-stats", summary="Live CPU / GPU / RAM / VRAM usage and temperatures")
async def api_system_stats(username: str = Depends(require_admin)):
def api_system_stats(username: str = Depends(require_admin)):
"""Lightweight hardware telemetry for the Tasks page header: CPU & GPU
utilization and temperature, plus RAM and VRAM usage. All fields are
best-effort and may be null when a sensor/metric is unavailable."""
best-effort and may be null when a sensor/metric is unavailable.
SYNC handler on purpose: the temperature/util/VRAM reads hit sysfs and
blocking sensor calls, so it runs in FastAPI's threadpool to avoid freezing
the event loop (and the Tasks page) while a model is loading."""
from codai.models import thermal
# CPU tile = coderai process-tree usage, scaled 100% PER CORE (0..100*cores),
......@@ -2398,6 +2421,10 @@ async def api_get_settings(username: str = Depends(require_admin)):
"jobs": {
"resume_on_restart": c.jobs.resume_on_restart,
},
"enhance": {
"allow_ffmpeg": c.enhance.allow_ffmpeg,
"allow_rife_ncnn": c.enhance.allow_rife_ncnn,
},
"broker": {
"enabled": c.broker.enabled,
"base_url": c.broker.base_url,
......@@ -2562,6 +2589,22 @@ async def api_save_settings(request: Request, username: str = Depends(require_ad
except Exception:
pass
if "enhance" in data:
en = data["enhance"]
if "allow_ffmpeg" in en:
c.enhance.allow_ffmpeg = bool(en["allow_ffmpeg"])
if "allow_rife_ncnn" in en:
c.enhance.allow_rife_ncnn = bool(en["allow_rife_ncnn"])
# Apply live to global_args so the video pipeline honours it immediately.
try:
from codai.api.state import get_global_args
ga = get_global_args()
if ga is not None:
ga.enhance_allow_ffmpeg = c.enhance.allow_ffmpeg
ga.enhance_allow_rife_ncnn = c.enhance.allow_rife_ncnn
except Exception:
pass
if "broker" in data:
bro = data["broker"]
c.broker.enabled = bool(bro.get("enabled", c.broker.enabled))
......
......@@ -4,6 +4,7 @@
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% block title %}CoderAI{% endblock %}</title>
<link rel="icon" type="image/x-icon" href="{{ root_path }}/favicon.ico">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
......
......@@ -4,6 +4,7 @@
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Sign in — CoderAI</title>
<link rel="icon" type="image/x-icon" href="{{ root_path }}/favicon.ico">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
......
......@@ -75,6 +75,26 @@
</div>
</div>
<!-- Video enhancement (upscale / FPS interpolation) -->
<div class="card mb-0" style="margin-top:1rem">
<div class="card-title">Video Enhancement</div>
<p class="form-hint" style="margin-bottom:.6rem">Upscaling and FPS interpolation run on in-process torch models by default (ESRGAN upscaler, RIFE/FILM interpolator) — no external tools. Enable these only to fall back to external tools when no model is configured.</p>
<div class="form-row" style="margin:0">
<label style="display:flex;align-items:center;gap:.5rem;cursor:pointer">
<input type="checkbox" id="s-allow-ffmpeg">
<span>Allow ffmpeg (frame I/O / minterpolate)</span>
</label>
<span class="form-hint">Off = use PyAV + models only. On = ffmpeg may be used as a fallback.</span>
</div>
<div class="form-row" style="margin-top:.5rem">
<label style="display:flex;align-items:center;gap:.5rem;cursor:pointer">
<input type="checkbox" id="s-allow-rife-ncnn">
<span>Allow rife-ncnn-vulkan binary</span>
</label>
<span class="form-hint">Off = use an in-process RIFE/FILM model. On = the external rife-ncnn-vulkan binary may be used when no model is configured.</span>
</div>
</div>
<!-- Archive -->
<div class="card mb-0" style="margin-top:1rem">
<div class="card-title">Generation Archive</div>
......@@ -336,6 +356,8 @@ async function loadSettings(){
document.getElementById('s-gguf-cache').value = d.models?.gguf_cache_dir ?? '';
document.getElementById('s-offload-dir').value = d.offload?.directory ?? './offload';
document.getElementById('s-tmp-dir').value = d.tmp_dir ?? '';
document.getElementById('s-allow-ffmpeg').checked = !!(d.enhance && d.enhance.allow_ffmpeg);
document.getElementById('s-allow-rife-ncnn').checked = !!(d.enhance && d.enhance.allow_rife_ncnn);
toggleHttps();
// Archive
const arc = d.archive || {};
......@@ -404,6 +426,10 @@ async function saveSettings(){
directory: document.getElementById('s-offload-dir').value.trim() || './offload',
},
tmp_dir: strOrNull('s-tmp-dir'),
enhance:{
allow_ffmpeg: document.getElementById('s-allow-ffmpeg').checked,
allow_rife_ncnn: document.getElementById('s-allow-rife-ncnn').checked,
},
archive:{
enabled: document.getElementById('s-arc-enabled').checked,
directory: document.getElementById('s-arc-dir').value.trim(),
......
# CoderAI - in-process FILM frame interpolation engine.
#
# FILM (Frame Interpolation for Large Motion) is distributed in several forms.
# The most portable torch artifact is a TorchScript module, which we can load and
# run in-process with no extra architecture code. This wrapper loads such a model
# and exposes the same `.interpolate(a, b)` (t=0.5) interface the RIFE engine uses,
# so the dispatcher is engine-agnostic. Higher fps multipliers come from the
# caller's recursive midpoint bisection.
import torch
class _FilmWrapper:
    """Adapt a loaded FILM module to ``.interpolate(img0, img1) -> mid`` at t=0.5.

    FILM ports expose a few call conventions. The first call probes the known
    ones in order and remembers *which* convention worked (by index); later
    calls invoke that convention directly with their own inputs.
    """

    def __init__(self, module, device):
        self.m = module
        self.device = device
        # Index of the calling convention that worked, or None until the first
        # successful call. An index is stored rather than the callable itself:
        # the callables close over one specific pair of frames, so caching one
        # would keep returning the result for the very first pair.
        self._call = None

    def _conventions(self, img0, img1, t):
        """Return the candidate invocations, bound to the current inputs."""
        return (
            lambda: self.m({"x0": img0, "x1": img1, "time": t})["image"],
            lambda: self.m({"x0": img0, "x1": img1, "time": t[:, :1, None, None]})["image"],
            lambda: self.m(img0, img1, 0.5),
            lambda: self.m(img0, img1, t),
            lambda: self.m(img0, img1),
        )

    @staticmethod
    def _unwrap(out):
        """Extract the frame from a dict-style output; pass anything else through."""
        if isinstance(out, dict):
            # Explicit membership test: `out.get("image") or ...` would evaluate
            # the truthiness of a tensor, which raises for multi-element tensors.
            if "image" in out:
                return out["image"]
            return next(iter(out.values()))
        return out

    @torch.no_grad()
    def interpolate(self, img0, img1):
        """Return the t=0.5 frame between ``img0`` and ``img1``.

        Raises:
            RuntimeError: if none of the known calling conventions works on the
                first call (the last underlying error is included in the text).
        """
        t = torch.full((img0.shape[0], 1), 0.5, device=img0.device, dtype=img0.dtype)
        candidates = self._conventions(img0, img1, t)
        if self._call is not None:
            return self._unwrap(candidates[self._call]())
        last = None
        for idx, attempt in enumerate(candidates):
            try:
                out = self._unwrap(attempt())
            except Exception as e:  # try the next calling convention
                last = e
                continue
            self._call = idx
            return out
        raise RuntimeError(f"FILM module call failed (incompatible signature): {last}")
def load_film(weights_path: str, device):
    """Load a FILM model saved as a TorchScript module (.pt/.pth via torch.jit).

    Args:
        weights_path: path to the scripted FILM module.
        device: passed to ``torch.jit.load`` as ``map_location``.

    Returns:
        A ``_FilmWrapper`` around the loaded module, switched to eval mode.

    Raises:
        RuntimeError: if the file cannot be loaded as TorchScript; the original
            exception is attached as ``__cause__``.
    """
    try:
        module = torch.jit.load(weights_path, map_location=device)
        module.eval()
        return _FilmWrapper(module, device)
    except Exception as e:
        # Chain explicitly so the underlying loader error stays in the traceback.
        raise RuntimeError(
            "FILM weights must be a TorchScript module for in-process use "
            f"(torch.jit.load failed: {e}). Provide a scripted FILM model, or use "
            "a RIFE interpolation model instead.") from e
# CoderAI - in-process RIFE (IFNet_HDv3) frame interpolation.
# Vendored architecture matching the RIFE flownet.pkl weights (3 student IFBlocks
# of c=90, input 11ch = warped0+warped1+mask+flow; block_tea is training-only and
# unused at inference). Runs entirely in-process on torch — no subprocess.
import torch
import torch.nn as nn
import torch.nn.functional as F
def _conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1):
    """Build a biased ``Conv2d`` followed by a per-channel ``PReLU``.

    The two layers are returned as one ``nn.Sequential`` (children "0" and "1").
    """
    convolution = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size,
        stride,
        padding,
        dilation=dilation,
        bias=True,
    )
    activation = nn.PReLU(out_planes)
    return nn.Sequential(convolution, activation)
class IFBlock(nn.Module):
    """One refinement stage: predicts a 4-channel flow and a 1-channel mask.

    The input is resized by ``1/scale``, encoded by two stride-2 convolutions,
    passed through four residual conv bodies, and decoded by two transposed-conv
    heads whose outputs are resized back by ``scale``.
    """

    def __init__(self, in_planes, c=64):
        super().__init__()
        half = c // 2
        self.conv0 = nn.Sequential(
            _conv(in_planes, half, 3, 2, 1),
            _conv(half, c, 3, 2, 1),
        )
        # Attribute names double as state-dict keys (convblock0..convblock3),
        # and are created in the same order as before.
        for idx in range(4):
            setattr(self, f"convblock{idx}", nn.Sequential(_conv(c, c), _conv(c, c)))
        self.conv1 = self._head(c, 4)  # flow head
        self.conv2 = self._head(c, 1)  # mask head

    @staticmethod
    def _head(c, out_channels):
        """Two stride-2 transposed convolutions with a PReLU in between."""
        return nn.Sequential(
            nn.ConvTranspose2d(c, c // 2, 4, 2, 1),
            nn.PReLU(c // 2),
            nn.ConvTranspose2d(c // 2, out_channels, 4, 2, 1),
        )

    @staticmethod
    def _rescale(tensor, factor):
        """Bilinear resize by ``factor``."""
        return F.interpolate(tensor, scale_factor=factor, mode="bilinear",
                             align_corners=False, recompute_scale_factor=False)

    def forward(self, x, flow, scale=1):
        """Return ``(flow, mask)`` computed at ``1/scale`` and resized back."""
        shrink = 1. / scale
        x = self._rescale(x, shrink)
        # Flow magnitudes are in pixels, so they shrink with the image.
        flow = self._rescale(flow, shrink) * shrink
        feat = self.conv0(torch.cat((x, flow), 1))
        for body in (self.convblock0, self.convblock1,
                     self.convblock2, self.convblock3):
            feat = body(feat) + feat
        flow_out = self.conv1(feat)
        mask_out = self.conv2(feat)
        flow_out = self._rescale(flow_out, scale) * scale
        mask_out = self._rescale(mask_out, scale)
        return flow_out, mask_out
_backwarp_cache = {}


def _warp(img, flow):
    """Backward-warp ``img`` by ``flow`` (B,2,H,W, in pixels) via ``grid_sample``.

    The identity sampling grid in normalised [-1, 1] coordinates is cached per
    (batch, height, width, device).
    """
    batch, _, height, width = img.shape
    device = img.device
    cache_key = (batch, height, width, device)
    base_grid = _backwarp_cache.get(cache_key)
    if base_grid is None:
        xs = torch.linspace(-1.0, 1.0, width, device=device)
        ys = torch.linspace(-1.0, 1.0, height, device=device)
        grid_x = xs.view(1, 1, 1, width).expand(batch, -1, height, -1)
        grid_y = ys.view(1, 1, height, 1).expand(batch, -1, -1, width)
        base_grid = torch.cat([grid_x, grid_y], 1)  # (B,2,H,W)
        _backwarp_cache[cache_key] = base_grid
    # Convert pixel displacements to normalised grid units.
    half_w = (width - 1.0) / 2.0
    half_h = (height - 1.0) / 2.0
    norm_flow = torch.cat([flow[:, 0:1] / half_w, flow[:, 1:2] / half_h], 1)
    sample_grid = (base_grid + norm_flow).permute(0, 2, 3, 1)
    return F.grid_sample(img, sample_grid, mode="bilinear", padding_mode="border",
                         align_corners=True)
class IFNet(nn.Module):
    """RIFE IFNet_HDv3: three refinement blocks that accumulate a bidirectional
    flow and a blend mask, then return the t=0.5 frame.

    Higher fps multipliers are left to the caller (recursive bisection of the
    midpoints).
    """

    def __init__(self):
        super().__init__()
        self.block0 = IFBlock(11, c=90)
        self.block1 = IFBlock(11, c=90)
        self.block2 = IFBlock(11, c=90)
        # Training-only teacher block; kept so the checkpoint loads, never run here.
        self.block_tea = IFBlock(14, c=90)

    @torch.no_grad()
    def interpolate(self, img0, img1, scale_list=(4, 2, 1)):
        """Return the frame midway between ``img0`` and ``img1``.

        ``scale_list`` gives the working scale of each of the three blocks.
        """
        batch, _, height, width = img0.shape
        flow = img0.new_zeros((batch, 4, height, width))
        mask = img0.new_zeros((batch, 1, height, width))
        warped0 = img0
        warped1 = img1
        stages = (self.block0, self.block1, self.block2)
        for stage_idx, stage in enumerate(stages):
            stage_input = torch.cat((warped0, warped1, mask), 1)
            flow_delta, mask_delta = stage(stage_input, flow,
                                           scale=scale_list[stage_idx])
            flow = flow + flow_delta
            mask = mask + mask_delta
            # First two flow channels warp img0, the last two warp img1.
            warped0 = _warp(img0, flow[:, :2])
            warped1 = _warp(img1, flow[:, 2:4])
        m = torch.sigmoid(mask)
        return warped0 * m + warped1 * (1 - m)
def load_ifnet(weights_path: str, device):
    """Build IFNet and load RIFE flownet weights into it.

    Args:
        weights_path: checkpoint readable by ``torch.load`` (.pkl/.pth); either a
            bare state dict or a dict with a ``"state_dict"`` entry. Keys may
            carry a ``"module."`` prefix, which is stripped.
        device: device the returned model is moved to.

    Returns:
        The IFNet in eval mode on ``device``.

    Raises:
        RuntimeError: if the checkpoint does not contain a state dict, or (from
            ``load_state_dict(strict=True)``) if its keys do not match IFNet.
    """
    # SECURITY NOTE(review): weights_only=False makes torch.load unpickle
    # arbitrary objects, which can execute code embedded in the file. Only point
    # this at checkpoints from a trusted source; consider weights_only=True if
    # every supported checkpoint is a plain tensor state dict.
    sd = torch.load(weights_path, map_location="cpu", weights_only=False)
    if isinstance(sd, dict) and isinstance(sd.get("state_dict"), dict):
        sd = sd["state_dict"]
    if not hasattr(sd, "items"):
        # Fail with a readable message instead of an AttributeError on .items().
        raise RuntimeError(
            f"RIFE weights at '{weights_path}' do not contain a state dict "
            f"(got {type(sd).__name__})")
    clean = {
        (k[7:] if isinstance(k, str) and k.startswith("module.") else k): v
        for k, v in sd.items()
    }
    net = IFNet()
    net.load_state_dict(clean, strict=True)
    net.eval().to(device)
    return net
......@@ -188,6 +188,17 @@ admin_static_dir = Path(__file__).parent.parent / "admin" / "static"
if admin_static_dir.exists():
app.mount("/static/admin", StaticFiles(directory=str(admin_static_dir)), name="admin_static")
# Serve a favicon at the conventional /favicon.ico path so browsers stop 404-ing on it.
from fastapi.responses import FileResponse, Response as _FaviconResponse
_favicon_path = admin_static_dir / "favicon.ico"


@app.get("/favicon.ico", include_in_schema=False)
async def favicon():
    """Serve the admin favicon, or an empty 404 when the file is absent."""
    if not _favicon_path.exists():
        return _FaviconResponse(status_code=404)
    return FileResponse(str(_favicon_path), media_type="image/x-icon")
# Include routers from submodules
app.include_router(transcriptions_router, tags=["Audio"])
app.include_router(images_router, tags=["Images"])
......
......@@ -724,7 +724,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
# Fallback: non-streaming
if get_global_debug():
print(f"DEBUG: Using non-streaming fallback for first pass")
first_pass_result = current_manager.generate(
first_pass_result = await asyncio.to_thread(
current_manager.generate,
prompt=raw_prompt_for_generation,
max_tokens=request.max_tokens or 2048,
temperature=request.temperature,
......@@ -744,7 +745,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
if get_global_debug():
print(f"DEBUG: raw_stream_generate second pass, full_prompt length: {len(full_prompt)}")
second_pass_result = current_manager.generate(
second_pass_result = await asyncio.to_thread(
current_manager.generate,
prompt=full_prompt,
max_tokens=request.max_tokens or 2048,
temperature=request.temperature,
......@@ -882,7 +884,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
# Non-streaming path (already implemented above)
# First pass: generate until reasoning close tag
first_pass_result = current_manager.generate(
first_pass_result = await asyncio.to_thread(
current_manager.generate,
prompt=raw_prompt_for_generation,
max_tokens=request.max_tokens or 2048,
temperature=request.temperature,
......@@ -991,7 +994,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
generated_text = reasoning_text + (close_tag or "") + final_text
else:
# Need second pass to get answer
second_pass_result = current_manager.generate(
second_pass_result = await asyncio.to_thread(
current_manager.generate,
prompt=full_prompt,
max_tokens=request.max_tokens or 2048,
temperature=request.temperature,
......@@ -1633,7 +1637,8 @@ async def generate_chat_response(
try:
# Use generate_chat for proper chat template handling
generated_text = current_manager.generate_chat(
generated_text = await asyncio.to_thread(
current_manager.generate_chat,
messages=messages,
max_tokens=max_tokens,
temperature=temperature,
......@@ -1966,7 +1971,8 @@ async def generate_completion_response(
created = int(time.time())
try:
generated_text = current_manager.generate(
generated_text = await asyncio.to_thread(
current_manager.generate,
prompt=prompt,
max_tokens=max_tokens,
temperature=temperature,
......
......@@ -184,6 +184,44 @@ def _encode_mp4_pyav(frames, fps: int, crf: int) -> bytes:
return data
class _Mp4Writer:
    """Streaming H.264 MP4 writer via PyAV.

    Frames are passed to ``add()`` one at a time, so a large (e.g. 4×-upscaled)
    sequence never has to be held fully in memory. In-process, no ffmpeg. The
    video stream is created lazily from the first frame's dimensions. Call
    ``close()`` exactly once when done to flush the encoder and finalise the file.
    """

    def __init__(self, path: str, fps, crf: int = 18):
        import av
        # Convert the numeric arguments BEFORE opening the output so a bad value
        # cannot leak an open container. Round rather than truncate: a decoded
        # rate such as 23.976 should encode at 24, not 23.
        self.fps = int(round(float(fps))) or 1
        self.crf = int(crf)
        self._av = av
        self.container = av.open(path, mode='w')
        self.stream = None

    def add(self, arr):
        """Encode one frame given as an array (H,W), (H,W,3) or (H,W,4+).

        Non-uint8 input is clipped to [0, 255]; greyscale is replicated to three
        channels; channels beyond the first three are dropped.
        """
        import numpy as np
        if arr.dtype != np.uint8:
            arr = np.clip(arr, 0, 255).astype(np.uint8)
        if arr.ndim == 2:
            arr = np.stack([arr] * 3, axis=-1)
        # Slicing off an alpha channel yields a non-contiguous view; hand the
        # encoder a contiguous buffer.
        arr = np.ascontiguousarray(arr[..., :3])
        if self.stream is None:
            h, w = arr.shape[:2]
            s = self.container.add_stream('libx264', rate=self.fps)
            s.width, s.height, s.pix_fmt = int(w), int(h), 'yuv420p'
            s.options = {'crf': str(self.crf)}
            self.stream = s
        vframe = self._av.VideoFrame.from_ndarray(arr, format='rgb24')
        for pkt in self.stream.encode(vframe):
            self.container.mux(pkt)

    def close(self):
        """Flush any buffered packets and close the container (always closed,
        even if the flush raises)."""
        try:
            if self.stream is not None:
                for pkt in self.stream.encode():
                    self.container.mux(pkt)
        finally:
            self.container.close()
def _enc_dbg(msg: str) -> None:
"""Verbose encode-path logging (only when --debug is active)."""
try:
......@@ -2008,7 +2046,9 @@ def _postprocess_video(mp4_bytes: bytes, request: VideoGenerationRequest,
request.upscale_factor or 2, temp_paths)
if request.interpolate_output and request.fps_multiplier:
path = _rife_interpolate(path, request.fps_multiplier, temp_paths)
# None → auto-select a configured in-process interpolation model.
path = _interpolate_video(path, request.fps_multiplier, temp_paths,
getattr(request, "interpolation_model", None))
if request.add_audio:
path = _add_audio_to_video(path, request, temp_paths)
......@@ -2096,55 +2136,51 @@ def _model_upscale_video(model_name: str, in_path: str, factor: int,
# Cache only a real, loaded upscaler so it's reused across frames/requests.
_UPSCALER_CACHE[resolved] = upscaler
fps = _video_fps(in_path)
frames_dir = tempfile.mkdtemp()
out_dir = tempfile.mkdtemp()
temps += [frames_dir, out_dir]
r = subprocess.run(_ffmpeg(['-i', in_path, f'{frames_dir}/%08d.png']),
capture_output=True, preexec_fn=_cpu_affinity_preexec())
if r.returncode != 0:
raise RuntimeError(
f"frame extraction failed: {r.stderr.decode(errors='replace')}")
import glob
frame_files = sorted(glob.glob(f'{frames_dir}/*.png'))
if not frame_files:
raise RuntimeError("no frames extracted from video")
_log.info("video upscale ×%d via %s (%s) — %d frames",
factor, resolved, backend, len(frame_files))
# Decode frames in-process (PyAV — no ffmpeg subprocess).
import io, numpy as np
from PIL import Image as _PILImage
frames, fps = _decode_video_pyav(in_path)
if not frames:
raise RuntimeError("no frames decoded from video")
n = len(frames)
_log.info("video upscale ×%d via %s (%s) — %d frames (in-process)",
factor, resolved, backend, n)
# Register as a first-class task so it shows in the CoderAI task list and can
# be paused/cancelled like any other model job. (Thermal protection already
# ran at request_model() time, same as every other model.)
_tid = task_registry.register(
"upscale", title=f"Upscale ×{factor} ({len(frame_files)} frames)",
model=resolved, total=len(frame_files))
"upscale", title=f"Upscale ×{factor} ({n} frames)",
model=resolved, total=n)
task_registry.start(_tid)
# Publish per-frame progress through the shared video-progress channel so
# clients polling /v1/video/progress can show "frame X/Y" while we work.
_vid_progress_reset(len(frame_files))
_vid_progress_reset(n)
_vid_progress["phase"] = "upscaling"
_vid_progress["model"] = resolved
# Re-check thermals periodically through the frame loop: request_model()
# only waited once at job start, but an upscale runs the GPU hard for many
# frames over minutes, so a long clip could heat up mid-run. Pause to cool
# every _THERMAL_EVERY frames, exactly like a fresh request would.
# Re-check thermals periodically through the frame loop (request_model() only
# waited once at job start; a long upscale runs the GPU hard for minutes).
_THERMAL_EVERY = 32
try:
from codai.models.thermal import wait_until_safe as _wait_until_safe
except Exception:
_wait_until_safe = None
# Stream upscaled frames straight into the encoder so the (large, ×factor)
# output never has to be fully resident in memory. Audio is not carried in the
# pure in-process path (the enhancement targets are silent clips).
out = tempfile.mktemp(suffix='_up.mp4')
temps.append(out)
writer = _Mp4Writer(out, fps)
try:
for i, fp in enumerate(frame_files):
for i, arr in enumerate(frames):
task_registry.raise_if_cancelled(_tid)
task_registry.wait_if_paused(_tid)
if _wait_until_safe and i and i % _THERMAL_EVERY == 0:
try:
_wait_until_safe(context=f"upscale:{resolved}")
except Exception:
pass # never let thermal monitoring block the upscale
with open(fp, 'rb') as f:
out_img = _run_upscale(upscaler, f.read(), factor)
out_img.save(os.path.join(out_dir, os.path.basename(fp)))
pass
buf = io.BytesIO()
_PILImage.fromarray(arr).save(buf, format="PNG")
out_img = _run_upscale(upscaler, buf.getvalue(), factor)
writer.add(np.asarray(out_img.convert("RGB")))
task_registry.step(_tid, i + 1)
_vid_progress_step(i + 1)
except TaskCancelled:
......@@ -2156,19 +2192,8 @@ def _model_upscale_video(model_name: str, in_path: str, factor: int,
else:
task_registry.finish(_tid, "done")
finally:
writer.close()
_vid_progress_done()
out = tempfile.mktemp(suffix='_up.mp4')
temps.append(out)
cmd = _ffmpeg(['-framerate', f'{fps}', '-i', f'{out_dir}/%08d.png'])
# Carry the original audio track if present.
cmd += ['-i', in_path, '-map', '0:v:0', '-map', '1:a:0?',
'-c:v', 'libx264', '-pix_fmt', 'yuv420p', '-c:a', 'copy',
'-shortest', out]
r = subprocess.run(cmd, capture_output=True, preexec_fn=_cpu_affinity_preexec())
if r.returncode != 0:
raise RuntimeError(
f"reassembly failed: {r.stderr.decode(errors='replace')}")
return out
......@@ -2184,6 +2209,169 @@ def _count_video_frames(path: str) -> int:
return 0
def _enhance_allow_ffmpeg() -> bool:
    """Whether ``global_args.enhance_allow_ffmpeg`` is set (False when absent)."""
    flag = getattr(global_args, "enhance_allow_ffmpeg", False)
    return True if flag else False
def _enhance_allow_rife_ncnn() -> bool:
    """Whether ``global_args.enhance_allow_rife_ncnn`` is set (False when absent)."""
    flag = getattr(global_args, "enhance_allow_rife_ncnn", False)
    return True if flag else False
def _decode_video_pyav(path: str):
    """Decode every frame of the first video stream in-process with PyAV.

    Returns ``(frames, fps)``: a list of rgb24 arrays and the stream's average
    (else guessed) frame rate as a float, falling back to 8.0 when the container
    reports none.
    """
    import av
    import numpy as np
    container = av.open(path)
    try:
        video_stream = container.streams.video[0]
        reported_rate = video_stream.average_rate or video_stream.guessed_rate or 0
        fps = float(reported_rate) or 0.0
        frames = [decoded.to_ndarray(format="rgb24")
                  for decoded in container.decode(video=0)]
    finally:
        container.close()
    return frames, (fps if fps else 8.0)
# ── In-process frame interpolation (torch models: RIFE / FILM) ────────────────
_INTERP_CACHE: dict = {}
def _resolve_interp_weights(model_name: str):
    """Resolve an interpolation model id to a local weights file.

    Accepts, in order:
      * a path to a .pkl/.pth/.pt/.safetensors file (returned as-is);
      * a directory containing such files;
      * a Hugging Face repo id (the chosen file is downloaded).

    When several files qualify, ``flownet*`` is preferred, then ``rife*`` /
    ``ifnet*``, then anything else; ties go to the shorter name.

    Returns:
        The weights path, or None when nothing could be resolved (including an
        empty/None ``model_name`` and any hub lookup/download failure).
    """
    import logging
    import os as _os

    # NOTE(review): '.safetensors' is accepted here, but the RIFE loader visible
    # in this change reads checkpoints with torch.load — confirm it can be loaded.
    exts = ('.pkl', '.pth', '.safetensors', '.pt')

    def _preference(name):
        # Shared ranking for the directory and hub branches.
        low = name.lower()
        if 'flownet' in low:
            tier = 0
        elif 'rife' in low or 'ifnet' in low:
            tier = 1
        else:
            tier = 2
        return (tier, len(name))

    if not model_name:
        # os.path.isfile(None) raises TypeError; treat "no model" as unresolved.
        return None
    if _os.path.isfile(model_name) and model_name.lower().endswith(exts):
        return model_name
    if _os.path.isdir(model_name):
        cands = sorted(f for f in _os.listdir(model_name) if f.lower().endswith(exts))
        cands.sort(key=_preference)
        return _os.path.join(model_name, cands[0]) if cands else None
    try:
        from huggingface_hub import list_repo_files, hf_hub_download
        files = [f for f in list_repo_files(model_name) if f.lower().endswith(exts)]
        if not files:
            return None
        files.sort(key=_preference)
        return hf_hub_download(model_name, files[0])
    except Exception as exc:
        # Best-effort lookup: report "unresolved", but leave a trace for debugging.
        logging.getLogger(__name__).debug(
            "interpolation weights lookup failed for %r: %s", model_name, exc)
        return None
def _load_interpolator(model_name: str, device):
    """Return a cached ``(kind, model)`` in-process interpolation engine.

    ``kind`` is ``'film'`` when the model name contains "film" (loaded through
    ``load_film``) and ``'rife'`` otherwise (loaded through ``load_ifnet``).
    Engines are cached per model name and device.

    Raises:
        RuntimeError: when no weights can be resolved for ``model_name``; loader
            errors propagate unchanged.
    """
    cache_key = f"{model_name}@{device}"
    hit = _INTERP_CACHE.get(cache_key)
    if hit is not None:
        return hit

    lowered = (model_name or "").lower()
    weights = _resolve_interp_weights(model_name)
    if weights is None:
        raise RuntimeError(f"no interpolation weights found for '{model_name}'")

    if "film" not in lowered:
        from codai.api._rife_ifnet import load_ifnet
        engine = ("rife", load_ifnet(weights, device))
    else:
        # FILM torch engine — supported via a HF torch checkpoint when present.
        from codai.api._film_net import load_film
        engine = ("film", load_film(weights, device))

    _INTERP_CACHE[cache_key] = engine
    return engine
def _interp_pair(engine, a, b, depth: int):
    """Return the ordered intermediate frames between ``a`` and ``b``.

    Uses recursive midpoint bisection: ``depth`` levels give ``2**depth``
    subdivisions, i.e. ``2**depth - 1`` frames. ``a`` and ``b`` themselves are
    not included.

    Args:
        engine: ``(kind, net)`` pair; ``net.interpolate(x, y)`` must return the
            t=0.5 frame. Every engine kind shares this interface, so ``kind`` is
            not consulted here.
        a, b: the two endpoint frames.
        depth: number of bisection levels; ``<= 0`` yields an empty list.
    """
    _kind, net = engine
    if depth <= 0:
        return []
    mid = net.interpolate(a, b)
    left = _interp_pair(engine, a, mid, depth - 1)
    right = _interp_pair(engine, mid, b, depth - 1)
    return left + [mid] + right
def _interpolate_inprocess(path, multiplier, model_name, temps, tid=None, output_fps=None):
    """Raise a video's FPS with an in-process torch interpolation model (RIFE/FILM).

    Decodes and encodes with PyAV — no ffmpeg, no subprocess.

    Args:
        path: input video path.
        multiplier: requested FPS multiplier; converted to a bisection depth, so
            powers of two are exact and other values round to the nearest one
            (minimum ×2).
        model_name: interpolation model id handed to ``_load_interpolator``.
        temps: list the output path is appended to (for the caller's cleanup).
        tid: optional task-registry id used for cancel/pause checks and progress.
        output_fps: final encode rate; None = source_fps × effective multiplier,
            which preserves the original duration/motion speed.

    Returns:
        Path of the interpolated MP4.

    Raises:
        RuntimeError: if the input has fewer than two frames. Errors from model
            loading and task cancellation propagate.
    """
    import math
    import os

    import numpy as np
    import torch

    device = _derive_device() if torch.cuda.is_available() else "cpu"
    engine = _load_interpolator(model_name, device)
    frames, fps = _decode_video_pyav(path)
    if len(frames) < 2:
        raise RuntimeError("need at least 2 frames to interpolate")

    # Multiplier → bisection depth (powers of two are exact; others round).
    depth = max(1, int(round(math.log2(max(2, int(multiplier))))))
    eff_mult = 2 ** depth
    out_frames_total = (len(frames) - 1) * eff_mult + 1
    _vid_progress_reset(out_frames_total)
    _vid_progress["phase"] = "interpolating"
    _vid_progress["model"] = model_name

    # Thermal protection is optional: run without it if the module is missing.
    try:
        from codai.models.thermal import wait_until_safe as _therm
    except Exception:
        _therm = None

    def _to_t(a):
        # HWC uint8 frame → 1×C×H×W float tensor in [0, 1] on the target device.
        return torch.from_numpy(a.astype(np.float32) / 255.0).permute(2, 0, 1).unsqueeze(0).to(device)

    def _to_np(t):
        return (t[0].permute(1, 2, 0).clamp(0, 1).cpu().numpy() * 255).round().astype(np.uint8)

    def _pad(t):
        # Pad bottom/right up to a multiple of 32; return the original size so
        # the padding can be cropped off the results.
        _, _, h, w = t.shape
        ph, pw = (32 - h % 32) % 32, (32 - w % 32) % 32
        return torch.nn.functional.pad(t, (0, pw, 0, ph), mode="replicate"), h, w

    def _report(count):
        _vid_progress_step(count)
        if tid is not None:
            try:
                task_registry.step(tid, count)
            except Exception:
                pass  # progress reporting must never abort the job

    out = []
    done = 0
    # Each frame is the right end of one pair and the left end of the next:
    # convert and pad it once and carry it over.
    upcoming = _pad(_to_t(frames[0]))
    for i in range(len(frames) - 1):
        if tid is not None:
            task_registry.raise_if_cancelled(tid)
            task_registry.wait_if_paused(tid)
        # Re-check thermals through the loop (each pair is several model passes
        # at higher multipliers); wait_until_safe blocks until the GPU/CPU cool.
        if _therm and i and i % 4 == 0:
            try:
                _therm(context=f"interpolate:{model_name}")
            except Exception:
                pass  # never let thermal monitoring block the interpolation
        a, h, w = upcoming
        upcoming = _pad(_to_t(frames[i + 1]))
        b = upcoming[0]
        mids = [_to_np(m[:, :, :h, :w]) for m in _interp_pair(engine, a, b, depth)]
        out.append(frames[i])
        out.extend(mids)
        done += 1 + len(mids)
        _report(done)
    out.append(frames[-1])
    # Count the closing frame too so progress reaches out_frames_total.
    done += 1
    _report(done)

    _enc_fps = int(output_fps) if output_fps else (int(round(fps * eff_mult)) or 1)
    data = _encode_mp4_pyav(out, max(1, _enc_fps), 18)
    # mkstemp creates the file atomically; mktemp only returns a name, which
    # leaves a window for another process to claim it first.
    fd, out_path = tempfile.mkstemp(suffix="_interp.mp4")
    temps.append(out_path)
    with os.fdopen(fd, "wb") as f:
        f.write(data)
    return out_path
def _cpu_thread_limit() -> int:
"""CPU thread budget for helper subprocesses (ffmpeg, rife I/O). Mirrors
coderai's global cap (half the cores, set as OMP_NUM_THREADS at import) so
......@@ -2319,12 +2507,85 @@ def _rife_gpu_id(rife_bin: str) -> int:
return gid
def _rife_interpolate(path: str, multiplier: int, temps: list) -> str:
"""Raise a video's FPS via the RIFE neural frame interpolator (AI, on the GPU).
def _ffmpeg_minterpolate(path: str, multiplier: int, temps: list) -> str:
    """Raise a video's FPS with ffmpeg's ``minterpolate`` filter.

    This is the non-model CPU path; ``_interpolate_video`` only reaches it when
    ``_enhance_allow_ffmpeg()`` is true. The child process is started with the
    ``_cpu_affinity_preexec()`` hook (presumably CPU pinning — confirm in that
    helper). Audio is stream-copied.

    The output path is appended to ``temps`` and returned. Raises RuntimeError
    when ffmpeg exits non-zero or produces no output file.
    """
    import logging
    _log = logging.getLogger(__name__)

    out = tempfile.mktemp(suffix='_mint.mp4')
    temps.append(out)

    source_fps = _video_fps(path)
    # If the computed rate rounds to 0 (e.g. unknown source fps), use multiplier * 8.
    target_fps = int(round(source_fps * multiplier)) or (multiplier * 8)
    cmd = _ffmpeg(['-i', path, '-filter:v',
                   f'minterpolate=fps={target_fps}',
                   '-c:a', 'copy', out])
    r = subprocess.run(cmd, capture_output=True, preexec_fn=_cpu_affinity_preexec())

    succeeded = r.returncode == 0 and os.path.exists(out)
    if not succeeded:
        raise RuntimeError(f"ffmpeg minterpolate failed: {r.stderr.decode(errors='replace')}")
    return out
AI-or-fail by design: there is NO ffmpeg/minterpolate fallback. If no neural
interpolator is available the call raises so the caller surfaces the error,
exactly like the AI upscaler — interpolation must be a real AI op on CoderAI."""
def _interpolate_video(path: str, multiplier: int, temps: list,
                       model_name: str = None, output_fps: int = None) -> str:
    """Raise a video's FPS and return the path of the result.

    Selection order:
      1. An in-process torch interpolation model (RIFE/FILM) — the requested
         ``model_name``, else whatever ``find_capable_model`` reports for the
         ``video_interpolation`` capability. The run is registered with the
         task registry so it can be tracked, cancelled and reported.
      2. The external rife-ncnn-vulkan binary, only when enabled and found.
      3. ffmpeg minterpolate, only when enabled.
    If none applies a RuntimeError explains how to configure one.

    By default the in-process output is encoded at fps × multiplier so real
    duration and motion speed are preserved (a 20fps clip ×2 plays at 40fps).
    ``output_fps`` overrides that encode rate; it is forwarded to the
    in-process path only — the external tools are called without it.
    """
    import logging
    import math

    _log = logging.getLogger(__name__)

    # 1. In-process model (preferred): explicit request, else auto-selection.
    chosen = (model_name or "").strip()
    if not chosen:
        try:
            chosen = multi_model_manager.find_capable_model("video_interpolation") or ""
        except Exception:
            chosen = ""

    if chosen:
        # Let the manager resolve the canonical name; fall back to the raw name
        # if the request fails or returns nothing usable.
        try:
            info = multi_model_manager.request_model(chosen, model_type="image")
            resolved = (info or {}).get("model_name") or chosen
        except Exception:
            resolved = chosen

        # Expected output frame count for the task's progress total, derived the
        # same way the in-process worker derives its bisection depth.
        in_frames = _count_video_frames(path)
        depth = max(1, int(round(math.log2(max(2, int(multiplier))))))
        out_total = (max(1, in_frames) - 1) * (2 ** depth) + 1

        tid = task_registry.register(
            "interpolate", title=f"Interpolate ×{multiplier} FPS ({resolved})",
            model=resolved, total=out_total)
        task_registry.start(tid)
        try:
            result = _interpolate_inprocess(path, multiplier, resolved, temps, tid,
                                            output_fps=output_fps)
        except TaskCancelled:
            task_registry.finish(tid, "cancelled")
            raise
        except Exception as exc:
            task_registry.finish(tid, "error", str(exc)[:200])
            raise
        else:
            task_registry.finish(tid, "done")
            return result
        finally:
            # Always clear the shared progress state, whatever the outcome.
            _vid_progress_done()

    # 2. External tools — only if explicitly enabled in Settings.
    if _enhance_allow_rife_ncnn() and _find_rife_binary():
        _log.info("interpolation: no model configured — using enabled rife-ncnn-vulkan")
        return _rife_ncnn_interpolate(path, multiplier, temps)
    if _enhance_allow_ffmpeg():
        _log.info("interpolation: no model configured — using enabled ffmpeg minterpolate")
        return _ffmpeg_minterpolate(path, multiplier, temps)

    raise RuntimeError(
        "No in-process interpolation model is configured and external tools are "
        "disabled. Configure a RIFE/FILM model (capability 'video_interpolation', "
        "e.g. AlexWortega/RIFE), or enable ffmpeg / rife-ncnn-vulkan in Settings.")
def _rife_ncnn_interpolate(path: str, multiplier: int, temps: list) -> str:
"""Raise a video's FPS via the EXTERNAL rife-ncnn-vulkan binary. Gated — only
reached when enhance.allow_rife_ncnn is enabled and no in-process model is set."""
out = tempfile.mktemp(suffix='_rife.mp4')
temps.append(out)
import logging
......@@ -2333,9 +2594,8 @@ def _rife_interpolate(path: str, multiplier: int, temps: list) -> str:
rife_bin = _find_rife_binary()
if not rife_bin:
raise RuntimeError(
"No AI frame interpolator available: 'rife-ncnn-vulkan' is not installed. "
"FPS interpolation must run on an AI model (no ffmpeg fallback) — install "
"rife-ncnn-vulkan, or disable FPS interpolation (fps_multiplier).")
"rife-ncnn-vulkan is enabled but not installed. Install it, configure "
"an in-process RIFE/FILM model instead, or disable FPS interpolation.")
# The release ships its model folders next to the binary; resolve an absolute
# model path so it's found regardless of the server's CWD.
_rife_root = os.path.dirname(os.path.realpath(rife_bin))
......@@ -2398,16 +2658,20 @@ def _rife_interpolate(path: str, multiplier: int, temps: list) -> str:
stderr=subprocess.PIPE, preexec_fn=_aff)
_stop = threading.Event()
_paused = {"v": False}
# Sample thermals every ~2s (the temp-read cache TTL) rather than the
# cooldown poll_seconds (often 5–10s) so we react to GPU spikes fast.
_THERM_EVERY = 2.0
def _watch():
_last_t = 0.0
_last_t, _hot = 0.0, False
_g = {"gt": None, "ct": None}
while not _stop.is_set():
n = len(_glob.glob(f'{out_dir}/*.png'))
_vid_progress_step(min(n, out_frames) if out_frames else n)
if out_frames:
try: task_registry.step(_tid, min(n, out_frames))
except Exception: pass
# Cancellation → kill the process.
# Cancellation → resume (so it can die) then kill the process.
try:
task_registry.raise_if_cancelled(_tid)
except TaskCancelled:
......@@ -2416,29 +2680,40 @@ def _rife_interpolate(path: str, multiplier: int, temps: list) -> str:
_stop.set(); break
except Exception:
pass
# Thermal gate (every poll_seconds): pause/resume the subprocess.
# Thermal sample with hysteresis: latch hot at *_high, clear only
# once cooled to *_resume.
now = _time.monotonic()
if _ts is not None and (now - _last_t) >= float(getattr(_ts, "poll_seconds", 5.0)):
if _ts is not None and (now - _last_t) >= _THERM_EVERY:
_last_t = now
gt = _rgt() if (_rgt and _ts.gpu_enabled) else None
ct = _rct() if (_rct and _ts.cpu_enabled) else None
too_hot = ((gt is not None and gt >= _ts.gpu_high) or
(ct is not None and ct >= _ts.cpu_high))
cool = ((gt is None or gt <= _ts.gpu_resume) and
(ct is None or ct <= _ts.cpu_resume))
if too_hot and not _paused["v"]:
try:
proc.send_signal(signal.SIGSTOP); _paused["v"] = True
_vid_progress["phase"] = "interpolating (thermal pause)"
_g["gt"], _g["ct"] = gt, ct
if not _hot:
_hot = ((gt is not None and gt >= _ts.gpu_high) or
(ct is not None and ct >= _ts.cpu_high))
else:
_hot = not ((gt is None or gt <= _ts.gpu_resume) and
(ct is None or ct <= _ts.cpu_resume))
# Manual pause (Tasks page) also SIGSTOPs the external process.
try: _manual = bool(task_registry.is_paused(_tid))
except Exception: _manual = False
_should = _hot or _manual
if _should and not _paused["v"]:
try:
proc.send_signal(signal.SIGSTOP); _paused["v"] = True
_vid_progress["phase"] = ("interpolating (thermal pause)"
if _hot else "interpolating (paused)")
if _hot:
import logging
logging.getLogger(__name__).warning(
"rife paused — too hot (GPU %s / CPU %s °C)", gt, ct)
except Exception: pass
elif _paused["v"] and cool:
try:
proc.send_signal(signal.SIGCONT); _paused["v"] = False
_vid_progress["phase"] = "interpolating"
except Exception: pass
"rife-ncnn paused — too hot (GPU %s / CPU %s °C)",
_g["gt"], _g["ct"])
except Exception: pass
elif not _should and _paused["v"]:
try:
proc.send_signal(signal.SIGCONT); _paused["v"] = False
_vid_progress["phase"] = "interpolating"
except Exception: pass
_stop.wait(0.5)
_w = threading.Thread(target=_watch, daemon=True); _w.start()
......@@ -3285,21 +3560,23 @@ async def video_interpolate(request: VideoInterpolateRequest, http_request: Requ
temps.append(in_path)
elif request.init_image and request.end_image:
# Build a 2-frame video from the two images, then interpolate
from PIL import Image as PILImage
import numpy as np, imageio
import numpy as np
img1 = _pil_from_b64(request.init_image)
img2 = _pil_from_b64(request.end_image)
in_path = tempfile.mktemp(suffix='.mp4')
temps.append(in_path)
imageio.mimsave(in_path, [np.array(img1), np.array(img2)],
fps=2, codec='libx264')
# Build the 2-frame source in-process via PyAV (no ffmpeg).
with open(in_path, 'wb') as _fh:
_fh.write(_encode_mp4_pyav([np.array(img1.convert('RGB')),
np.array(img2.convert('RGB'))], 2, 18))
else:
raise HTTPException(status_code=400,
detail="Provide either video or init_image + end_image")
mult = request.fps_multiplier or 2
out_path = await asyncio.get_event_loop().run_in_executor(
None, _rife_interpolate, in_path, mult, temps)
None, _interpolate_video, in_path, mult, temps, request.model,
request.output_fps)
with open(out_path, 'rb') as f:
out_bytes = f.read()
......
......@@ -143,6 +143,45 @@ def _install_layer_log_callback():
return _cb # caller must hold this reference
async def _aiter_blocking(sync_iter):
"""Bridge a blocking (sync) generator onto the asyncio event loop.
llama.cpp's create_(chat_)completion returns a *synchronous* generator whose
first ``next()`` runs the whole prompt prefill and every subsequent ``next()``
runs a full token forward pass. Iterating it directly inside an ``async def``
runs that work on the event loop and freezes every other HTTP request (the
whole web UI) for the duration of a completion.
This pulls one item at a time from a worker thread via ``asyncio.to_thread``
so the loop stays responsive between (and during, since llama.cpp releases the
GIL while computing) token steps. Closing the async generator — e.g. on client
disconnect or task cancellation — closes the underlying sync generator, which
stops llama.cpp at the next token boundary, matching the old inline ``break``.
"""
import asyncio
_SENT = object()
def _next():
try:
return next(sync_iter)
except StopIteration:
return _SENT
try:
while True:
item = await asyncio.to_thread(_next)
if item is _SENT:
break
yield item
finally:
close = getattr(sync_iter, "close", None)
if close is not None:
try:
close()
except Exception:
pass
class VulkanBackend(ModelBackend):
"""Backend for Vulkan (AMD GPUs) using llama-cpp-python with GGUF models."""
......@@ -891,8 +930,8 @@ class VulkanBackend(ModelBackend):
first_chunk = True
prompt_len = len(prompt) if isinstance(prompt, str) else 0
for chunk in self.model.create_completion(
async for chunk in _aiter_blocking(self.model.create_completion(
stopping_criteria=_make_llama_thermal_criteria(),
prompt=prompt,
max_tokens=max_tokens,
......@@ -903,9 +942,9 @@ class VulkanBackend(ModelBackend):
stop=stop,
stream=True,
grammar=use_grammar,
):
)):
text = chunk['choices'][0].get('text', '')
if first_chunk:
# Skip the prompt text on first chunk
# The first chunk includes the full prompt plus the first new token
......@@ -931,8 +970,8 @@ class VulkanBackend(ModelBackend):
try:
first_chunk = True
prompt_len = len(prompt) if isinstance(prompt, str) else 0
for chunk in self.model.create_completion(
async for chunk in _aiter_blocking(self.model.create_completion(
stopping_criteria=_make_llama_thermal_criteria(),
prompt=prompt,
max_tokens=max_tokens,
......@@ -942,7 +981,7 @@ class VulkanBackend(ModelBackend):
repeat_penalty=repeat_penalty,
stop=stop,
stream=True,
):
)):
text = chunk['choices'][0].get('text', '')
if first_chunk:
......@@ -1001,8 +1040,8 @@ class VulkanBackend(ModelBackend):
async def generate_stream():
first_chunk = True
prompt_len = len(prompt)
for chunk in self.model.create_completion(
async for chunk in _aiter_blocking(self.model.create_completion(
stopping_criteria=_make_llama_thermal_criteria(),
prompt=prompt,
max_tokens=max_tokens,
......@@ -1011,7 +1050,7 @@ class VulkanBackend(ModelBackend):
repeat_penalty=repeat_penalty,
stop=stop,
stream=True,
):
)):
text = chunk['choices'][0].get('text', '')
if first_chunk:
......@@ -1181,7 +1220,7 @@ class VulkanBackend(ModelBackend):
prompt_tokens = 0
completion_tokens = 0
try:
for chunk in self.model.create_chat_completion(**kwargs):
async for chunk in _aiter_blocking(self.model.create_chat_completion(**kwargs)):
delta = chunk['choices'][0].get('delta', {})
text = delta.get('content') or ''
if text:
......
......@@ -144,6 +144,17 @@ class JobsConfig:
resume_on_restart: bool = True
@dataclass
class EnhanceConfig:
    """Policy for which tools video enhancement (upscale / FPS interpolation) may use.

    The default pipeline is fully in-process on torch models (ESRGAN upscaler,
    RIFE/FILM interpolator) with no subprocess and no ffmpeg. Both switches
    below are opt-ins for external alternatives and default to off.
    """

    # Permit ffmpeg (frame I/O / minterpolate) in place of PyAV + a model.
    allow_ffmpeg: bool = False
    # Permit the external rife-ncnn-vulkan binary in place of a torch model.
    allow_rife_ncnn: bool = False
@dataclass
class Config:
"""Main configuration class."""
......@@ -158,6 +169,7 @@ class Config:
archive: ArchiveConfig = field(default_factory=ArchiveConfig)
thermal: ThermalConfig = field(default_factory=ThermalConfig)
jobs: JobsConfig = field(default_factory=JobsConfig)
enhance: EnhanceConfig = field(default_factory=EnhanceConfig)
broker: BrokerConfig = field(default_factory=BrokerConfig)
system_prompt: Optional[str] = None
tools_closer_prompt: bool = False
......@@ -318,6 +330,7 @@ class ConfigManager:
archive=ArchiveConfig(**config_data.get("archive", {})),
thermal=ThermalConfig(**config_data.get("thermal", {})),
jobs=JobsConfig(**config_data.get("jobs", {})),
enhance=EnhanceConfig(**config_data.get("enhance", {})),
broker=BrokerConfig(**config_data.get("broker", {})),
system_prompt=config_data.get("system_prompt"),
tools_closer_prompt=config_data.get("tools_closer_prompt", False),
......@@ -443,6 +456,10 @@ class ConfigManager:
"jobs": {
"resume_on_restart": self.config.jobs.resume_on_restart,
},
"enhance": {
"allow_ffmpeg": self.config.enhance.allow_ffmpeg,
"allow_rife_ncnn": self.config.enhance.allow_rife_ncnn,
},
"broker": {
"enabled": self.config.broker.enabled,
"base_url": self.config.broker.base_url,
......
......@@ -813,6 +813,9 @@ def main():
global_args.thermal_soft_throttle_enabled = config.thermal.soft_throttle_enabled
global_args.thermal_soft_throttle_temp = config.thermal.soft_throttle_temp
global_args.thermal_soft_throttle_max_sleep = config.thermal.soft_throttle_max_sleep
# Video-enhancement external-tool policy (default off → in-process models only).
global_args.enhance_allow_ffmpeg = config.enhance.allow_ffmpeg
global_args.enhance_allow_rife_ncnn = config.enhance.allow_rife_ncnn
global_args.n_gpu_layers = config.vulkan.n_gpu_layers
global_args.n_ctx = [config.vulkan.n_ctx]
global_args.vulkan_device = config.vulkan.device_id
......
......@@ -133,7 +133,8 @@ def detect_model_capabilities(model_name: str) -> ModelCapabilities:
return caps
# Video interpolation
if any(x in n for x in ['film-net', 'rife', 'flavr', 'dain', 'frame-interp']):
if any(x in n for x in ['film-net', 'film_net', 'film', 'rife', 'ifnet',
'flavr', 'dain', 'frame-interp', 'interpolat']):
caps.video_interpolation = True
return caps
......
......@@ -187,6 +187,7 @@ class VideoInterpolateRequest(BaseModel):
init_image: Optional[str] = None # first frame
end_image: Optional[str] = None # last frame
fps_multiplier: Optional[int] = 2
output_fps: Optional[int] = None # encode the result at this fps (None = source_fps × multiplier, preserves duration)
response_format: Optional[str] = "url"
model_config = ConfigDict(extra="allow")
......
#!/usr/bin/env bash
# Thin wrapper: replaces this process with packaging/linux/package_oci_image.sh,
# resolved relative to this script's own directory (so it works from any CWD),
# and forwards every argument unchanged.
set -euo pipefail
exec "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/packaging/linux/package_oci_image.sh" "$@"
#!/usr/bin/env bash
# Thin wrapper: replaces this process with
# packaging/linux/make_tarball_from_venv.sh, resolved relative to this script's
# own directory (so it works from any CWD), and forwards every argument unchanged.
set -euo pipefail
exec "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/packaging/linux/make_tarball_from_venv.sh" "$@"
# Runtime dependency set for the local OCI image.
# Native packages listed here are built in the wheel stage and installed from /opt/wheels.
-r /tmp/requirements.txt
# CUDA/quantization extras that are intentionally optional in source installs but useful
# in the full Linux container distribution. flash-attn and causal-conv1d stay out of
# the base image because they are CUDA-arch-sensitive and fragile to build generally.
bitsandbytes>=0.41.0
sentencepiece>=0.1.99
tiktoken>=0.5.0
tokenizers>=0.15.0
protobuf>=3.20.0
optimum-quanto>=0.2.0
gguf>=0.9.0
# Native wheels are built by packaging/linux/Dockerfile.oci with CUDA+Vulkan enabled.
llama-cpp-python>=0.2.0
stable-diffusion-cpp-python>=0.2.0
whispercpp>=0.0.17
#!/usr/bin/env python3
"""Write a small build manifest for local CoderAI distribution artifacts."""
from __future__ import annotations
import json
import os
import platform
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
def cmd(args: list[str], cwd: str | None = None) -> str:
    """Run *args* and return its stripped stdout, or ``""`` on any failure.

    stderr is discarded. A missing executable, a non-zero exit status or any
    other error all collapse to the empty string, so callers can treat the
    result as "best-effort metadata".
    """
    try:
        completed = subprocess.run(
            args,
            cwd=cwd,
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except Exception:
        return ""
    return completed.stdout.strip()
def package_versions(python_bin: str | None) -> dict[str, str]:
    """Report installed versions of key packages as seen by *python_bin*.

    A small probe script is run with the given interpreter; it looks up a fixed
    list of distribution names via ``importlib.metadata`` and prints the ones it
    finds as JSON. Packages that are not installed are simply omitted.

    Returns an empty dict when *python_bin* is falsy, when the interpreter
    cannot be run, or when its output is not valid JSON.
    """
    if not python_bin:
        return {}

    # Executed by the *target* interpreter, not this one.
    code = r'''
import importlib.metadata as md
names = ["torch", "torchvision", "torchaudio", "transformers", "diffusers", "accelerate", "llama-cpp-python", "stable-diffusion-cpp-python", "whispercpp", "bitsandbytes", "onnxruntime", "onnxruntime-gpu"]
out = {}
for name in names:
    try:
        out[name] = md.version(name)
    except Exception:
        pass
import json
print(json.dumps(out, sort_keys=True))
'''
    try:
        completed = subprocess.run(
            [python_bin, "-c", code],
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            check=True,
        )
        versions = json.loads(completed.stdout)
    except Exception:
        return {}
    return versions
def main() -> int:
    """Write the build manifest JSON and return the process exit code (0).

    Everything is driven by environment variables: ``MANIFEST_OUT`` is the
    destination (default ``BUILD-MANIFEST.json``), ``PROJECT_ROOT`` the git
    checkout to inspect (default ``.``), ``MANIFEST_PYTHON`` the interpreter
    whose package versions are recorded, and ``MANIFEST_LOCAL_BINS`` an
    ``os.pathsep``-separated list of bundled binaries. The remaining fields are
    copied verbatim from their variables, with ``"unknown"`` or ``""`` defaults.
    """
    env = os.environ.get

    manifest_path = Path(env("MANIFEST_OUT", "BUILD-MANIFEST.json"))
    project_root = str(Path(env("PROJECT_ROOT", ".")).resolve())
    interpreter = env("MANIFEST_PYTHON")
    # Drop empty segments so an unset/empty variable yields [] rather than [""].
    bundled_bins = [entry for entry in env("MANIFEST_LOCAL_BINS", "").split(os.pathsep) if entry]

    manifest = {
        "artifact": env("MANIFEST_ARTIFACT", "unknown"),
        "build_mode": env("MANIFEST_BUILD_MODE", "unknown"),
        "build_time_utc": datetime.now(timezone.utc).isoformat(),
        "git_commit": cmd(["git", "rev-parse", "HEAD"], project_root),
        # Any porcelain output means uncommitted changes in the tree.
        "git_dirty": bool(cmd(["git", "status", "--porcelain"], project_root)),
        "host": {
            "system": platform.system(),
            "release": platform.release(),
            "machine": platform.machine(),
        },
        "python_version": env("PYTHON_VERSION", ""),
        "python_build_standalone_release": env("PBS_RELEASE", ""),
        "uv_version": env("UV_VERSION", ""),
        "cuda_version": env("CUDA_VERSION", ""),
        "ubuntu_version": env("UBUNTU_VERSION", ""),
        "source_venv": env("MANIFEST_VENV", ""),
        "included_local_binaries": bundled_bins,
        "package_versions": package_versions(interpreter),
    }

    manifest_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(manifest, indent=2, sort_keys=True)
    manifest_path.write_text(serialized + "\n")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
# Global build args: select the nvidia/cuda base image tag used by the FROM
# lines of the later stages.
ARG CUDA_VERSION=12.4.1
ARG UBUNTU_VERSION=22.04
# Helper stage holding only the pre-generated build manifest, so the runtime
# stage can pull it in with COPY --from=build_meta.
FROM scratch AS build_meta
COPY .packaging-cache/build-manifest.json /build-manifest.json
# Builder stage: CUDA "devel" base plus a compiler toolchain. It installs a
# standalone CPython under /opt/coderai, builds the native wheels into
# /opt/wheels, installs the Python dependencies and copies in the application.
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS builder
ARG PYTHON_VERSION=3.13.5
ARG PBS_RELEASE=20250612
ARG UV_VERSION=0.7.13
ARG WHISPERCPP_REF=master
# Empty by default = unpinned; when set, the wheel is built at exactly that version.
ARG LLAMA_CPP_PYTHON_VERSION=
ARG SD_CPP_PYTHON_VERSION=
ENV DEBIAN_FRONTEND=noninteractive \
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
PIP_NO_CACHE_DIR=1 \
UV_SYSTEM_PYTHON=1 \
FORCE_CMAKE=1 \
CMAKE_BUILD_PARALLEL_LEVEL=4 \
MAX_JOBS=4
# Build toolchain plus Vulkan headers/tools needed to compile the native wheels.
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
cmake \
curl \
git \
libgomp1 \
libvulkan-dev \
ninja-build \
patchelf \
pkg-config \
zstd \
vulkan-tools \
glslang-tools \
&& rm -rf /var/lib/apt/lists/*
# Fetch the python-build-standalone release and unpack it to /opt/coderai/python.
RUN set -eux; \
curl -fsSL -o /tmp/python.tar.gz \
"https://github.com/astral-sh/python-build-standalone/releases/download/${PBS_RELEASE}/cpython-${PYTHON_VERSION}+${PBS_RELEASE}-x86_64-unknown-linux-gnu-install_only.tar.gz"; \
mkdir -p /opt/coderai; \
tar -xzf /tmp/python.tar.gz -C /opt/coderai; \
rm /tmp/python.tar.gz; \
/opt/coderai/python/bin/python3 --version
# From here on the bundled interpreter is first on PATH.
ENV PYTHONHOME=/opt/coderai/python \
PATH=/opt/coderai/python/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# Install the uv binary from its release tarball into /usr/local/bin.
RUN curl -fsSL "https://github.com/astral-sh/uv/releases/download/${UV_VERSION}/uv-x86_64-unknown-linux-gnu.tar.gz" \
| tar -xz -C /usr/local/bin --strip-components=1 uv-x86_64-unknown-linux-gnu/uv
# Python-side build tooling for the wheel builds below.
RUN uv pip install --python /opt/coderai/python/bin/python3 --upgrade pip setuptools wheel scikit-build-core cmake ninja numpy pybind11
RUN mkdir -p /opt/wheels
# llama-cpp-python wheel with both CUDA and Vulkan backends enabled.
RUN set -eux; \
pkg="llama-cpp-python${LLAMA_CPP_PYTHON_VERSION:+==${LLAMA_CPP_PYTHON_VERSION}}"; \
CMAKE_ARGS="-DGGML_CUDA=ON -DGGML_VULKAN=ON" \
uv pip wheel --python /opt/coderai/python/bin/python3 --wheel-dir /opt/wheels --no-deps --no-cache "$pkg"
# stable-diffusion-cpp-python wheel: try CUDA+Vulkan first, then retry with
# CUDA only if that build fails.
RUN set -eux; \
pkg="stable-diffusion-cpp-python${SD_CPP_PYTHON_VERSION:+==${SD_CPP_PYTHON_VERSION}}"; \
CMAKE_ARGS="-DSD_CUDA=ON -DSD_VULKAN=ON -DSD_WEBM=OFF" \
uv pip wheel --python /opt/coderai/python/bin/python3 --wheel-dir /opt/wheels --no-deps --no-cache "$pkg" || \
CMAKE_ARGS="-DSD_CUDA=ON -DSD_WEBM=OFF" \
uv pip wheel --python /opt/coderai/python/bin/python3 --wheel-dir /opt/wheels --no-deps --no-cache "$pkg"
# whisper.cpp bindings: clone at WHISPERCPP_REF (falling back to the default
# branch). If the repo ships bindings/python, build them with Vulkan+CUDA, then
# Vulkan only, and finally fall back to the whispercpp package by name; without
# that directory the whispercpp package is used directly.
RUN set -eux; \
git clone --depth 1 --branch "$WHISPERCPP_REF" https://github.com/ggerganov/whisper.cpp /tmp/whisper.cpp || \
git clone --depth 1 https://github.com/ggerganov/whisper.cpp /tmp/whisper.cpp; \
if [ -d /tmp/whisper.cpp/bindings/python ]; then \
CMAKE_ARGS="-DWHISPER_VULKAN=ON -DGGML_VULKAN=ON -DGGML_CUDA=ON" \
uv pip wheel --python /opt/coderai/python/bin/python3 --wheel-dir /opt/wheels --no-deps --no-cache /tmp/whisper.cpp/bindings/python || \
CMAKE_ARGS="-DWHISPER_VULKAN=ON -DGGML_VULKAN=ON" \
uv pip wheel --python /opt/coderai/python/bin/python3 --wheel-dir /opt/wheels --no-deps --no-cache /tmp/whisper.cpp/bindings/python || \
uv pip wheel --python /opt/coderai/python/bin/python3 --wheel-dir /opt/wheels --no-deps --no-cache whispercpp; \
else \
uv pip wheel --python /opt/coderai/python/bin/python3 --wheel-dir /opt/wheels --no-deps --no-cache whispercpp; \
fi
WORKDIR /opt/coderai/app
# Requirement files go to /tmp because requirements-oci.txt includes
# /tmp/requirements.txt via "-r".
COPY packaging/common/requirements-oci.txt /tmp/requirements-oci.txt
COPY requirements.txt /tmp/requirements.txt
COPY requirements-nvidia.txt /tmp/requirements-nvidia.txt
COPY requirements-vulkan.txt /tmp/requirements-vulkan.txt
COPY . /opt/coderai/app
# Install the dependency set, letting uv pick up the wheels built above through
# --find-links, then drop the uv cache and the copied requirement files.
RUN uv pip install --python /opt/coderai/python/bin/python3 \
--find-links /opt/wheels \
-r /tmp/requirements-oci.txt \
&& rm -rf /root/.cache/uv /tmp/requirements*.txt
# Make the app entry point executable, create the state directories and strip
# VCS data, virtualenvs, generated output and bytecode/test directories.
RUN set -eux; \
chmod +x /opt/coderai/app/coderai; \
mkdir -p /config /models /cache /opt/coderai/app/models; \
rm -rf \
/opt/coderai/app/.git \
/opt/coderai/app/venv* \
/opt/coderai/app/.venv \
/opt/coderai/app/township_output \
/opt/coderai/app/offload \
/opt/coderai/app/dist \
/opt/coderai/app/.packaging-cache; \
find /opt/coderai/python -type d \( -name __pycache__ -o -name tests -o -name test \) -prune -exec rm -rf '{}' +; \
find /opt/coderai/app -type d -name __pycache__ -prune -exec rm -rf '{}' +
# Runtime stage: CUDA "runtime" base that receives the finished /opt/coderai
# tree from the builder stage, the build manifest and the launcher script.
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime
# NOTE(review): PYTHON_VERSION is declared but not referenced in this stage as shown.
ARG PYTHON_VERSION=3.13.5
# Cache locations all live under /cache; config and models get their own
# directories. The three directories are declared as volumes below.
ENV DEBIAN_FRONTEND=noninteractive \
PYTHONHOME=/opt/coderai/python \
PATH=/opt/coderai/python/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
PYTHONUNBUFFERED=1 \
HF_HOME=/cache/huggingface \
HUGGINGFACE_HUB_CACHE=/cache/huggingface/hub \
TRANSFORMERS_CACHE=/cache/huggingface/transformers \
DIFFUSERS_CACHE=/cache/diffusers \
CODERAI_CONFIG_DIR=/config \
CODERAI_MODELS_DIR=/models \
CODERAI_CACHE_DIR=/cache \
CODERAI_HOST=0.0.0.0 \
CODERAI_PORT=8776
# Runtime shared libraries and tools only (no compilers): ffmpeg, git, OpenMP,
# GL/glib, libsndfile, the Vulkan loader and Mesa Vulkan drivers, openssl.
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
ffmpeg \
git \
libgomp1 \
libgl1 \
libglib2.0-0 \
libsndfile1 \
libvulkan1 \
mesa-vulkan-drivers \
openssl \
&& rm -rf /var/lib/apt/lists/*
COPY --from=builder /opt/coderai /opt/coderai
COPY --from=build_meta /build-manifest.json /opt/coderai/BUILD-MANIFEST.json
# The launcher is installed as "coderai" and is the image entry point.
COPY packaging/linux/launcher/coderai-oci /usr/local/bin/coderai
RUN set -eux; \
chmod +x /usr/local/bin/coderai; \
mkdir -p /config /models /cache
VOLUME ["/config", "/models", "/cache"]
# Matches CODERAI_PORT above.
EXPOSE 8776
ENTRYPOINT ["/usr/local/bin/coderai"]
# Global build args: select the nvidia/cuda base image tag used by the runtime
# stage's FROM line.
ARG CUDA_VERSION=12.4.1
ARG UBUNTU_VERSION=22.04
# Helper stage holding only the pre-generated build manifest, so the runtime
# stage can pull it in with COPY --from=build_meta.
FROM scratch AS build_meta
COPY .packaging-cache/build-manifest.json /build-manifest.json
# Runtime stage for the "from venv" image: no wheels are compiled here.
# A standalone CPython is installed and the site-packages of a local virtualenv
# (delivered through the local_bundle build context) are synced into it.
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime
ARG PYTHON_VERSION=3.13.5
ARG PBS_RELEASE=20250612
# major.minor of the venv's Python; used to locate .../lib/pythonX.Y/site-packages
# in both the bundle and the target interpreter.
ARG VENV_PYTHON_MINOR=3.13
# Cache locations all live under /cache; config and models get their own directories.
ENV DEBIAN_FRONTEND=noninteractive \
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
PYTHONUNBUFFERED=1 \
HF_HOME=/cache/huggingface \
HUGGINGFACE_HUB_CACHE=/cache/huggingface/hub \
TRANSFORMERS_CACHE=/cache/huggingface/transformers \
DIFFUSERS_CACHE=/cache/diffusers \
CODERAI_CONFIG_DIR=/config \
CODERAI_MODELS_DIR=/models \
CODERAI_CACHE_DIR=/cache \
CODERAI_HOST=0.0.0.0 \
CODERAI_PORT=8776
# Runtime libraries plus curl (Python download below) and rsync (bundle sync below).
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
curl \
ffmpeg \
git \
libgomp1 \
libgl1 \
libglib2.0-0 \
libsndfile1 \
libvulkan1 \
mesa-vulkan-drivers \
openssl \
rsync \
&& rm -rf /var/lib/apt/lists/*
# Fetch the python-build-standalone release and unpack it to /opt/coderai/python.
RUN set -eux; \
curl -fsSL -o /tmp/python.tar.gz \
"https://github.com/astral-sh/python-build-standalone/releases/download/${PBS_RELEASE}/cpython-${PYTHON_VERSION}+${PBS_RELEASE}-x86_64-unknown-linux-gnu-install_only.tar.gz"; \
mkdir -p /opt/coderai; \
tar -xzf /tmp/python.tar.gz -C /opt/coderai; \
rm /tmp/python.tar.gz; \
/opt/coderai/python/bin/python3 --version
ENV PYTHONHOME=/opt/coderai/python \
PATH=/opt/coderai/python/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# BuildKit named context supplied by packaging/linux/build_oci_image.sh:
# --build-context local_bundle=/path/to/.packaging-cache/oci-venv-context
# The bundle contains the selected venv plus ldd-discovered native libraries from
# the local machine. GPU drivers are intentionally not bundled; NVIDIA Container
# Toolkit / host Vulkan ICDs remain the runtime contract.
COPY --from=local_bundle / /tmp/local-bundle/
# Merge the bundle into the image:
#  - site-packages is mirrored (rsync --delete) and must exist in the bundle,
#    otherwise "test -d" fails the build;
#  - venv/bin entries are copied except python* and activate*;
#  - optional local-libs/ and local-bin/ trees are copied when present, and
#    files directly under /usr/local/bin are made executable;
#  - the staging directory and bytecode/test directories are removed afterwards.
RUN set -eux; \
target_sp="/opt/coderai/python/lib/python${VENV_PYTHON_MINOR}/site-packages"; \
source_sp="/tmp/local-bundle/venv/lib/python${VENV_PYTHON_MINOR}/site-packages"; \
test -d "$source_sp"; \
mkdir -p "$target_sp"; \
rsync -a --delete "$source_sp/" "$target_sp/"; \
if [ -d /tmp/local-bundle/venv/bin ]; then \
find /tmp/local-bundle/venv/bin -maxdepth 1 -type f ! -name 'python*' ! -name 'activate*' -exec cp -a '{}' /opt/coderai/python/bin/ \;; \
fi; \
if [ -d /tmp/local-bundle/local-libs ]; then \
mkdir -p /opt/coderai/local-libs; \
rsync -a /tmp/local-bundle/local-libs/ /opt/coderai/local-libs/; \
fi; \
if [ -d /tmp/local-bundle/local-bin ]; then \
rsync -a /tmp/local-bundle/local-bin/ /usr/local/bin/; \
find /usr/local/bin -maxdepth 1 -type f -exec chmod +x '{}' \;; \
fi; \
rm -rf /tmp/local-bundle; \
find /opt/coderai/python -type d \( -name __pycache__ -o -name tests -o -name test \) -prune -exec rm -rf '{}' +
WORKDIR /opt/coderai/app
COPY . /opt/coderai/app
COPY --from=build_meta /build-manifest.json /opt/coderai/BUILD-MANIFEST.json
# The launcher is installed as "coderai".
COPY packaging/linux/launcher/coderai-oci /usr/local/bin/coderai
RUN set -eux; \
chmod +x /usr/local/bin/coderai /opt/coderai/app/coderai; \
mkdir -p /config /models /cache /opt/coderai/app/models; \
rm -rf \
/opt/coderai/app/.git \
/opt/coderai/app/venv* \
/opt/coderai/app/.venv \
/opt/coderai/app/township_output \
/opt/coderai/app/offload \
/opt/coderai/app/dist \
/opt/coderai/app/.packaging-cache; \
find /opt/coderai/app -type d -name __pycache__ -prune -exec rm -rf '{}' +; \
/opt/coderai/python/bin/python3 - <<'PY'
import importlib.util
missing = [name for name in ("fastapi", "uvicorn", "torch") if importlib.util.find_spec(name) is None]
if missing:
raise SystemExit(f"venv image is missing required package(s): {', '.join(missing)}")
PY
VOLUME ["/config", "/models", "/cache"]
EXPOSE 8776
ENTRYPOINT ["/usr/local/bin/coderai"]
CoderAI Linux Tarball
=====================
Run:
./bin/coderai
Then open:
http://127.0.0.1:8776/admin
Default first-run credentials are created by the app:
admin / admin
Change the admin password after the first login.
State directories inside this bundle:
config/ app config and auth
models/ model storage / data path
cache/ Hugging Face, diffusers, and runtime caches
NVIDIA CUDA:
Install a compatible NVIDIA driver on the host. The bundle includes Python/CUDA
runtime packages from the source venv, but not the host GPU driver.
AMD/Intel Vulkan:
Install Vulkan runtime/ICD packages on the host, for example on Debian/Ubuntu:
sudo apt install libvulkan1 mesa-vulkan-drivers
CPU:
No GPU setup is required.
#!/usr/bin/env bash
# Build the CoderAI OCI image locally, either from pinned packages
# ("from-scratch", the default) or from an existing local virtualenv ("venv").
set -euo pipefail
# Repository root: two directories above the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Optional version pins; sourced only when the file exists.
VERSIONS_FILE="$ROOT_DIR/packaging/versions.env"
if [[ -f "$VERSIONS_FILE" ]]; then
# shellcheck disable=SC1090
source "$VERSIONS_FILE"
fi
# DOCKER may contain several words; DOCKER_CMD holds them as separate
# arguments for execution, DOCKER_BIN keeps the raw string for messages.
DOCKER_BIN="${DOCKER:-docker}"
read -r -a DOCKER_CMD <<< "$DOCKER_BIN"
# Defaults; the option parser below may override every one of them.
IMAGE_TAG="${OCI_IMAGE:-coderai:local}"
BUILD_MODE="from-scratch"
VENV_PATH=""
INCLUDE_LOCAL_LIBS=1
AUTO_LOCAL_BINS=1
LOCAL_BINARIES=()
LOCAL_BINARY_DIRS=()
# Print the command-line help to stdout.
usage() {
cat <<'EOF'
Usage:
./build-oci.sh [IMAGE_TAG]
./build-oci.sh --from-venv [IMAGE_TAG]
./build-oci.sh --venv PATH [IMAGE_TAG]
Options:
--from-scratch Build the full OCI image from pinned packages and native wheels (default).
--from-venv Build from the currently activated virtualenv ($VIRTUAL_ENV).
--venv PATH Build from a specific local virtualenv.
--no-local-libs Do not copy ldd-discovered local native libraries from the venv.
--no-auto-local-bins Do not auto-include known locally compiled helper binaries.
--include-local-bin PATH
Copy an extra tested local binary into the image, including its ldd libs.
Can be repeated. Useful for tools such as whisper-server.
--include-local-dir PATH
Copy executable files from a local build directory, including ldd libs.
Can be repeated. Useful for local whisper.cpp build/bin directories.
-t, --tag TAG Image tag to create (default: coderai:local or OCI_IMAGE from versions.env).
-h, --help Show this help.
Examples:
./build-oci.sh
source venv_all/bin/activate && ./build-oci.sh --from-venv coderai:venv
./build-oci.sh --venv ./venv_all -t coderai:venv
./build-oci.sh --venv ./venv_all --include-local-bin /usr/local/bin/whisper-server -t coderai:venv
./build-oci.sh --venv ./venv_all --include-local-dir ~/whisper.cpp/build/bin -t coderai:venv
EOF
}
# Parse command-line options.
#
# Options that take a value exit with status 2 when the value is missing.
# A bare word is taken as the image tag (the last one wins).
# --include-local-bin / --include-local-dir only make sense for venv builds, so
# they switch the build mode; when no venv was chosen yet they fall back to the
# activated virtualenv ($VIRTUAL_ENV) instead of leaving VENV_PATH empty.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --from-scratch)
      BUILD_MODE="from-scratch"
      shift
      ;;
    --from-venv)
      BUILD_MODE="venv"
      VENV_PATH="${VIRTUAL_ENV:-}"
      shift
      ;;
    --venv)
      BUILD_MODE="venv"
      if [[ $# -lt 2 ]]; then
        echo "Error: --venv requires a path" >&2
        exit 2
      fi
      VENV_PATH="$2"
      shift 2
      ;;
    --no-local-libs)
      INCLUDE_LOCAL_LIBS=0
      shift
      ;;
    --no-auto-local-bins)
      AUTO_LOCAL_BINS=0
      shift
      ;;
    --include-local-bin)
      BUILD_MODE="venv"
      if [[ $# -lt 2 ]]; then
        echo "Error: --include-local-bin requires a path" >&2
        exit 2
      fi
      # Keep an explicit --venv choice; otherwise use the activated venv.
      : "${VENV_PATH:=${VIRTUAL_ENV:-}}"
      LOCAL_BINARIES+=("$2")
      shift 2
      ;;
    --include-local-dir)
      BUILD_MODE="venv"
      if [[ $# -lt 2 ]]; then
        echo "Error: --include-local-dir requires a path" >&2
        exit 2
      fi
      # Keep an explicit --venv choice; otherwise use the activated venv.
      : "${VENV_PATH:=${VIRTUAL_ENV:-}}"
      LOCAL_BINARY_DIRS+=("$2")
      shift 2
      ;;
    -t|--tag)
      if [[ $# -lt 2 ]]; then
        echo "Error: $1 requires an image tag" >&2
        exit 2
      fi
      IMAGE_TAG="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    --)
      shift
      break
      ;;
    -*)
      echo "Error: unknown option: $1" >&2
      usage >&2
      exit 2
      ;;
    *)
      IMAGE_TAG="$1"
      shift
      ;;
  esac
done
# Words after "--" are positional too; the only positional argument is the
# image tag, so honour it instead of silently dropping it.
for positional_arg in "$@"; do
  IMAGE_TAG="$positional_arg"
done
# Fallback pins, used only when neither packaging/versions.env nor the
# environment provides a non-empty value. The two *_CPP_PYTHON_VERSION pins
# default to empty.
PYTHON_VERSION="${PYTHON_VERSION:-3.13.5}"
PBS_RELEASE="${PBS_RELEASE:-20250612}"
UV_VERSION="${UV_VERSION:-0.7.13}"
CUDA_VERSION="${CUDA_VERSION:-12.4.1}"
UBUNTU_VERSION="${UBUNTU_VERSION:-22.04}"
WHISPERCPP_REF="${WHISPERCPP_REF:-master}"
LLAMA_CPP_PYTHON_VERSION="${LLAMA_CPP_PYTHON_VERSION:-}"
SD_CPP_PYTHON_VERSION="${SD_CPP_PYTHON_VERSION:-}"
# Write .packaging-cache/build-manifest.json through write_manifest.py.
#   $1  build mode recorded in the manifest
#   $2  interpreter path recorded in the manifest (optional)
#   $3  virtualenv path recorded in the manifest (optional)
# The collected local binaries are passed as one colon-separated string.
write_build_manifest() {
  local build_mode="$1"
  local python_path="${2:-}"
  local source_venv="${3:-}"
  local bins_csv=""
  local total=${#LOCAL_BINARIES[@]}
  local idx
  for (( idx = 0; idx < total; idx++ )); do
    if (( idx > 0 )); then
      bins_csv+=":"
    fi
    bins_csv+="${LOCAL_BINARIES[idx]}"
  done
  MANIFEST_OUT="$ROOT_DIR/.packaging-cache/build-manifest.json" \
  PROJECT_ROOT="$ROOT_DIR" \
  MANIFEST_ARTIFACT="oci-image" \
  MANIFEST_BUILD_MODE="$build_mode" \
  MANIFEST_PYTHON="$python_path" \
  MANIFEST_VENV="$source_venv" \
  MANIFEST_LOCAL_BINS="$bins_csv" \
  PYTHON_VERSION="$PYTHON_VERSION" \
  PBS_RELEASE="$PBS_RELEASE" \
  UV_VERSION="$UV_VERSION" \
  CUDA_VERSION="$CUDA_VERSION" \
  UBUNTU_VERSION="$UBUNTU_VERSION" \
  python3 "$ROOT_DIR/packaging/common/write_manifest.py"
}
# Append $1 to LOCAL_BINARIES as an absolute path.
# Paths that are not executable regular files are ignored, as are paths that
# are already in the list.
add_local_binary() {
  local candidate="$1"
  [[ -x "$candidate" && -f "$candidate" ]] || return 0
  local resolved
  resolved="$(cd "$(dirname "$candidate")" && pwd)/$(basename "$candidate")"
  local known
  for known in "${LOCAL_BINARIES[@]}"; do
    if [[ "$known" == "$resolved" ]]; then
      return 0
    fi
  done
  LOCAL_BINARIES+=("$resolved")
}
# Register well-known, locally compiled helper binaries when auto-discovery is
# enabled. Python extension modules already travel with the venv; this only
# targets standalone helpers that were built and tested outside Docker.
discover_local_binaries() {
  if [[ "$AUTO_LOCAL_BINS" != "1" ]]; then
    return 0
  fi
  local whisper_build_bin="$HOME/whisper.cpp/build/bin"
  local probe
  for probe in \
    "/usr/local/bin/whisper-server" \
    "/usr/local/bin/whisper-cli" \
    "$whisper_build_bin/whisper-server" \
    "$whisper_build_bin/whisper-cli" \
    "$whisper_build_bin/main" \
    "$whisper_build_bin/server"
  do
    add_local_binary "$probe"
  done
}
# Stage everything the venv-mode image build needs into a fresh directory.
#   $1  source virtualenv
#   $2  bundle directory (removed and recreated)
#   $3  "1" to also collect ldd-discovered native libraries
# Resulting layout: <bundle>/venv, <bundle>/local-bin, <bundle>/local-libs.
# Exits with status 2 when a requested binary directory is missing or a listed
# binary is not executable.
prepare_venv_bundle() {
  local venv="$1"
  local bundle="$2"
  local include_libs="$3"
  # Loop variables are local so they do not leak into the calling script.
  local dir_path found_bin bin_path dest_path

  rm -rf "$bundle"
  mkdir -p "$bundle/venv" "$bundle/local-libs" "$bundle/local-bin"
  echo "Preparing venv bundle: $bundle"
  cp -a "$venv/." "$bundle/venv/"

  discover_local_binaries
  for dir_path in "${LOCAL_BINARY_DIRS[@]}"; do
    if [[ ! -d "$dir_path" ]]; then
      echo "Error: local binary directory does not exist: $dir_path" >&2
      exit 2
    fi
    while IFS= read -r -d '' found_bin; do
      add_local_binary "$found_bin"
    done < <(find "$dir_path" -maxdepth 2 -type f -perm -111 -print0)
  done

  for bin_path in "${LOCAL_BINARIES[@]}"; do
    if [[ ! -x "$bin_path" ]]; then
      echo "Error: local binary is not executable: $bin_path" >&2
      exit 2
    fi
    dest_path="$bundle/local-bin/$(basename "$bin_path")"
    # Nothing to do when source and destination are the same file.
    if [[ -e "$dest_path" && "$bin_path" -ef "$dest_path" ]]; then
      continue
    fi
    cp -a --remove-destination "$bin_path" "$dest_path"
  done
  if [[ ${#LOCAL_BINARIES[@]} -gt 0 ]]; then
    printf 'Included local compiled binaries:\n'
    printf '  %s\n' "${LOCAL_BINARIES[@]}"
  fi

  if [[ "$include_libs" != "1" ]]; then
    return 0
  fi

  # Run ldd over every executable / shared object in the venv and in local-bin
  # and copy the non-system libraries they resolve to into local-libs.
  VENV_BUNDLE="$bundle" VENV_PATH_FOR_LDD="$venv" python3 - <<'PY'
import os
import shutil
import subprocess
from pathlib import Path

bundle = Path(os.environ["VENV_BUNDLE"])
venv = Path(os.environ["VENV_PATH_FOR_LDD"])
local_libs = bundle / "local-libs"
local_bin = bundle / "local-bin"

skip_prefixes = (
    "/lib/ld-linux",
    "/lib64/ld-linux",
)
skip_names = {
    "linux-vdso.so.1",
    "libc.so.6",
    "libdl.so.2",
    "libm.so.6",
    "libpthread.so.0",
    "librt.so.1",
    "libutil.so.1",
    "libresolv.so.2",
    "libselinux.so.1",
    "libpcre2-8.so.0",
    "libacl.so.1",
    "libattr.so.1",
    "libz.so.1",
    "libzstd.so.1",
    "liblzma.so.5",
    "libbz2.so.1.0",
    "libssl.so.3",
    "libcrypto.so.3",
    "libgcc_s.so.1",
    "libstdc++.so.6",
}
skip_starts = (
    "libcuda.so",  # host driver, injected by NVIDIA Container Toolkit
    "libnvidia-",  # host driver stack
)
system_dirs = ("/lib/", "/lib64/", "/usr/lib/", "/usr/lib64/")

candidates = []
for root in (venv / "lib", venv / "bin", local_bin):
    if not root.exists():
        continue
    for path in root.rglob("*"):
        if path.is_file() and (os.access(path, os.X_OK) or ".so" in path.name):
            candidates.append(path)

# Real library file -> every file name it was requested under. ldd reports the
# path the loader found (often a soname symlink such as libfoo.so.1), while the
# copy is taken from the resolved file (libfoo.so.1.2.3). The requested names
# must be remembered here, otherwise the soname links cannot be recreated.
libs = {}
for path in candidates:
    try:
        proc = subprocess.run(
            ["ldd", str(path)],
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except OSError:
        continue
    if proc.returncode not in (0, 1):
        continue
    for line in proc.stdout.splitlines():
        line = line.strip()
        dep = None
        if "=>" in line:
            rhs = line.split("=>", 1)[1].strip()
            if rhs.startswith("/"):
                dep = rhs.split(" (", 1)[0]
        elif line.startswith("/"):
            dep = line.split(" (", 1)[0]
        if not dep:
            continue
        dep_path = Path(dep)
        name = dep_path.name
        dep_str = str(dep_path)
        if name in skip_names or name.startswith(skip_starts):
            continue
        if dep_str.startswith(system_dirs) and "site-packages" not in dep_str:
            continue
        if dep_str.startswith(skip_prefixes):
            continue
        if dep_path.exists():
            libs.setdefault(dep_path.resolve(), set()).add(name)

for src in sorted(libs):
    dest = local_libs / src.name
    if not dest.exists():
        shutil.copy2(src, dest)
    # Native loaders ask for the soname, so expose the copy under every
    # requested name that differs from the real file name.
    for requested in sorted(libs[src] - {src.name}):
        link = local_libs / requested
        if link.exists() or link.is_symlink():
            continue
        try:
            link.symlink_to(src.name)
        except OSError:
            shutil.copy2(src, link)

print(f"Copied {len(libs)} ldd-discovered native librar{'y' if len(libs) == 1 else 'ies'}")
PY
}
# Mode-specific preparation.
# venv mode: validate the virtualenv, require its Python minor version to match
# the standalone interpreter, stage the bundle and write the manifest.
# from-scratch mode: only the manifest is written.
if [[ "$BUILD_MODE" == "venv" ]]; then
  if [[ -z "$VENV_PATH" ]]; then
    echo "Error: --from-venv requires an activated virtualenv, or use --venv PATH" >&2
    exit 2
  fi
  if [[ ! -d "$VENV_PATH" ]]; then
    echo "Error: virtualenv directory does not exist: $VENV_PATH" >&2
    exit 2
  fi
  VENV_PATH="$(cd "$VENV_PATH" && pwd)"
  if [[ ! -x "$VENV_PATH/bin/python" ]]; then
    echo "Error: '$VENV_PATH' does not look like a Python virtualenv (missing bin/python)" >&2
    exit 2
  fi
  # The interpreter path is quoted so virtualenvs below directories with
  # spaces work.
  VENV_PYTHON_MINOR="$("$VENV_PATH/bin/python" -c 'import sys; print("%d.%d" % sys.version_info[:2])')"
  PBS_PYTHON_MINOR="${PYTHON_VERSION%.*}"
  if [[ "$VENV_PYTHON_MINOR" != "$PBS_PYTHON_MINOR" ]]; then
    cat >&2 <<EOF
Error: venv Python minor ($VENV_PYTHON_MINOR) does not match standalone Python minor ($PBS_PYTHON_MINOR).
Set PYTHON_VERSION in packaging/versions.env to a python-build-standalone release with the same minor,
or use a matching virtualenv.
EOF
    exit 2
  fi
  VENV_CONTEXT="$ROOT_DIR/.packaging-cache/oci-venv-context"
  prepare_venv_bundle "$VENV_PATH" "$VENV_CONTEXT" "$INCLUDE_LOCAL_LIBS"
  write_build_manifest "venv" "$VENV_PATH/bin/python" "$VENV_PATH"
else
  mkdir -p "$ROOT_DIR/.packaging-cache"
  write_build_manifest "from-scratch" "" ""
fi
# Print the effective build settings before the build starts; the venv-only
# details are shown in venv mode only.
cat <<EOF
Building CoderAI OCI image locally
image: $IMAGE_TAG
mode: $BUILD_MODE
python: $PYTHON_VERSION
PBS release: $PBS_RELEASE
uv: $UV_VERSION
CUDA: $CUDA_VERSION
Ubuntu base: $UBUNTU_VERSION
EOF
if [[ "$BUILD_MODE" == "venv" ]]; then
cat <<EOF
venv: $VENV_PATH
venv python: $VENV_PYTHON_MINOR
local libs: $([[ "$INCLUDE_LOCAL_LIBS" == "1" ]] && echo copied || echo disabled)
local bins: ${#LOCAL_BINARIES[@]}
EOF
fi
# Run the image build. Both modes share the base pins; venv mode adds the
# staged bundle as a named build context, from-scratch mode adds the pins for
# the components it compiles or installs itself.
build_opts=(
  --build-arg "PYTHON_VERSION=$PYTHON_VERSION"
  --build-arg "PBS_RELEASE=$PBS_RELEASE"
)
case "$BUILD_MODE" in
  venv)
    dockerfile="$ROOT_DIR/packaging/linux/Dockerfile.oci-venv"
    build_opts=(--build-context "local_bundle=$VENV_CONTEXT" "${build_opts[@]}")
    build_opts+=(
      --build-arg "CUDA_VERSION=$CUDA_VERSION"
      --build-arg "UBUNTU_VERSION=$UBUNTU_VERSION"
      --build-arg "VENV_PYTHON_MINOR=$VENV_PYTHON_MINOR"
    )
    ;;
  *)
    dockerfile="$ROOT_DIR/packaging/linux/Dockerfile.oci"
    build_opts+=(
      --build-arg "UV_VERSION=$UV_VERSION"
      --build-arg "CUDA_VERSION=$CUDA_VERSION"
      --build-arg "UBUNTU_VERSION=$UBUNTU_VERSION"
      --build-arg "WHISPERCPP_REF=$WHISPERCPP_REF"
      --build-arg "LLAMA_CPP_PYTHON_VERSION=$LLAMA_CPP_PYTHON_VERSION"
      --build-arg "SD_CPP_PYTHON_VERSION=$SD_CPP_PYTHON_VERSION"
    )
    ;;
esac
"${DOCKER_CMD[@]}" build -f "$dockerfile" "${build_opts[@]}" -t "$IMAGE_TAG" "$ROOT_DIR"
# Print ready-to-copy run commands. \$PWD is escaped so it is printed
# literally; $DOCKER_BIN and $IMAGE_TAG are expanded now.
cat <<EOF
Built $IMAGE_TAG
Run examples:
NVIDIA:
$DOCKER_BIN run --gpus all --ipc=host -p 8776:8776 -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG
AMD/Intel Vulkan:
$DOCKER_BIN run --device /dev/dri --ipc=host -p 8776:8776 -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG
CPU:
$DOCKER_BIN run --ipc=host -p 8776:8776 -v "\$PWD/coderai-config:/config" -v "\$PWD/coderai-models:/models" -v "\$PWD/coderai-cache:/cache" $IMAGE_TAG
EOF
#!/usr/bin/env sh
set -eu
# Runtime launcher for the CoderAI OCI image.
# Keeps config/model/cache state in mounted volumes and exposes the server outside
# the container by default.

# Every setting can be overridden from the container environment.
: "${CODERAI_CONFIG_DIR:=/config}"
: "${CODERAI_MODELS_DIR:=/models}"
: "${CODERAI_CACHE_DIR:=/cache}"
: "${CODERAI_HOST:=0.0.0.0}"
: "${CODERAI_PORT:=8776}"

export PYTHONHOME=/opt/coderai/python
export PATH="/opt/coderai/python/bin:$PATH"
export XDG_CONFIG_HOME="$CODERAI_CONFIG_DIR"
export XDG_DATA_HOME="$CODERAI_MODELS_DIR"
export XDG_CACHE_HOME="$CODERAI_CACHE_DIR"
export HF_HOME="${HF_HOME:-$CODERAI_CACHE_DIR/huggingface}"
export HUGGINGFACE_HUB_CACHE="${HUGGINGFACE_HUB_CACHE:-$HF_HOME/hub}"

# Library search path: interpreter libs, bundled local libs, then the lib
# directory of every nvidia package in site-packages. The interpreter's minor
# version is selected at build time, so it is matched with a glob instead of
# being hard-coded. An unmatched glob stays literal and fails the -d test.
LIBS="/opt/coderai/python/lib:/opt/coderai/local-libs"
for d in /opt/coderai/python/lib/python3.*/site-packages/nvidia/*/lib; do
  if [ -d "$d" ]; then
    LIBS="$LIBS:$d"
  fi
done
export LD_LIBRARY_PATH="$LIBS${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

mkdir -p "$CODERAI_CONFIG_DIR/coderai" "$CODERAI_MODELS_DIR/coderai" "$CODERAI_CACHE_DIR/coderai"
CONFIG_DIR="$CODERAI_CONFIG_DIR/coderai"
CONFIG_FILE="$CONFIG_DIR/config.json"
# Ensure the container binds to all interfaces even when config.json was created
# by the app default, which uses 127.0.0.1 for local desktop installs.
#
# One script covers both a missing and an existing config file:
#   * a missing, unreadable or malformed file counts as an empty config;
#   * valid JSON that is not an object (or a non-object "server" entry) is
#     replaced rather than crashing the launcher;
#   * the host is rewritten when it is unset/loopback, or whenever a host other
#     than 0.0.0.0 was requested; the port always follows CODERAI_PORT;
#   * the file is written only when something changed.
/opt/coderai/python/bin/python3 - "$CONFIG_FILE" "$CODERAI_HOST" "$CODERAI_PORT" <<'PY'
import json
import sys
from pathlib import Path

path = Path(sys.argv[1])
host = sys.argv[2]
port = int(sys.argv[3])

try:
    data = json.loads(path.read_text())
except (OSError, ValueError):
    data = {}
if not isinstance(data, dict):
    data = {}

server = data.get("server")
if not isinstance(server, dict):
    server = {}
    data["server"] = server

changed = False
if server.get("host") in (None, "", "127.0.0.1", "localhost") or host != "0.0.0.0":
    if server.get("host") != host:
        server["host"] = host
        changed = True
if server.get("port") != port:
    server["port"] = port
    changed = True

if changed:
    path.write_text(json.dumps(data, indent=2) + "\n")
PY
exec /opt/coderai/python/bin/python3 /opt/coderai/app/coderai --config "$CONFIG_DIR" "$@"
#!/usr/bin/env sh
set -eu
# Runtime launcher for the CoderAI Linux tarball.
# All paths are derived from the bundle root (the parent of this script's
# directory), so the extracted tree can live anywhere.
HERE="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"

# Every setting can be overridden from the environment.
: "${CODERAI_CONFIG_DIR:=$HERE/config}"
: "${CODERAI_MODELS_DIR:=$HERE/models}"
: "${CODERAI_CACHE_DIR:=$HERE/cache}"
: "${CODERAI_HOST:=127.0.0.1}"
: "${CODERAI_PORT:=8776}"

export PYTHONHOME="$HERE/python"
export PATH="$HERE/python/bin:$HERE/bin:$PATH"
export XDG_CONFIG_HOME="$CODERAI_CONFIG_DIR"
export XDG_DATA_HOME="$CODERAI_MODELS_DIR"
export XDG_CACHE_HOME="$CODERAI_CACHE_DIR"
export HF_HOME="${HF_HOME:-$CODERAI_CACHE_DIR/huggingface}"
export HUGGINGFACE_HUB_CACHE="${HUGGINGFACE_HUB_CACHE:-$HF_HOME/hub}"

# Library search path: interpreter libs, bundled local libs, then the lib
# directory of every nvidia package in site-packages. The bundled interpreter's
# minor version follows PYTHON_VERSION at packaging time, so it is matched with
# a glob instead of being hard-coded. An unmatched glob stays literal and fails
# the -d test.
LIBS="$HERE/python/lib:$HERE/local-libs"
for d in "$HERE"/python/lib/python3.*/site-packages/nvidia/*/lib; do
  if [ -d "$d" ]; then
    LIBS="$LIBS:$d"
  fi
done
export LD_LIBRARY_PATH="$LIBS${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

mkdir -p "$CODERAI_CONFIG_DIR/coderai" "$CODERAI_MODELS_DIR/coderai" "$CODERAI_CACHE_DIR/coderai"
exec "$HERE/python/bin/python3" "$HERE/app/coderai" --config "$CODERAI_CONFIG_DIR/coderai" "$@"
#!/usr/bin/env bash
# Package a local virtualenv, a standalone Python build and the application
# source into a relocatable .tar.zst bundle.
set -euo pipefail
# Repository root: two directories above the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Optional version pins; sourced only when the file exists.
VERSIONS_FILE="$ROOT_DIR/packaging/versions.env"
if [[ -f "$VERSIONS_FILE" ]]; then
# shellcheck disable=SC1090
source "$VERSIONS_FILE"
fi
# Defaults; the option parser below may override them. The source venv
# defaults to the activated one, if any.
VENV_PATH="${VIRTUAL_ENV:-}"
OUT_DIR="$ROOT_DIR/dist"
OUT_NAME=""
INCLUDE_LOCAL_LIBS=1
AUTO_LOCAL_BINS=1
LOCAL_BINARIES=()
LOCAL_BINARY_DIRS=()
# Fallback pins, used only when versions.env / the environment set none.
PYTHON_VERSION="${PYTHON_VERSION:-3.13.5}"
PBS_RELEASE="${PBS_RELEASE:-20250612}"
UV_VERSION="${UV_VERSION:-0.7.13}"
CUDA_VERSION="${CUDA_VERSION:-12.4.1}"
UBUNTU_VERSION="${UBUNTU_VERSION:-22.04}"
# Print the command-line help to stdout.
usage() {
cat <<'EOF'
Usage:
packaging/linux/make_tarball_from_venv.sh --venv PATH
source venv_all/bin/activate && packaging/linux/make_tarball_from_venv.sh
Options:
--venv PATH Source virtualenv to package. Defaults to activated $VIRTUAL_ENV.
-o, --output PATH Output .tar.zst path. Defaults to dist/coderai-linux-x64-venv.tar.zst.
--no-local-libs Do not copy ldd-discovered native libraries from the venv.
--no-auto-local-bins Do not auto-include known locally compiled helper binaries.
--include-local-bin PATH
Copy an extra tested local binary into bin/, including its ldd libs.
--include-local-dir PATH
Copy executable files from a local build directory into bin/.
-h, --help Show this help.
EOF
}
# Parse command-line options. Options that take a value exit with status 2
# when it is missing; this script accepts no positional arguments.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --venv)
      if [[ $# -lt 2 ]]; then
        echo "Error: --venv requires a path" >&2
        exit 2
      fi
      VENV_PATH="$2"
      shift 2
      ;;
    -o|--output)
      if [[ $# -lt 2 ]]; then
        echo "Error: $1 requires a path" >&2
        exit 2
      fi
      OUT_NAME="$2"
      shift 2
      ;;
    --no-local-libs)
      INCLUDE_LOCAL_LIBS=0
      shift
      ;;
    --no-auto-local-bins)
      AUTO_LOCAL_BINS=0
      shift
      ;;
    --include-local-bin)
      if [[ $# -lt 2 ]]; then
        echo "Error: --include-local-bin requires a path" >&2
        exit 2
      fi
      LOCAL_BINARIES+=("$2")
      shift 2
      ;;
    --include-local-dir)
      if [[ $# -lt 2 ]]; then
        echo "Error: --include-local-dir requires a path" >&2
        exit 2
      fi
      LOCAL_BINARY_DIRS+=("$2")
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    -*)
      echo "Error: unknown option: $1" >&2
      usage >&2
      exit 2
      ;;
    *)
      echo "Error: unexpected argument: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done
# Validate the source virtualenv and require its Python minor version to match
# the standalone interpreter that will be shipped.
if [[ -z "$VENV_PATH" ]]; then
  echo "Error: pass --venv PATH or activate a virtualenv first" >&2
  exit 2
fi
[[ -d "$VENV_PATH" ]] || { echo "Error: virtualenv directory does not exist: $VENV_PATH" >&2; exit 2; }
VENV_PATH="$(cd "$VENV_PATH" && pwd)"
[[ -x "$VENV_PATH/bin/python" ]] || { echo "Error: missing venv python: $VENV_PATH/bin/python" >&2; exit 2; }
# The interpreter path is quoted so virtualenvs below directories with spaces
# work.
VENV_PYTHON_MINOR="$("$VENV_PATH/bin/python" -c 'import sys; print("%d.%d" % sys.version_info[:2])')"
PBS_PYTHON_MINOR="${PYTHON_VERSION%.*}"
if [[ "$VENV_PYTHON_MINOR" != "$PBS_PYTHON_MINOR" ]]; then
  echo "Error: venv Python minor ($VENV_PYTHON_MINOR) does not match standalone Python minor ($PBS_PYTHON_MINOR)" >&2
  exit 2
fi

# Start from an empty staging tree: <cache>/tarball/coderai/{python,app,...}.
mkdir -p "$OUT_DIR" "$ROOT_DIR/.packaging-cache"
[[ -n "$OUT_NAME" ]] || OUT_NAME="$OUT_DIR/coderai-linux-x64-venv.tar.zst"
STAGE="$ROOT_DIR/.packaging-cache/tarball/coderai"
rm -rf "$ROOT_DIR/.packaging-cache/tarball"
mkdir -p "$STAGE/python" "$STAGE/app" "$STAGE/bin" "$STAGE/local-libs" "$STAGE/config" "$STAGE/models" "$STAGE/cache"

# Download the standalone interpreter and unpack it into the staging tree.
curl -fsSL -o "$ROOT_DIR/.packaging-cache/python.tar.gz" \
  "https://github.com/astral-sh/python-build-standalone/releases/download/${PBS_RELEASE}/cpython-${PYTHON_VERSION}+${PBS_RELEASE}-x86_64-unknown-linux-gnu-install_only.tar.gz"
tar -xzf "$ROOT_DIR/.packaging-cache/python.tar.gz" -C "$STAGE"

# Mirror the venv's site-packages over the standalone interpreter's
# (--delete drops anything not present in the venv), then copy regular files
# from the venv's bin/ except python* and activate*.
source_sp="$VENV_PATH/lib/python${VENV_PYTHON_MINOR}/site-packages"
target_sp="$STAGE/python/lib/python${VENV_PYTHON_MINOR}/site-packages"
[[ -d "$source_sp" ]] || { echo "Error: venv site-packages not found: $source_sp" >&2; exit 2; }
rsync -a --delete "$source_sp/" "$target_sp/"
if [[ -d "$VENV_PATH/bin" ]]; then
  find "$VENV_PATH/bin" -maxdepth 1 -type f ! -name 'python*' ! -name 'activate*' -exec cp -a '{}' "$STAGE/python/bin/" \;
fi
# Append $1 to LOCAL_BINARIES as an absolute path.
# Paths that are not executable regular files are ignored, as are paths that
# are already in the list.
add_local_binary() {
  local candidate="$1"
  if [[ ! -x "$candidate" || ! -f "$candidate" ]]; then
    return 0
  fi
  local resolved
  resolved="$(cd "$(dirname "$candidate")" && pwd)/$(basename "$candidate")"
  local known
  for known in "${LOCAL_BINARIES[@]}"; do
    if [[ "$known" == "$resolved" ]]; then
      return 0
    fi
  done
  LOCAL_BINARIES+=("$resolved")
}
# Collect helper binaries: well-known locations (unless disabled), then every
# executable found up to two levels deep in each --include-local-dir
# directory. Finally copy all collected binaries into the bundle's bin/.
if [[ "$AUTO_LOCAL_BINS" == "1" ]]; then
  auto_bin_candidates=(
    "/usr/local/bin/whisper-server"
    "/usr/local/bin/whisper-cli"
    "$HOME/whisper.cpp/build/bin/whisper-server"
    "$HOME/whisper.cpp/build/bin/whisper-cli"
    "$HOME/whisper.cpp/build/bin/main"
    "$HOME/whisper.cpp/build/bin/server"
  )
  for auto_bin in "${auto_bin_candidates[@]}"; do
    add_local_binary "$auto_bin"
  done
fi
for bin_dir in "${LOCAL_BINARY_DIRS[@]}"; do
  if [[ ! -d "$bin_dir" ]]; then
    echo "Error: local binary directory does not exist: $bin_dir" >&2
    exit 2
  fi
  while IFS= read -r -d '' found_bin; do
    add_local_binary "$found_bin"
  done < <(find "$bin_dir" -maxdepth 2 -type f -perm -111 -print0)
done
for local_bin in "${LOCAL_BINARIES[@]}"; do
  if [[ ! -x "$local_bin" ]]; then
    echo "Error: local binary is not executable: $local_bin" >&2
    exit 2
  fi
  cp -a "$local_bin" "$STAGE/bin/"
done
# Run ldd over every executable / shared object in the venv and in the
# bundle's bin/, and copy the non-system libraries they resolve to into
# local-libs. Host GPU driver libraries are never bundled.
if [[ "$INCLUDE_LOCAL_LIBS" == "1" ]]; then
  VENV_PATH_FOR_LDD="$VENV_PATH" LOCAL_BIN_DIR="$STAGE/bin" LOCAL_LIB_DIR="$STAGE/local-libs" python3 - <<'PY'
import os
import shutil
import subprocess
from pathlib import Path

venv = Path(os.environ["VENV_PATH_FOR_LDD"])
local_bin = Path(os.environ["LOCAL_BIN_DIR"])
local_lib = Path(os.environ["LOCAL_LIB_DIR"])

skip_names = {
    "linux-vdso.so.1", "libc.so.6", "libdl.so.2", "libm.so.6",
    "libpthread.so.0", "librt.so.1", "libutil.so.1", "libresolv.so.2",
    "libselinux.so.1", "libpcre2-8.so.0", "libacl.so.1", "libattr.so.1",
    "libz.so.1", "libzstd.so.1", "liblzma.so.5", "libbz2.so.1.0",
    "libssl.so.3", "libcrypto.so.3", "libgcc_s.so.1", "libstdc++.so.6",
}
skip_starts = ("libcuda.so", "libnvidia-")
skip_prefixes = ("/lib/ld-linux", "/lib64/ld-linux")
system_dirs = ("/lib/", "/lib64/", "/usr/lib/", "/usr/lib64/")

candidates = []
for root in (venv / "lib", venv / "bin", local_bin):
    if not root.exists():
        continue
    for path in root.rglob("*"):
        if path.is_file() and (os.access(path, os.X_OK) or ".so" in path.name):
            candidates.append(path)

# Real library file -> every file name it was requested under. ldd reports the
# path the loader found (often a soname symlink such as libfoo.so.1), while the
# copy is taken from the resolved file (libfoo.so.1.2.3).
libs = {}
for path in candidates:
    try:
        proc = subprocess.run(
            ["ldd", str(path)],
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except OSError:
        # ldd missing or not runnable for this file: skip it.
        continue
    if proc.returncode not in (0, 1):
        continue
    for line in proc.stdout.splitlines():
        line = line.strip()
        dep = None
        if "=>" in line:
            rhs = line.split("=>", 1)[1].strip()
            if rhs.startswith("/"):
                dep = rhs.split(" (", 1)[0]
        elif line.startswith("/"):
            dep = line.split(" (", 1)[0]
        if not dep:
            continue
        dep_path = Path(dep)
        name = dep_path.name
        dep_str = str(dep_path)
        if name in skip_names or name.startswith(skip_starts):
            continue
        if dep_str.startswith(system_dirs) and "site-packages" not in dep_str:
            continue
        if dep_str.startswith(skip_prefixes):
            continue
        if dep_path.exists():
            libs.setdefault(dep_path.resolve(), set()).add(name)

for src in sorted(libs):
    dest = local_lib / src.name
    if not dest.exists():
        shutil.copy2(src, dest)
    # Native loaders ask for the soname, so expose the copy under every
    # requested name that differs from the real file name.
    for requested in sorted(libs[src] - {src.name}):
        link = local_lib / requested
        if link.exists() or link.is_symlink():
            continue
        try:
            link.symlink_to(src.name)
        except OSError:
            shutil.copy2(src, link)

print(f"Copied {len(libs)} ldd-discovered native libraries")
PY
fi
# Copy the application source into the bundle.
# The data/build directories are excluded at the repository root only (leading
# "/" anchors the pattern to the transfer root), matching what the OCI build
# removes; an unanchored pattern would also drop any nested directory that
# happens to be called "models", "dist", ... from the application itself.
rsync -a --delete \
  --exclude '.git' --exclude '__pycache__' \
  --exclude '/venv*' --exclude '/.venv' \
  --exclude '/models' --exclude '/offload' --exclude '/township_output' \
  --exclude '/dist' --exclude '/.packaging-cache' \
  "$ROOT_DIR/" "$STAGE/app/"
cp "$ROOT_DIR/packaging/linux/launcher/coderai-tarball" "$STAGE/bin/coderai"
cp "$ROOT_DIR/packaging/linux/README-RUN.txt" "$STAGE/README-RUN.txt"
chmod +x "$STAGE/bin/coderai"

# Record how the bundle was built.
MANIFEST_OUT="$STAGE/BUILD-MANIFEST.json" \
PROJECT_ROOT="$ROOT_DIR" \
MANIFEST_ARTIFACT="linux-tarball" \
MANIFEST_BUILD_MODE="venv" \
MANIFEST_PYTHON="$VENV_PATH/bin/python" \
MANIFEST_VENV="$VENV_PATH" \
MANIFEST_LOCAL_BINS="$(IFS=:; echo "${LOCAL_BINARIES[*]:-}")" \
PYTHON_VERSION="$PYTHON_VERSION" \
PBS_RELEASE="$PBS_RELEASE" \
UV_VERSION="$UV_VERSION" \
CUDA_VERSION="$CUDA_VERSION" \
UBUNTU_VERSION="$UBUNTU_VERSION" \
python3 "$ROOT_DIR/packaging/common/write_manifest.py"

find "$STAGE" -type d -name __pycache__ -prune -exec rm -rf '{}' +

# A custom --output path may point into a directory that does not exist yet.
mkdir -p "$(dirname "$OUT_NAME")"
tar --zstd -cf "$OUT_NAME" -C "$ROOT_DIR/.packaging-cache/tarball" coderai
# Record the archive by file name only, so "sha256sum -c" also works after the
# archive and its checksum file were moved to another machine.
(
  cd "$(dirname "$OUT_NAME")"
  sha256sum "$(basename "$OUT_NAME")" > "$(basename "$OUT_NAME").sha256"
)
cat <<EOF
Created Linux tarball:
archive: $OUT_NAME
checksum: $OUT_NAME.sha256
Extract and run:
tar --zstd -xf "$OUT_NAME"
./coderai/bin/coderai
EOF
#!/usr/bin/env bash
# Export a locally built CoderAI image to an archive in dist/ (zstd-compressed
# by default) and write a SHA-256 checksum next to it.
set -euo pipefail
# Repository root: two directories above the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Optional version pins; sourced only when the file exists.
VERSIONS_FILE="$ROOT_DIR/packaging/versions.env"
if [[ -f "$VERSIONS_FILE" ]]; then
# shellcheck disable=SC1090
source "$VERSIONS_FILE"
fi
# DOCKER may contain several words; DOCKER_CMD holds them as separate
# arguments for execution, DOCKER_BIN keeps the raw string for messages.
DOCKER_BIN="${DOCKER:-docker}"
read -r -a DOCKER_CMD <<< "$DOCKER_BIN"
# Defaults; the option parser below may override them.
IMAGE_TAG="${OCI_IMAGE:-coderai:local}"
OUT_DIR="$ROOT_DIR/dist"
OUT_NAME=""
COMPRESS=1
# Print the command-line help to stdout.
usage() {
cat <<'EOF'
Usage:
packaging/linux/package_oci_image.sh [IMAGE_TAG]
packaging/linux/package_oci_image.sh -t IMAGE_TAG -o dist/name.tar.zst
Options:
-t, --tag TAG Image tag to export (default: coderai:local or OCI_IMAGE).
-o, --output PATH Output archive path. Defaults to dist/coderai-oci-<tag>.tar.zst.
--no-compress Write an uncompressed docker-save .tar.
-h, --help Show this help.
EOF
}
# Parse command-line options. Options that take a value exit with status 2
# when it is missing; a bare word is taken as the image tag.
while (( $# > 0 )); do
  case "$1" in
    -t|--tag)
      if (( $# < 2 )); then
        echo "Error: $1 requires a tag" >&2
        exit 2
      fi
      IMAGE_TAG="$2"; shift 2 ;;
    -o|--output)
      if (( $# < 2 )); then
        echo "Error: $1 requires a path" >&2
        exit 2
      fi
      OUT_NAME="$2"; shift 2 ;;
    --no-compress) COMPRESS=0; shift ;;
    -h|--help) usage; exit 0 ;;
    -*) echo "Error: unknown option: $1" >&2; usage >&2; exit 2 ;;
    *) IMAGE_TAG="$1"; shift ;;
  esac
done
# Export the image. The image must already exist locally.
if ! "${DOCKER_CMD[@]}" image inspect "$IMAGE_TAG" >/dev/null 2>&1; then
  echo "Error: image not found: $IMAGE_TAG" >&2
  echo "Build it first with ./build-oci.sh" >&2
  exit 1
fi

# Default archive name: the tag with every character outside [A-Za-z0-9_.-]
# replaced by "-".
safe_tag="${IMAGE_TAG//[^A-Za-z0-9_.-]/-}"
mkdir -p "$OUT_DIR"
if [[ -z "$OUT_NAME" ]]; then
  if [[ "$COMPRESS" == "1" ]]; then
    OUT_NAME="$OUT_DIR/coderai-oci-${safe_tag}.tar.zst"
  else
    OUT_NAME="$OUT_DIR/coderai-oci-${safe_tag}.tar"
  fi
fi
# A custom --output path may point into a directory that does not exist yet.
mkdir -p "$(dirname "$OUT_NAME")"

if [[ "$COMPRESS" == "1" ]]; then
  if ! command -v zstd >/dev/null 2>&1; then
    echo "Error: zstd is required for compressed export. Use --no-compress or install zstd." >&2
    exit 1
  fi
  # -f: overwrite an archive left by a previous run, as "docker save -o" does;
  # without it zstd refuses to replace an existing output file.
  "${DOCKER_CMD[@]}" save "$IMAGE_TAG" | zstd -T0 -19 -f -o "$OUT_NAME"
else
  "${DOCKER_CMD[@]}" save -o "$OUT_NAME" "$IMAGE_TAG"
fi

# Record the archive by file name only, so "sha256sum -c" also works after the
# archive and its checksum file were moved to another machine.
(
  cd "$(dirname "$OUT_NAME")"
  sha256sum "$(basename "$OUT_NAME")" > "$(basename "$OUT_NAME").sha256"
)
cat <<EOF
Exported OCI image artifact:
archive: $OUT_NAME
checksum: $OUT_NAME.sha256
Load it with:
$DOCKER_BIN load -i "$OUT_NAME"
EOF
#!/usr/bin/env bash
# Start the CoderAI image with docker or podman, mounting config/models/cache
# from a host data directory.
set -euo pipefail
# Repository root: two directories above the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Optional version pins; sourced only when the file exists.
VERSIONS_FILE="$ROOT_DIR/packaging/versions.env"
if [[ -f "$VERSIONS_FILE" ]]; then
# shellcheck disable=SC1090
source "$VERSIONS_FILE"
fi
# Defaults; the option parser below may override them. PORT is the host-side
# port, DATA_ROOT is relative to the current working directory.
ENGINE="${CONTAINER_ENGINE:-docker}"
IMAGE_TAG="${OCI_IMAGE:-coderai:local}"
MODE="cpu"
PORT="${CODERAI_PORT:-8776}"
DATA_ROOT="$PWD/coderai-runtime"
DETACH=0
NAME="coderai"
EXTRA_ARGS=()
# Print the command-line help to stdout.
# NOTE: the parser also accepts --cuda as an alias for --nvidia.
usage() {
cat <<'EOF'
Usage:
packaging/linux/run_oci.sh [--cpu|--nvidia|--vulkan] [IMAGE_TAG]
Options:
--docker Use docker (default).
--podman Use podman.
--cpu CPU-only run mode (default).
--nvidia NVIDIA CUDA mode; adds --gpus all for Docker.
--vulkan Vulkan mode; adds --device /dev/dri.
-p, --port PORT Host port to expose (default: 8776).
--data-dir PATH Directory for config/models/cache (default: ./coderai-runtime).
--name NAME Container name (default: coderai).
-d, --detach Run in background.
-- ARGS Extra args passed to the container engine before the image name.
-h, --help Show this help.
EOF
}
# Parse command-line options. Options that take a value exit with status 2
# when it is missing; a bare word is taken as the image tag; everything after
# "--" is collected for the container engine.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --docker)
      ENGINE=docker
      shift
      ;;
    --podman)
      ENGINE=podman
      shift
      ;;
    --cpu)
      MODE=cpu
      shift
      ;;
    --nvidia|--cuda)
      MODE=nvidia
      shift
      ;;
    --vulkan)
      MODE=vulkan
      shift
      ;;
    -p|--port)
      if [[ $# -lt 2 ]]; then
        echo "Error: $1 requires a port" >&2
        exit 2
      fi
      PORT="$2"
      shift 2
      ;;
    --data-dir)
      if [[ $# -lt 2 ]]; then
        echo "Error: --data-dir requires a path" >&2
        exit 2
      fi
      DATA_ROOT="$2"
      shift 2
      ;;
    --name)
      if [[ $# -lt 2 ]]; then
        echo "Error: --name requires a value" >&2
        exit 2
      fi
      NAME="$2"
      shift 2
      ;;
    -d|--detach)
      DETACH=1
      shift
      ;;
    --)
      shift
      EXTRA_ARGS+=("$@")
      break
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    -*)
      echo "Error: unknown option: $1" >&2
      usage >&2
      exit 2
      ;;
    *)
      IMAGE_TAG="$1"
      shift
      ;;
  esac
done
# Create the host-side state directories and make the data root absolute, so
# the bind mounts below do not depend on the working directory.
for state_dir in config models cache; do
  mkdir -p "$DATA_ROOT/$state_dir"
done
DATA_ROOT="$(cd "$DATA_ROOT" && pwd)"

# Assemble the engine command line. The container always listens on 8776
# internally; only the host side of the port mapping is configurable.
args=(run --rm --name "$NAME" --ipc=host -p "$PORT:8776" -e CODERAI_HOST=0.0.0.0 -e CODERAI_PORT=8776)
[[ "$DETACH" != "1" ]] || args+=(-d)

# GPU access depends on both the mode and the engine.
if [[ "$MODE" == "nvidia" ]]; then
  if [[ "$ENGINE" == "docker" ]]; then
    args+=(--gpus all)
  else
    args+=(--hooks-dir=/usr/share/containers/oci/hooks.d)
  fi
elif [[ "$MODE" == "vulkan" ]]; then
  args+=(--device /dev/dri)
fi

# podman volumes get the ":Z" mount option appended.
volume_suffix=""
[[ "$ENGINE" != "podman" ]] || volume_suffix=":Z"
for state_dir in config models cache; do
  args+=(-v "$DATA_ROOT/$state_dir:/$state_dir$volume_suffix")
done
args+=("${EXTRA_ARGS[@]}" "$IMAGE_TAG")

cat <<EOF
Starting CoderAI OCI container
engine: $ENGINE
image: $IMAGE_TAG
mode: $MODE
url: http://127.0.0.1:$PORT/admin
data: $DATA_ROOT
EOF
exec "$ENGINE" "${args[@]}"
#!/usr/bin/env bash
# Smoke test for a built CoderAI image: check that required Python modules can
# be located, then boot a container and poll its HTTP endpoints.
set -euo pipefail
# Repository root: two directories above the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Optional version pins; sourced only when the file exists.
VERSIONS_FILE="$ROOT_DIR/packaging/versions.env"
if [[ -f "$VERSIONS_FILE" ]]; then
# shellcheck disable=SC1090
source "$VERSIONS_FILE"
fi
# DOCKER may contain several words; DOCKER_CMD holds them as separate
# arguments for execution.
DOCKER_BIN="${DOCKER:-docker}"
read -r -a DOCKER_CMD <<< "$DOCKER_BIN"
# Defaults; the option parser below may override them. The container name
# embeds this shell's PID ($$).
IMAGE_TAG="${OCI_IMAGE:-coderai:local}"
MODE="cpu"
PORT="${CODERAI_PORT:-18776}"
KEEP=0
TIMEOUT=45
CONTAINER_NAME="coderai-smoke-$$"
# Print the command-line help to stdout.
usage() {
cat <<'EOF'
Usage:
packaging/linux/smoke_test_oci.sh [IMAGE_TAG]
Options:
-t, --tag TAG Image tag to test (default: coderai:local or OCI_IMAGE).
--mode MODE cpu, nvidia, or vulkan (default: cpu).
--port PORT Host port for boot test (default: 18776).
--timeout SECONDS Server boot timeout (default: 45).
--keep Keep the container after failure for inspection.
-h, --help Show this help.
EOF
}
# Parse command-line options. Options that take a value exit with status 2
# when it is missing; a bare word is taken as the image tag.
while (( $# > 0 )); do
  case "$1" in
    -t|--tag)
      if (( $# < 2 )); then
        echo "Error: $1 requires a tag" >&2
        exit 2
      fi
      IMAGE_TAG="$2"; shift 2 ;;
    --mode)
      if (( $# < 2 )); then
        echo "Error: --mode requires cpu, nvidia, or vulkan" >&2
        exit 2
      fi
      MODE="$2"; shift 2 ;;
    --port)
      if (( $# < 2 )); then
        echo "Error: --port requires a value" >&2
        exit 2
      fi
      PORT="$2"; shift 2 ;;
    --timeout)
      if (( $# < 2 )); then
        echo "Error: --timeout requires seconds" >&2
        exit 2
      fi
      TIMEOUT="$2"; shift 2 ;;
    --keep) KEEP=1; shift ;;
    -h|--help) usage; exit 0 ;;
    -*) echo "Error: unknown option: $1" >&2; usage >&2; exit 2 ;;
    *) IMAGE_TAG="$1"; shift ;;
  esac
done
# Reject anything but the three supported run modes.
if [[ "$MODE" != "cpu" && "$MODE" != "nvidia" && "$MODE" != "vulkan" ]]; then
  echo "Error: --mode must be cpu, nvidia, or vulkan" >&2
  exit 2
fi
# EXIT handler: unless --keep was given, remove the test container and the
# per-run scratch directory that backs its volumes, so repeated runs do not
# pile up directories below .packaging-cache. Both steps are best effort.
cleanup() {
  if [[ "$KEEP" == "1" ]]; then
    return 0
  fi
  "${DOCKER_CMD[@]}" rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true
  # tmp_dir is assigned later in the script; it is unset when the script exits
  # before the boot test starts.
  if [[ -n "${tmp_dir:-}" ]]; then
    rm -rf "$tmp_dir" 2>/dev/null || true
  fi
}
trap cleanup EXIT
if ! "${DOCKER_CMD[@]}" image inspect "$IMAGE_TAG" >/dev/null 2>&1; then
echo "Error: image not found: $IMAGE_TAG" >&2
exit 1
fi
# Python program passed to `python3 -c` inside the image.
# - Every module in `mods` must be importable; missing ones are collected and
#   make the program exit non-zero via SystemExit.
# - Modules in `optional` are only reported.
# - torch CUDA availability/device count are reported, or the error text if
#   importing/querying torch raises.
# A JSON summary is printed before the required-module check fails the run.
# The body must keep valid Python indentation: it is handed to the interpreter
# verbatim. Do not use single quotes inside it (the shell string is
# single-quoted).
IMPORT_CHECK='import importlib.util, json
mods=["fastapi","uvicorn","torch","transformers","diffusers","accelerate","llama_cpp","PIL"]
optional=["stable_diffusion_cpp","whispercpp","bitsandbytes","onnxruntime"]
out={"required":{},"optional":{}}
missing=[]
for m in mods:
    ok=importlib.util.find_spec(m) is not None
    out["required"][m]=ok
    if not ok: missing.append(m)
for m in optional:
    out["optional"][m]=importlib.util.find_spec(m) is not None
try:
    import torch
    out["torch_cuda_available"]=bool(torch.cuda.is_available())
    out["torch_cuda_device_count"]=int(torch.cuda.device_count())
except Exception as e:
    out["torch_cuda_error"]=str(e)
print(json.dumps(out, sort_keys=True))
if missing:
    raise SystemExit("missing required imports: "+", ".join(missing))'
# Import check: run the image's bundled interpreter once, in a throw-away
# container, with the device flags that match the selected mode.
run_args=(--rm)
if [[ "$MODE" == "nvidia" ]]; then
  run_args+=(--gpus all)
elif [[ "$MODE" == "vulkan" ]]; then
  run_args+=(--device /dev/dri)
fi
printf '%s\n' "Checking imports in $IMAGE_TAG..."
"${DOCKER_CMD[@]}" run "${run_args[@]}" \
  --entrypoint /opt/coderai/python/bin/python3 \
  "$IMAGE_TAG" -c "$IMPORT_CHECK"
# Boot test: start the image detached with fresh, empty config/models/cache
# directories bind-mounted from a per-run scratch directory.
tmp_dir="$ROOT_DIR/.packaging-cache/smoke-$MODE-$$"
rm -rf "$tmp_dir" 2>/dev/null || true
mkdir -p "$tmp_dir/config" "$tmp_dir/models" "$tmp_dir/cache"
container_args=(
  -d
  --name "$CONTAINER_NAME"
  -p "$PORT:8776"
  -e CODERAI_HOST=0.0.0.0
  -e CODERAI_PORT=8776
  -v "$tmp_dir/config:/config"
  -v "$tmp_dir/models:/models"
  -v "$tmp_dir/cache:/cache"
)
# Per-mode device flags; every recognised mode also shares the host IPC
# namespace.
if [[ "$MODE" == "nvidia" ]]; then
  container_args+=(--gpus all --ipc=host)
elif [[ "$MODE" == "vulkan" ]]; then
  container_args+=(--device /dev/dri --ipc=host)
elif [[ "$MODE" == "cpu" ]]; then
  container_args+=(--ipc=host)
fi
printf '%s\n' "Starting boot test container on http://127.0.0.1:$PORT ..."
# docker prints the new container id on stdout; it is not needed here.
"${DOCKER_CMD[@]}" run "${container_args[@]}" "$IMAGE_TAG" >/dev/null
# Wait for the server to boot: poll /admin about once per second until it
# answers with one of the accepted HTTP statuses (200, 301, 302, 401, 403),
# or give up once more than TIMEOUT seconds have elapsed, dumping the
# container logs to stderr.
#
# --max-time bounds each individual request. Without it a connection that is
# accepted but never answered would block curl indefinitely, and the TIMEOUT
# check below would never be reached.
start=$SECONDS
until status=$(curl -sS --max-time 5 -o /dev/null -w '%{http_code}' "http://127.0.0.1:$PORT/admin" 2>/dev/null) \
    && [[ "$status" =~ ^(200|301|302|401|403)$ ]]; do
  if (( SECONDS - start > TIMEOUT )); then
    echo "Server did not respond within ${TIMEOUT}s" >&2
    "${DOCKER_CMD[@]}" logs "$CONTAINER_NAME" >&2 || true
    exit 1
  fi
  sleep 1
done
# Final probe: /v1/models must answer with 200, 401 or 403. A curl failure
# leaves the status at whatever curl printed (the `|| true` keeps set -e from
# aborting), which then fails the pattern check below.
models_status=$(curl -sS -o /dev/null -w '%{http_code}' "http://127.0.0.1:$PORT/v1/models" 2>/dev/null || true)
if [[ ! "$models_status" =~ ^(200|401|403)$ ]]; then
  echo "Unexpected /v1/models status: $models_status" >&2
  # Invoke docker through the DOCKER_CMD array, like every other call in this
  # script: "$DOCKER_BIN" as a single word fails when it holds several words
  # (for example "sudo docker"), which would hide the logs.
  "${DOCKER_CMD[@]}" logs "$CONTAINER_NAME" >&2 || true
  exit 1
fi
echo "Smoke test passed for $IMAGE_TAG ($MODE)."
# CoderAI local distribution pins
# This file is sourced by packaging/linux/build_oci_image.sh and used as Docker build args.
# Keep it to plain KEY=value lines and full-line comments so it stays sourceable.

# Interpreter pin.
# python-build-standalone release 20250612 ships CPython 3.13.5. The app only
# requires the 3.13 minor line, while native wheels are built against the target
# interpreter during image assembly.
# PYTHON_VERSION and PBS_RELEASE must be changed together: the release has to
# be one that actually ships the pinned interpreter version.
PYTHON_VERSION=3.13.5
PBS_RELEASE=20250612
# NOTE(review): presumably the version of the `uv` tool used during image
# assembly — confirm against the Dockerfile.
UV_VERSION=0.7.13
# NOTE(review): presumably select the CUDA and Ubuntu base images for the
# build — confirm against the Dockerfile.
CUDA_VERSION=12.6.3
UBUNTU_VERSION=24.04
# Space-separated package names; quoted because the value contains spaces.
VULKAN_PACKAGES="libvulkan-dev vulkan-tools glslang-tools"
# Native module refs. Empty means use the resolver/package default from PyPI.
LLAMA_CPP_PYTHON_VERSION=
SD_CPP_PYTHON_VERSION=
# NOTE(review): `master` is a moving ref, unlike the other pins in this file —
# confirm this is intentional.
WHISPERCPP_REF=master
# Local image defaults.
OCI_IMAGE=coderai:local
OCI_BUILD_CONTEXT=.
#!/usr/bin/env bash
# Convenience launcher: forwards all arguments to packaging/linux/run_oci.sh,
# located relative to this file rather than the caller's working directory.
set -euo pipefail
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
exec "$script_dir/packaging/linux/run_oci.sh" "$@"
#!/usr/bin/env bash
# Convenience launcher: forwards all arguments to
# packaging/linux/smoke_test_oci.sh, resolved relative to this file so it
# works from any working directory.
set -euo pipefail
here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
exec "${here}/packaging/linux/smoke_test_oci.sh" "$@"
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment