township: 2-keyframe outcomes, referees, autogen, generation queue; favicons

Township tool (tools/gen_township_fighters.py):
- Outcome videos now generate TWO keyframes per outcome (finish + victory),
  each anchoring its own clip; victory clip uses a dedicated referee shot.
- Referee characters: new role on create form, kept out of fighter pools,
  dressed as officials, attachable per-match and used in victory keyframes.
- Per-match referee selection (new-match form + match editor, persisted).
- Autogenerate buttons on character/referee, environment and new-match forms
  (LLM-filled, editable before create) via /profile/autogen + /matches/autogen.
- Single-worker generation queue: all coderai-bound jobs (create/regen/train/
  match/process) are serialised and surfaced as "queued", with one persistent
  match-detail monitor replacing the competing per-job pollers (fixes the
  blinking progress when two jobs were launched at once).

coderai: favicon.ico served at /favicon.ico + linked in admin/login templates;
bundled township favicon served at /favicon.ico.

Also gitignore large packaging/runtime artifact dirs (.packaging-cache/, tmp/).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
parent 80f8fe22
# Files/directories excluded from Docker build context for local distribution builds.
.git
.gitignore
# Local virtualenvs and caches
venv*
.venv
__pycache__
*.py[cod]
.pytest_cache
.mypy_cache
.ruff_cache
.cache
.packaging-cache/*
!.packaging-cache/build-manifest.json
# Large runtime/generated data
models
offload
township_output
dist
dist-package
*.log
# Build outputs
build
*.egg-info
......@@ -29,3 +29,7 @@ test_*.py
# Generated township fighter outputs
township_output/
# Packaging build cache + runtime temp (large artifacts)
.packaging-cache/
tmp/
#!/usr/bin/env bash
# Thin wrapper: resolve the directory this script lives in and hand control
# (with all arguments) to packaging/linux/build_oci_image.sh underneath it.
# -e/-u/-o pipefail: abort on errors, unset variables and failed pipelines.
set -euo pipefail
# exec replaces this shell, so the wrapped script's exit status is ours.
exec "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/packaging/linux/build_oci_image.sh" "$@"
......@@ -316,8 +316,15 @@ async def chat_page(request: Request, username: str = Depends(require_auth)):
# API endpoints for admin operations
@router.get("/admin/api/status", summary="Server and model status")
async def api_status(username: str = Depends(require_auth)):
"""Get system status."""
def api_status(username: str = Depends(require_auth)):
"""Get system status.
Defined as a SYNC handler on purpose: it reads VRAM via sysfs / a blocking
``lspci`` subprocess (AMD/Intel) and scans registry/queue state. The Tasks
and dashboard pages poll it continuously, so running it on the event loop
would freeze the whole web UI while a model loads (llama.cpp's load releases
the GIL, so a threadpool worker can still run). FastAPI runs plain ``def``
path operations in its threadpool, off the event loop."""
from codai.models.manager import multi_model_manager
from codai.api.state import get_load_mode
......@@ -1571,7 +1578,11 @@ async def api_model_load(request: Request, username: str = Depends(require_admin
model_cfg = m if isinstance(m, dict) else {}
break
result = multi_model_manager.request_model(path, model_type if model_type != "text" else None)
# Offload to a thread: request_model may block (thermal wait / busy model /
# actual load) and would otherwise freeze the whole admin web UI event loop.
result = await asyncio.to_thread(
multi_model_manager.request_model,
path, model_type if model_type != "text" else None)
if result.get("already_loaded"):
return {"success": True, "already_loaded": True}
......@@ -1583,17 +1594,20 @@ async def api_model_load(request: Request, username: str = Depends(require_admin
free_gb = multi_model_manager._get_free_vram_gb()
if needed_gb > 0 and free_gb < needed_gb:
print(f"Admin model-load: need {needed_gb:.1f} GB VRAM, have {free_gb:.1f} GB free — evicting models")
multi_model_manager._evict_models_for_vram(needed_gb)
await asyncio.to_thread(multi_model_manager._evict_models_for_vram, needed_gb)
elif needed_gb == 0 and multi_model_manager.models and free_gb < 4.0:
# Unknown model size but VRAM nearly full — evict everything to avoid OOM on first attempt
print(f"Admin model-load: unknown model size, only {free_gb:.1f} GB free — evicting models proactively")
multi_model_manager.unload_all_models()
await asyncio.to_thread(multi_model_manager.unload_all_models)
# Not loaded yet — trigger actual load
try:
if model_type == "text":
# _load_model_by_name already records the VRAM delta internally
mm = multi_model_manager._load_model_by_name(result["model_name"] or path)
# In a thread: the GGUF/llama load is heavy and would block the admin
# event loop (freezing the whole web UI) if run inline.
# _load_model_by_name already records the VRAM delta internally.
mm = await asyncio.to_thread(
multi_model_manager._load_model_by_name, result["model_name"] or path)
if mm is None:
raise RuntimeError("Model failed to load")
multi_model_manager.models[result["model_key"] or path] = mm
......@@ -1618,15 +1632,15 @@ async def api_model_load(request: Request, username: str = Depends(require_admin
model_key = f"image:{path}"
_snap = multi_model_manager.vram_before_load()
if _is_gguf_model(path):
resolved = multi_model_manager.load_model(path)
resolved = await asyncio.to_thread(multi_model_manager.load_model, path)
import os as _os
if resolved and _os.path.isfile(resolved):
sd_model = _load_sdcpp_model(resolved, global_args)
sd_model = await asyncio.to_thread(_load_sdcpp_model, resolved, global_args)
if sd_model:
multi_model_manager.add_model(model_key, sd_model)
multi_model_manager.record_vram_delta(model_key, _snap)
else:
pipeline = _load_diffusers_pipeline(path, global_args)
pipeline = await asyncio.to_thread(_load_diffusers_pipeline, path, global_args)
if pipeline:
multi_model_manager.add_model(model_key, pipeline)
multi_model_manager.record_vram_delta(model_key, _snap)
......@@ -2029,10 +2043,15 @@ async def api_turboquant_info(username: str = Depends(require_admin)):
# --- Task / queue management ---
@router.get("/admin/api/tasks", summary="List active and recent tasks")
async def api_tasks(username: str = Depends(require_admin)):
def api_tasks(username: str = Depends(require_admin)):
"""Unified live view of long-running work: in-flight / recent generations
(image, video, audio, text) from the task registry, durable LoRA training
jobs, and queued requests waiting for a slot. The Tasks page polls this."""
jobs, and queued requests waiting for a slot. The Tasks page polls this.
SYNC handler on purpose (runs in FastAPI's threadpool, not the event loop):
it reads disk job records, queue/registry state and thermal sensors. Keeping
it off the event loop means the Tasks page stays responsive while a model is
loading (the load releases the GIL during its C call)."""
from codai.tasks import task_registry
from codai.api.loras import list_jobs
from codai.queue.manager import queue_manager
......@@ -2193,10 +2212,14 @@ def _read_vram_info() -> Optional[dict]:
@router.get("/admin/api/system-stats", summary="Live CPU / GPU / RAM / VRAM usage and temperatures")
async def api_system_stats(username: str = Depends(require_admin)):
def api_system_stats(username: str = Depends(require_admin)):
"""Lightweight hardware telemetry for the Tasks page header: CPU & GPU
utilization and temperature, plus RAM and VRAM usage. All fields are
best-effort and may be null when a sensor/metric is unavailable."""
best-effort and may be null when a sensor/metric is unavailable.
SYNC handler on purpose: the temperature/util/VRAM reads hit sysfs and
blocking sensor calls, so it runs in FastAPI's threadpool to avoid freezing
the event loop (and the Tasks page) while a model is loading."""
from codai.models import thermal
# CPU tile = coderai process-tree usage, scaled 100% PER CORE (0..100*cores),
......@@ -2398,6 +2421,10 @@ async def api_get_settings(username: str = Depends(require_admin)):
"jobs": {
"resume_on_restart": c.jobs.resume_on_restart,
},
"enhance": {
"allow_ffmpeg": c.enhance.allow_ffmpeg,
"allow_rife_ncnn": c.enhance.allow_rife_ncnn,
},
"broker": {
"enabled": c.broker.enabled,
"base_url": c.broker.base_url,
......@@ -2562,6 +2589,22 @@ async def api_save_settings(request: Request, username: str = Depends(require_ad
except Exception:
pass
if "enhance" in data:
en = data["enhance"]
if "allow_ffmpeg" in en:
c.enhance.allow_ffmpeg = bool(en["allow_ffmpeg"])
if "allow_rife_ncnn" in en:
c.enhance.allow_rife_ncnn = bool(en["allow_rife_ncnn"])
# Apply live to global_args so the video pipeline honours it immediately.
try:
from codai.api.state import get_global_args
ga = get_global_args()
if ga is not None:
ga.enhance_allow_ffmpeg = c.enhance.allow_ffmpeg
ga.enhance_allow_rife_ncnn = c.enhance.allow_rife_ncnn
except Exception:
pass
if "broker" in data:
bro = data["broker"]
c.broker.enabled = bool(bro.get("enabled", c.broker.enabled))
......
......@@ -4,6 +4,7 @@
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% block title %}CoderAI{% endblock %}</title>
<link rel="icon" type="image/x-icon" href="{{ root_path }}/favicon.ico">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
......
......@@ -4,6 +4,7 @@
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Sign in — CoderAI</title>
<link rel="icon" type="image/x-icon" href="{{ root_path }}/favicon.ico">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
......
......@@ -75,6 +75,26 @@
</div>
</div>
<!-- Video enhancement (upscale / FPS interpolation) -->
<div class="card mb-0" style="margin-top:1rem">
<div class="card-title">Video Enhancement</div>
<p class="form-hint" style="margin-bottom:.6rem">Upscaling and FPS interpolation run on in-process torch models by default (ESRGAN upscaler, RIFE/FILM interpolator) — no external tools. Enable these only to fall back to external tools when no model is configured.</p>
<div class="form-row" style="margin:0">
<label style="display:flex;align-items:center;gap:.5rem;cursor:pointer">
<input type="checkbox" id="s-allow-ffmpeg">
<span>Allow ffmpeg (frame I/O / minterpolate)</span>
</label>
<span class="form-hint">Off = use PyAV + models only. On = ffmpeg may be used as a fallback.</span>
</div>
<div class="form-row" style="margin-top:.5rem">
<label style="display:flex;align-items:center;gap:.5rem;cursor:pointer">
<input type="checkbox" id="s-allow-rife-ncnn">
<span>Allow rife-ncnn-vulkan binary</span>
</label>
<span class="form-hint">Off = use an in-process RIFE/FILM model. On = the external rife-ncnn-vulkan binary may be used when no model is configured.</span>
</div>
</div>
<!-- Archive -->
<div class="card mb-0" style="margin-top:1rem">
<div class="card-title">Generation Archive</div>
......@@ -336,6 +356,8 @@ async function loadSettings(){
document.getElementById('s-gguf-cache').value = d.models?.gguf_cache_dir ?? '';
document.getElementById('s-offload-dir').value = d.offload?.directory ?? './offload';
document.getElementById('s-tmp-dir').value = d.tmp_dir ?? '';
document.getElementById('s-allow-ffmpeg').checked = !!(d.enhance && d.enhance.allow_ffmpeg);
document.getElementById('s-allow-rife-ncnn').checked = !!(d.enhance && d.enhance.allow_rife_ncnn);
toggleHttps();
// Archive
const arc = d.archive || {};
......@@ -404,6 +426,10 @@ async function saveSettings(){
directory: document.getElementById('s-offload-dir').value.trim() || './offload',
},
tmp_dir: strOrNull('s-tmp-dir'),
enhance:{
allow_ffmpeg: document.getElementById('s-allow-ffmpeg').checked,
allow_rife_ncnn: document.getElementById('s-allow-rife-ncnn').checked,
},
archive:{
enabled: document.getElementById('s-arc-enabled').checked,
directory: document.getElementById('s-arc-dir').value.trim(),
......
# CoderAI - in-process FILM frame interpolation engine.
#
# FILM (Frame Interpolation for Large Motion) is distributed in several forms.
# The most portable torch artifact is a TorchScript module, which we can load and
# run in-process with no extra architecture code. This wrapper loads such a model
# and exposes the same `.interpolate(a, b)` (t=0.5) interface the RIFE engine uses,
# so the dispatcher is engine-agnostic. Higher fps multipliers come from the
# caller's recursive midpoint bisection.
import torch
class _FilmWrapper:
    """Adapt a loaded FILM module to ``.interpolate(img0, img1) -> mid`` at t=0.5.

    FILM ports expose a few call conventions. The first ``interpolate`` call
    probes the known ones in order and remembers which one worked; later calls
    go straight to it. The remembered convention is a function of the inputs
    (never a closure over one particular frame pair), so every call operates
    on the frames it was actually given.
    """

    def __init__(self, module, device):
        self.m = module
        self.device = device
        # Working convention: callable (img0, img1, t) -> raw module output.
        # None until a probe has succeeded.
        self._call = None

    def _conventions(self):
        """Return the known calling conventions, in probing order."""
        m = self.m
        return (
            # Dict-style ports: a batch dict in, a dict with "image" out.
            lambda a, b, t: m({"x0": a, "x1": b, "time": t})["image"],
            lambda a, b, t: m({"x0": a, "x1": b,
                               "time": t[:, :1, None, None]})["image"],
            # Positional ports, with a scalar / tensor / implicit timestep.
            lambda a, b, t: m(a, b, 0.5),
            lambda a, b, t: m(a, b, t),
            lambda a, b, t: m(a, b),
        )

    @staticmethod
    def _unwrap(out):
        """Extract the frame from a dict result; pass anything else through."""
        if isinstance(out, dict):
            # Explicit membership test: ``out.get("image") or ...`` would call
            # bool() on a tensor, which raises for multi-element tensors.
            if "image" in out:
                return out["image"]
            return next(iter(out.values()))
        return out

    @torch.no_grad()
    def interpolate(self, img0, img1):
        """Return the frame halfway between ``img0`` and ``img1``.

        Raises:
            RuntimeError: if none of the known calling conventions works.
        """
        t = torch.full((img0.shape[0], 1), 0.5, device=img0.device, dtype=img0.dtype)
        if self._call is not None:
            return self._unwrap(self._call(img0, img1, t))
        last = None
        for convention in self._conventions():
            try:
                out = self._unwrap(convention(img0, img1, t))
            except Exception as e:  # try the next calling convention
                last = e
                continue
            self._call = convention
            return out
        raise RuntimeError(
            f"FILM module call failed (incompatible signature): {last}") from last
def load_film(weights_path: str, device):
    """Load a FILM model for in-process interpolation.

    Only a TorchScript module (saved via ``torch.jit``) is supported; it is
    loaded onto ``device``, switched to eval mode and wrapped so it exposes
    ``.interpolate(img0, img1)``.

    Raises:
        RuntimeError: if ``torch.jit.load`` (or switching to eval mode) fails.
            The original error is attached as ``__cause__``.
    """
    try:
        module = torch.jit.load(weights_path, map_location=device)
        module.eval()
    except Exception as e:
        # Chain explicitly so the underlying loader error stays in the traceback.
        raise RuntimeError(
            "FILM weights must be a TorchScript module for in-process use "
            f"(torch.jit.load failed: {e}). Provide a scripted FILM model, or use "
            "a RIFE interpolation model instead.") from e
    return _FilmWrapper(module, device)
# CoderAI - in-process RIFE (IFNet_HDv3) frame interpolation.
# Vendored architecture matching the RIFE flownet.pkl weights (3 student IFBlocks
# of c=90, input 11ch = warped0+warped1+mask+flow; block_tea is training-only and
# unused at inference). Runs entirely in-process on torch — no subprocess.
import torch
import torch.nn as nn
import torch.nn.functional as F
def _conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1):
    """Build the basic conv unit: a biased ``Conv2d`` followed by a per-channel
    ``PReLU`` with ``out_planes`` slopes."""
    convolution = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        bias=True,
    )
    activation = nn.PReLU(out_planes)
    return nn.Sequential(convolution, activation)
class IFBlock(nn.Module):
    """One flow-refinement stage of IFNet.

    Works at ``1/scale`` of the input resolution: a stride-2 x2 encoder, four
    residual conv pairs, then two transposed-conv heads producing a 4-channel
    flow update and a 1-channel mask update, both resized back by ``scale``.
    Attribute names are part of the checkpoint layout and must not change.
    """

    def __init__(self, in_planes, c=64):
        super().__init__()
        half = c // 2
        # Encoder: two stride-2 convs (overall 4x spatial reduction).
        self.conv0 = nn.Sequential(
            _conv(in_planes, half, 3, 2, 1),
            _conv(half, c, 3, 2, 1),
        )
        # Four residual bodies; the skip connection is added in forward().
        self.convblock0 = nn.Sequential(_conv(c, c), _conv(c, c))
        self.convblock1 = nn.Sequential(_conv(c, c), _conv(c, c))
        self.convblock2 = nn.Sequential(_conv(c, c), _conv(c, c))
        self.convblock3 = nn.Sequential(_conv(c, c), _conv(c, c))
        # Flow head: upsample 4x back, 4 output channels.
        self.conv1 = nn.Sequential(
            nn.ConvTranspose2d(c, half, 4, 2, 1),
            nn.PReLU(half),
            nn.ConvTranspose2d(half, 4, 4, 2, 1),
        )
        # Mask head: upsample 4x back, 1 output channel.
        self.conv2 = nn.Sequential(
            nn.ConvTranspose2d(c, half, 4, 2, 1),
            nn.PReLU(half),
            nn.ConvTranspose2d(half, 1, 4, 2, 1),
        )

    def forward(self, x, flow, scale=1):
        """Return ``(flow_update, mask_update)`` for features ``x`` and the
        current ``flow`` estimate, computed at ``1/scale`` resolution."""

        def _resize(tensor, factor):
            return F.interpolate(tensor, scale_factor=factor, mode="bilinear",
                                 align_corners=False, recompute_scale_factor=False)

        shrink = 1. / scale
        x = _resize(x, shrink)
        # Flow vectors are rescaled along with the image they index into.
        flow = _resize(flow, shrink) * shrink

        feat = self.conv0(torch.cat((x, flow), 1))
        for body in (self.convblock0, self.convblock1,
                     self.convblock2, self.convblock3):
            feat = body(feat) + feat

        flow_update = self.conv1(feat)
        mask_update = self.conv2(feat)
        flow_update = _resize(flow_update, scale) * scale
        mask_update = _resize(mask_update, scale)
        return flow_update, mask_update
# Base sampling grids keyed by (batch, height, width, device). Kept in float32;
# callers' dtype is matched at use time in _warp.
# NOTE(review): entries are never evicted, so the cache grows with every new
# resolution/batch size seen.
_backwarp_cache = {}


def _warp(img, flow):
    """Backward-warp ``img`` by ``flow`` (B,2,H,W) via ``grid_sample``.

    ``flow`` is in pixels (channel 0 = x, channel 1 = y). It is converted to the
    normalised [-1, 1] coordinates ``grid_sample`` expects with
    ``align_corners=True``; samples outside the image clamp to the border.
    The result has the same shape and dtype as ``img``.
    """
    B, _, H, W = img.shape
    dev = img.device
    key = (B, H, W, dev)
    grid = _backwarp_cache.get(key)
    if grid is None:
        hor = torch.linspace(-1.0, 1.0, W, device=dev).view(1, 1, 1, W).expand(B, -1, H, -1)
        ver = torch.linspace(-1.0, 1.0, H, device=dev).view(1, 1, H, 1).expand(B, -1, -1, W)
        grid = torch.cat([hor, ver], 1)  # (B,2,H,W)
        _backwarp_cache[key] = grid
    # A 1-pixel axis has no extent; avoid 0/0 -> NaN coordinates there.
    half_w = max(W - 1.0, 1.0) / 2.0
    half_h = max(H - 1.0, 1.0) / 2.0
    flow = torch.cat([flow[:, 0:1] / half_w,
                      flow[:, 1:2] / half_h], 1)
    # grid_sample requires input and grid to share a dtype; the cached grid is
    # float32, so cast the final coordinates to img's dtype (fp16/fp64/mixed).
    g = (grid + flow).permute(0, 2, 3, 1).to(img.dtype)
    return F.grid_sample(img, g, mode="bilinear", padding_mode="border",
                         align_corners=True)
class IFNet(nn.Module):
    """RIFE IFNet_HDv3: estimates bidirectional flow plus a blend mask and
    produces the t=0.5 in-between frame. Callers obtain higher fps multipliers
    by recursively interpolating midpoints."""

    def __init__(self):
        super().__init__()
        self.block0 = IFBlock(11, c=90)
        self.block1 = IFBlock(11, c=90)
        self.block2 = IFBlock(11, c=90)
        # Training-only teacher block: kept so checkpoints load, never called here.
        self.block_tea = IFBlock(14, c=90)

    @torch.no_grad()
    def interpolate(self, img0, img1, scale_list=(4, 2, 1)):
        """Return the frame halfway between ``img0`` and ``img1``.

        The three student blocks run coarse-to-fine, one per entry of
        ``scale_list``; each adds a residual to the running flow (4 channels:
        two per direction) and mask logits before both frames are re-warped.
        """
        batch, _, height, width = img0.shape
        flow = img0.new_zeros(batch, 4, height, width)
        mask = img0.new_zeros(batch, 1, height, width)
        warped0 = img0
        warped1 = img1
        stages = (self.block0, self.block1, self.block2)
        for idx in range(len(stages)):
            features = torch.cat((warped0, warped1, mask), 1)
            flow_delta, mask_delta = stages[idx](features, flow, scale=scale_list[idx])
            flow = flow + flow_delta
            mask = mask + mask_delta
            warped0 = _warp(img0, flow[:, :2])
            warped1 = _warp(img1, flow[:, 2:4])
        blend = torch.sigmoid(mask)
        return warped0 * blend + warped1 * (1 - blend)
def load_ifnet(weights_path: str, device):
    """Instantiate IFNet and populate it from a RIFE flownet checkpoint.

    Accepts either a bare state dict or one nested under ``"state_dict"``, and
    strips a leading ``"module."`` from parameter names. Loading is strict, so
    any missing or unexpected key raises. Returns the model in eval mode on
    ``device``.
    """
    model = IFNet()
    # NOTE(review): weights_only=False allows arbitrary object unpickling; only
    # load checkpoints from trusted sources (or confirm weights_only=True works
    # for the flownet files in use).
    checkpoint = torch.load(weights_path, map_location="cpu", weights_only=False)
    nested = checkpoint.get("state_dict") if isinstance(checkpoint, dict) else None
    if isinstance(nested, dict):
        checkpoint = nested
    prefix = "module."
    state = {
        (name[len(prefix):] if name.startswith(prefix) else name): tensor
        for name, tensor in checkpoint.items()
    }
    model.load_state_dict(state, strict=True)
    model.eval().to(device)
    return model
......@@ -188,6 +188,17 @@ admin_static_dir = Path(__file__).parent.parent / "admin" / "static"
if admin_static_dir.exists():
app.mount("/static/admin", StaticFiles(directory=str(admin_static_dir)), name="admin_static")
# Serve a favicon at the conventional /favicon.ico path so browsers stop 404-ing on it.
from fastapi.responses import FileResponse, Response as _FaviconResponse
_favicon_path = admin_static_dir / "favicon.ico"
@app.get("/favicon.ico", include_in_schema=False)
async def favicon():
    """Serve the admin favicon from the static dir, or an empty 404 when the
    file is not present."""
    if not _favicon_path.exists():
        return _FaviconResponse(status_code=404)
    return FileResponse(str(_favicon_path), media_type="image/x-icon")
# Include routers from submodules
app.include_router(transcriptions_router, tags=["Audio"])
app.include_router(images_router, tags=["Images"])
......
......@@ -724,7 +724,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
# Fallback: non-streaming
if get_global_debug():
print(f"DEBUG: Using non-streaming fallback for first pass")
first_pass_result = current_manager.generate(
first_pass_result = await asyncio.to_thread(
current_manager.generate,
prompt=raw_prompt_for_generation,
max_tokens=request.max_tokens or 2048,
temperature=request.temperature,
......@@ -744,7 +745,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
if get_global_debug():
print(f"DEBUG: raw_stream_generate second pass, full_prompt length: {len(full_prompt)}")
second_pass_result = current_manager.generate(
second_pass_result = await asyncio.to_thread(
current_manager.generate,
prompt=full_prompt,
max_tokens=request.max_tokens or 2048,
temperature=request.temperature,
......@@ -882,7 +884,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
# Non-streaming path (already implemented above)
# First pass: generate until reasoning close tag
first_pass_result = current_manager.generate(
first_pass_result = await asyncio.to_thread(
current_manager.generate,
prompt=raw_prompt_for_generation,
max_tokens=request.max_tokens or 2048,
temperature=request.temperature,
......@@ -991,7 +994,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
generated_text = reasoning_text + (close_tag or "") + final_text
else:
# Need second pass to get answer
second_pass_result = current_manager.generate(
second_pass_result = await asyncio.to_thread(
current_manager.generate,
prompt=full_prompt,
max_tokens=request.max_tokens or 2048,
temperature=request.temperature,
......@@ -1633,7 +1637,8 @@ async def generate_chat_response(
try:
# Use generate_chat for proper chat template handling
generated_text = current_manager.generate_chat(
generated_text = await asyncio.to_thread(
current_manager.generate_chat,
messages=messages,
max_tokens=max_tokens,
temperature=temperature,
......@@ -1966,7 +1971,8 @@ async def generate_completion_response(
created = int(time.time())
try:
generated_text = current_manager.generate(
generated_text = await asyncio.to_thread(
current_manager.generate,
prompt=prompt,
max_tokens=max_tokens,
temperature=temperature,
......
This diff is collapsed.
......@@ -143,6 +143,45 @@ def _install_layer_log_callback():
return _cb # caller must hold this reference
async def _aiter_blocking(sync_iter):
    """Bridge a blocking (sync) generator onto the asyncio event loop.

    llama.cpp's create_(chat_)completion returns a *synchronous* generator whose
    first ``next()`` runs the whole prompt prefill and every subsequent ``next()``
    runs a full token forward pass. Iterating it directly inside an ``async def``
    runs that work on the event loop and freezes every other HTTP request (the
    whole web UI) for the duration of a completion.

    This pulls one item at a time from a worker thread via ``asyncio.to_thread``
    so the loop stays responsive between (and during, since llama.cpp releases the
    GIL while computing) token steps. Closing the async generator — e.g. on client
    disconnect or task cancellation — closes the underlying sync generator, which
    stops llama.cpp at the next token boundary, matching the old inline ``break``.

    Cancellation can arrive while a worker thread is still inside ``next()``.
    Calling ``close()`` on a generator that is executing raises ``ValueError``,
    so each step and the close are serialised by a lock; when a step is in
    flight the close is handed to a short-lived helper thread that runs as soon
    as that step returns, and the event loop never waits on it.
    """
    import asyncio
    import threading

    _SENT = object()
    step_lock = threading.Lock()

    def _next():
        with step_lock:
            try:
                return next(sync_iter)
            except StopIteration:
                return _SENT

    def _close_now():
        # Best-effort: a failing close must not mask the original exit reason.
        close = getattr(sync_iter, "close", None)
        if close is None:
            return
        try:
            close()
        except Exception:
            pass

    def _close_after_step():
        with step_lock:
            _close_now()

    try:
        while True:
            item = await asyncio.to_thread(_next)
            if item is _SENT:
                break
            yield item
    finally:
        if step_lock.acquire(blocking=False):
            # No step running: close inline (cheap, does not block the loop).
            try:
                _close_now()
            finally:
                step_lock.release()
        else:
            # A worker is mid-step; close once it finishes, off the event loop.
            threading.Thread(target=_close_after_step,
                             name="aiter-blocking-close", daemon=True).start()
class VulkanBackend(ModelBackend):
"""Backend for Vulkan (AMD GPUs) using llama-cpp-python with GGUF models."""
......@@ -891,8 +930,8 @@ class VulkanBackend(ModelBackend):
first_chunk = True
prompt_len = len(prompt) if isinstance(prompt, str) else 0
for chunk in self.model.create_completion(
async for chunk in _aiter_blocking(self.model.create_completion(
stopping_criteria=_make_llama_thermal_criteria(),
prompt=prompt,
max_tokens=max_tokens,
......@@ -903,9 +942,9 @@ class VulkanBackend(ModelBackend):
stop=stop,
stream=True,
grammar=use_grammar,
):
)):
text = chunk['choices'][0].get('text', '')
if first_chunk:
# Skip the prompt text on first chunk
# The first chunk includes the full prompt plus the first new token
......@@ -931,8 +970,8 @@ class VulkanBackend(ModelBackend):
try:
first_chunk = True
prompt_len = len(prompt) if isinstance(prompt, str) else 0
for chunk in self.model.create_completion(
async for chunk in _aiter_blocking(self.model.create_completion(
stopping_criteria=_make_llama_thermal_criteria(),
prompt=prompt,
max_tokens=max_tokens,
......@@ -942,7 +981,7 @@ class VulkanBackend(ModelBackend):
repeat_penalty=repeat_penalty,
stop=stop,
stream=True,
):
)):
text = chunk['choices'][0].get('text', '')
if first_chunk:
......@@ -1001,8 +1040,8 @@ class VulkanBackend(ModelBackend):
async def generate_stream():
first_chunk = True
prompt_len = len(prompt)
for chunk in self.model.create_completion(
async for chunk in _aiter_blocking(self.model.create_completion(
stopping_criteria=_make_llama_thermal_criteria(),
prompt=prompt,
max_tokens=max_tokens,
......@@ -1011,7 +1050,7 @@ class VulkanBackend(ModelBackend):
repeat_penalty=repeat_penalty,
stop=stop,
stream=True,
):
)):
text = chunk['choices'][0].get('text', '')
if first_chunk:
......@@ -1181,7 +1220,7 @@ class VulkanBackend(ModelBackend):
prompt_tokens = 0
completion_tokens = 0
try:
for chunk in self.model.create_chat_completion(**kwargs):
async for chunk in _aiter_blocking(self.model.create_chat_completion(**kwargs)):
delta = chunk['choices'][0].get('delta', {})
text = delta.get('content') or ''
if text:
......
......@@ -144,6 +144,17 @@ class JobsConfig:
resume_on_restart: bool = True
@dataclass
class EnhanceConfig:
    """Tool policy for video enhancement (upscaling and FPS interpolation).

    Enhancement runs fully in-process on torch models by default (ESRGAN
    upscaler, RIFE/FILM interpolator) with no subprocess and no ffmpeg. Both
    flags are opt-ins that permit the external tools as alternatives when no
    model is configured/preferred.
    """

    # Permit ffmpeg (frame I/O / minterpolate) instead of PyAV + model.
    allow_ffmpeg: bool = False
    # Permit the external rife-ncnn-vulkan binary instead of a torch model.
    allow_rife_ncnn: bool = False
@dataclass
class Config:
"""Main configuration class."""
......@@ -158,6 +169,7 @@ class Config:
archive: ArchiveConfig = field(default_factory=ArchiveConfig)
thermal: ThermalConfig = field(default_factory=ThermalConfig)
jobs: JobsConfig = field(default_factory=JobsConfig)
enhance: EnhanceConfig = field(default_factory=EnhanceConfig)
broker: BrokerConfig = field(default_factory=BrokerConfig)
system_prompt: Optional[str] = None
tools_closer_prompt: bool = False
......@@ -318,6 +330,7 @@ class ConfigManager:
archive=ArchiveConfig(**config_data.get("archive", {})),
thermal=ThermalConfig(**config_data.get("thermal", {})),
jobs=JobsConfig(**config_data.get("jobs", {})),
enhance=EnhanceConfig(**config_data.get("enhance", {})),
broker=BrokerConfig(**config_data.get("broker", {})),
system_prompt=config_data.get("system_prompt"),
tools_closer_prompt=config_data.get("tools_closer_prompt", False),
......@@ -443,6 +456,10 @@ class ConfigManager:
"jobs": {
"resume_on_restart": self.config.jobs.resume_on_restart,
},
"enhance": {
"allow_ffmpeg": self.config.enhance.allow_ffmpeg,
"allow_rife_ncnn": self.config.enhance.allow_rife_ncnn,
},
"broker": {
"enabled": self.config.broker.enabled,
"base_url": self.config.broker.base_url,
......
......@@ -813,6 +813,9 @@ def main():
global_args.thermal_soft_throttle_enabled = config.thermal.soft_throttle_enabled
global_args.thermal_soft_throttle_temp = config.thermal.soft_throttle_temp
global_args.thermal_soft_throttle_max_sleep = config.thermal.soft_throttle_max_sleep
# Video-enhancement external-tool policy (default off → in-process models only).
global_args.enhance_allow_ffmpeg = config.enhance.allow_ffmpeg
global_args.enhance_allow_rife_ncnn = config.enhance.allow_rife_ncnn
global_args.n_gpu_layers = config.vulkan.n_gpu_layers
global_args.n_ctx = [config.vulkan.n_ctx]
global_args.vulkan_device = config.vulkan.device_id
......
......@@ -133,7 +133,8 @@ def detect_model_capabilities(model_name: str) -> ModelCapabilities:
return caps
# Video interpolation
if any(x in n for x in ['film-net', 'rife', 'flavr', 'dain', 'frame-interp']):
if any(x in n for x in ['film-net', 'film_net', 'film', 'rife', 'ifnet',
'flavr', 'dain', 'frame-interp', 'interpolat']):
caps.video_interpolation = True
return caps
......
......@@ -187,6 +187,7 @@ class VideoInterpolateRequest(BaseModel):
init_image: Optional[str] = None # first frame
end_image: Optional[str] = None # last frame
fps_multiplier: Optional[int] = 2
output_fps: Optional[int] = None # encode the result at this fps (None = source_fps × multiplier, preserves duration)
response_format: Optional[str] = "url"
model_config = ConfigDict(extra="allow")
......
#!/usr/bin/env bash
# Thin wrapper: resolve the directory this script lives in and hand control
# (with all arguments) to packaging/linux/package_oci_image.sh underneath it.
# -e/-u/-o pipefail: abort on errors, unset variables and failed pipelines.
set -euo pipefail
# exec replaces this shell, so the wrapped script's exit status is ours.
exec "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/packaging/linux/package_oci_image.sh" "$@"
#!/usr/bin/env bash
# Thin wrapper: resolve the directory this script lives in and hand control
# (with all arguments) to packaging/linux/make_tarball_from_venv.sh underneath it.
# -e/-u/-o pipefail: abort on errors, unset variables and failed pipelines.
set -euo pipefail
# exec replaces this shell, so the wrapped script's exit status is ours.
exec "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/packaging/linux/make_tarball_from_venv.sh" "$@"
# Runtime dependency set for the local OCI image.
# Native packages listed here are built in the wheel stage and installed from /opt/wheels.
-r /tmp/requirements.txt
# CUDA/quantization extras that are intentionally optional in source installs but useful
# in the full Linux container distribution. flash-attn and causal-conv1d stay out of
# the base image because they are CUDA-arch-sensitive and fragile to build generally.
bitsandbytes>=0.41.0
sentencepiece>=0.1.99
tiktoken>=0.5.0
tokenizers>=0.15.0
protobuf>=3.20.0
optimum-quanto>=0.2.0
gguf>=0.9.0
# Native wheels are built by packaging/linux/Dockerfile.oci with CUDA+Vulkan enabled.
llama-cpp-python>=0.2.0
stable-diffusion-cpp-python>=0.2.0
whispercpp>=0.0.17
#!/usr/bin/env python3
"""Write a small build manifest for local CoderAI distribution artifacts."""
from __future__ import annotations
import json
import os
import platform
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
def cmd(args: list[str], cwd: str | None = None) -> str:
    """Run ``args`` (optionally in ``cwd``) and return its stripped stdout.

    Best-effort by design: a missing binary, a non-zero exit status or any other
    failure yields ``""`` so the manifest can still be written. stderr is
    discarded.
    """
    try:
        completed = subprocess.run(
            args,
            cwd=cwd,
            text=True,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
    except Exception:
        return ""
    return completed.stdout.strip()
def package_versions(python_bin: str | None) -> dict[str, str]:
    """Report installed versions of the key runtime packages.

    Runs a small probe script with the interpreter at ``python_bin`` (so the
    versions come from that environment, not the one running this script) and
    parses the JSON it prints. Packages that are not installed are omitted.

    Best-effort: returns ``{}`` when ``python_bin`` is empty/None, the
    interpreter cannot be run, or its output is not valid JSON.
    """
    if not python_bin:
        return {}
    # The probe must be valid stand-alone Python: the loop and try bodies need
    # their indentation, otherwise the subprocess fails and we silently get {}.
    code = r'''
import importlib.metadata as md
names = ["torch", "torchvision", "torchaudio", "transformers", "diffusers", "accelerate", "llama-cpp-python", "stable-diffusion-cpp-python", "whispercpp", "bitsandbytes", "onnxruntime", "onnxruntime-gpu"]
out = {}
for name in names:
    try:
        out[name] = md.version(name)
    except Exception:
        pass
import json
print(json.dumps(out, sort_keys=True))
'''
    try:
        raw = subprocess.check_output([python_bin, "-c", code], text=True, stderr=subprocess.DEVNULL)
        return json.loads(raw)
    except Exception:
        return {}
def main() -> int:
    """Collect build metadata from the environment and write it as JSON.

    Inputs are read from environment variables (MANIFEST_OUT, PROJECT_ROOT,
    MANIFEST_PYTHON, MANIFEST_LOCAL_BINS, MANIFEST_ARTIFACT, MANIFEST_BUILD_MODE,
    MANIFEST_VENV and the version pins PYTHON_VERSION, PBS_RELEASE, UV_VERSION,
    CUDA_VERSION, UBUNTU_VERSION). Git state is queried in PROJECT_ROOT and
    package versions are probed with MANIFEST_PYTHON. The manifest's parent
    directory is created when missing.

    Returns 0 so the value can be used directly as the process exit status.
    """
    env = os.environ
    manifest_path = Path(env.get("MANIFEST_OUT", "BUILD-MANIFEST.json"))
    project_root = str(Path(env.get("PROJECT_ROOT", ".")).resolve())
    interpreter = env.get("MANIFEST_PYTHON")
    # MANIFEST_LOCAL_BINS is an os.pathsep-separated list; empty entries are dropped.
    bundled_bins = [
        entry
        for entry in env.get("MANIFEST_LOCAL_BINS", "").split(os.pathsep)
        if entry
    ]

    manifest = {
        "artifact": env.get("MANIFEST_ARTIFACT", "unknown"),
        "build_mode": env.get("MANIFEST_BUILD_MODE", "unknown"),
        "build_time_utc": datetime.now(timezone.utc).isoformat(),
        "git_commit": cmd(["git", "rev-parse", "HEAD"], project_root),
        # Any porcelain output means uncommitted changes. Note that a failing
        # git call yields "" from cmd() and is therefore reported as clean.
        "git_dirty": bool(cmd(["git", "status", "--porcelain"], project_root)),
        "host": {
            "system": platform.system(),
            "release": platform.release(),
            "machine": platform.machine(),
        },
        "python_version": env.get("PYTHON_VERSION", ""),
        "python_build_standalone_release": env.get("PBS_RELEASE", ""),
        "uv_version": env.get("UV_VERSION", ""),
        "cuda_version": env.get("CUDA_VERSION", ""),
        "ubuntu_version": env.get("UBUNTU_VERSION", ""),
        "source_venv": env.get("MANIFEST_VENV", ""),
        "included_local_binaries": bundled_bins,
        "package_versions": package_versions(interpreter),
    }

    manifest_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(manifest, indent=2, sort_keys=True)
    manifest_path.write_text(serialized + "\n")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
ARG CUDA_VERSION=12.4.1
ARG UBUNTU_VERSION=22.04
FROM scratch AS build_meta
COPY .packaging-cache/build-manifest.json /build-manifest.json
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS builder
ARG PYTHON_VERSION=3.13.5
ARG PBS_RELEASE=20250612
ARG UV_VERSION=0.7.13
ARG WHISPERCPP_REF=master
ARG LLAMA_CPP_PYTHON_VERSION=
ARG SD_CPP_PYTHON_VERSION=
ENV DEBIAN_FRONTEND=noninteractive \
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
PIP_NO_CACHE_DIR=1 \
UV_SYSTEM_PYTHON=1 \
FORCE_CMAKE=1 \
CMAKE_BUILD_PARALLEL_LEVEL=4 \
MAX_JOBS=4
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
cmake \
curl \
git \
libgomp1 \
libvulkan-dev \
ninja-build \
patchelf \
pkg-config \
zstd \
vulkan-tools \
glslang-tools \
&& rm -rf /var/lib/apt/lists/*
RUN set -eux; \
curl -fsSL -o /tmp/python.tar.gz \
"https://github.com/astral-sh/python-build-standalone/releases/download/${PBS_RELEASE}/cpython-${PYTHON_VERSION}+${PBS_RELEASE}-x86_64-unknown-linux-gnu-install_only.tar.gz"; \
mkdir -p /opt/coderai; \
tar -xzf /tmp/python.tar.gz -C /opt/coderai; \
rm /tmp/python.tar.gz; \
/opt/coderai/python/bin/python3 --version
ENV PYTHONHOME=/opt/coderai/python \
PATH=/opt/coderai/python/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
RUN curl -fsSL "https://github.com/astral-sh/uv/releases/download/${UV_VERSION}/uv-x86_64-unknown-linux-gnu.tar.gz" \
| tar -xz -C /usr/local/bin --strip-components=1 uv-x86_64-unknown-linux-gnu/uv
RUN uv pip install --python /opt/coderai/python/bin/python3 --upgrade pip setuptools wheel scikit-build-core cmake ninja numpy pybind11
RUN mkdir -p /opt/wheels
# Native wheels are built with pip rather than uv: uv's pip-compatible interface
# provides install/compile/sync/etc. but no `wheel` subcommand, so `uv pip wheel`
# cannot be used here. pip ships with the standalone Python and is upgraded
# above; PIP_NO_CACHE_DIR=1 (set in ENV) keeps the layer free of pip's cache.

# llama-cpp-python with the CUDA and Vulkan backends enabled.
# LLAMA_CPP_PYTHON_VERSION is optional; when empty the requirement is unpinned.
RUN set -eux; \
    pkg="llama-cpp-python${LLAMA_CPP_PYTHON_VERSION:+==${LLAMA_CPP_PYTHON_VERSION}}"; \
    CMAKE_ARGS="-DGGML_CUDA=ON -DGGML_VULKAN=ON" \
        /opt/coderai/python/bin/python3 -m pip wheel --wheel-dir /opt/wheels --no-deps "$pkg"

# stable-diffusion-cpp-python: try CUDA+Vulkan first, fall back to CUDA only
# when the combined build fails.
RUN set -eux; \
    pkg="stable-diffusion-cpp-python${SD_CPP_PYTHON_VERSION:+==${SD_CPP_PYTHON_VERSION}}"; \
    CMAKE_ARGS="-DSD_CUDA=ON -DSD_VULKAN=ON -DSD_WEBM=OFF" \
        /opt/coderai/python/bin/python3 -m pip wheel --wheel-dir /opt/wheels --no-deps "$pkg" || \
    CMAKE_ARGS="-DSD_CUDA=ON -DSD_WEBM=OFF" \
        /opt/coderai/python/bin/python3 -m pip wheel --wheel-dir /opt/wheels --no-deps "$pkg"

# whispercpp: prefer the Python bindings from the whisper.cpp checkout
# (WHISPERCPP_REF, falling back to the default branch), trying CUDA+Vulkan, then
# Vulkan only; if the bindings are absent or fail to build, fall back to the
# `whispercpp` package from the package index.
RUN set -eux; \
    git clone --depth 1 --branch "$WHISPERCPP_REF" https://github.com/ggerganov/whisper.cpp /tmp/whisper.cpp || \
    git clone --depth 1 https://github.com/ggerganov/whisper.cpp /tmp/whisper.cpp; \
    if [ -d /tmp/whisper.cpp/bindings/python ]; then \
        CMAKE_ARGS="-DWHISPER_VULKAN=ON -DGGML_VULKAN=ON -DGGML_CUDA=ON" \
            /opt/coderai/python/bin/python3 -m pip wheel --wheel-dir /opt/wheels --no-deps /tmp/whisper.cpp/bindings/python || \
        CMAKE_ARGS="-DWHISPER_VULKAN=ON -DGGML_VULKAN=ON" \
            /opt/coderai/python/bin/python3 -m pip wheel --wheel-dir /opt/wheels --no-deps /tmp/whisper.cpp/bindings/python || \
        /opt/coderai/python/bin/python3 -m pip wheel --wheel-dir /opt/wheels --no-deps whispercpp; \
    else \
        /opt/coderai/python/bin/python3 -m pip wheel --wheel-dir /opt/wheels --no-deps whispercpp; \
    fi
WORKDIR /opt/coderai/app
COPY packaging/common/requirements-oci.txt /tmp/requirements-oci.txt
COPY requirements.txt /tmp/requirements.txt
COPY requirements-nvidia.txt /tmp/requirements-nvidia.txt
COPY requirements-vulkan.txt /tmp/requirements-vulkan.txt
# Install dependencies BEFORE copying the application source so this expensive
# layer is reused from the build cache when only source files change; it is
# invalidated only when one of the requirements files above (or the wheels)
# changes. Locally built native wheels are picked up via --find-links.
# NOTE(review): assumes requirements.txt does not reference the source tree
# (e.g. "-e ." or relative paths) - confirm before relying on this ordering.
RUN uv pip install --python /opt/coderai/python/bin/python3 \
        --find-links /opt/wheels \
        -r /tmp/requirements-oci.txt \
    && rm -rf /root/.cache/uv /tmp/requirements*.txt
COPY . /opt/coderai/app
RUN set -eux; \
chmod +x /opt/coderai/app/coderai; \
mkdir -p /config /models /cache /opt/coderai/app/models; \
rm -rf \
/opt/coderai/app/.git \
/opt/coderai/app/venv* \
/opt/coderai/app/.venv \
/opt/coderai/app/township_output \
/opt/coderai/app/offload \
/opt/coderai/app/dist \
/opt/coderai/app/.packaging-cache; \
find /opt/coderai/python -type d \( -name __pycache__ -o -name tests -o -name test \) -prune -exec rm -rf '{}' +; \
find /opt/coderai/app -type d -name __pycache__ -prune -exec rm -rf '{}' +
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime
ARG PYTHON_VERSION=3.13.5
ENV DEBIAN_FRONTEND=noninteractive \
PYTHONHOME=/opt/coderai/python \
PATH=/opt/coderai/python/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
PYTHONUNBUFFERED=1 \
HF_HOME=/cache/huggingface \
HUGGINGFACE_HUB_CACHE=/cache/huggingface/hub \
TRANSFORMERS_CACHE=/cache/huggingface/transformers \
DIFFUSERS_CACHE=/cache/diffusers \
CODERAI_CONFIG_DIR=/config \
CODERAI_MODELS_DIR=/models \
CODERAI_CACHE_DIR=/cache \
CODERAI_HOST=0.0.0.0 \
CODERAI_PORT=8776
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
ffmpeg \
git \
libgomp1 \
libgl1 \
libglib2.0-0 \
libsndfile1 \
libvulkan1 \
mesa-vulkan-drivers \
openssl \
&& rm -rf /var/lib/apt/lists/*
COPY --from=builder /opt/coderai /opt/coderai
COPY --from=build_meta /build-manifest.json /opt/coderai/BUILD-MANIFEST.json
COPY packaging/linux/launcher/coderai-oci /usr/local/bin/coderai
RUN set -eux; \
chmod +x /usr/local/bin/coderai; \
mkdir -p /config /models /cache
VOLUME ["/config", "/models", "/cache"]
EXPOSE 8776
ENTRYPOINT ["/usr/local/bin/coderai"]
ARG CUDA_VERSION=12.4.1
ARG UBUNTU_VERSION=22.04
FROM scratch AS build_meta
COPY .packaging-cache/build-manifest.json /build-manifest.json
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime
ARG PYTHON_VERSION=3.13.5
ARG PBS_RELEASE=20250612
ARG VENV_PYTHON_MINOR=3.13
ENV DEBIAN_FRONTEND=noninteractive \
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
PYTHONUNBUFFERED=1 \
HF_HOME=/cache/huggingface \
HUGGINGFACE_HUB_CACHE=/cache/huggingface/hub \
TRANSFORMERS_CACHE=/cache/huggingface/transformers \
DIFFUSERS_CACHE=/cache/diffusers \
CODERAI_CONFIG_DIR=/config \
CODERAI_MODELS_DIR=/models \
CODERAI_CACHE_DIR=/cache \
CODERAI_HOST=0.0.0.0 \
CODERAI_PORT=8776
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
curl \
ffmpeg \
git \
libgomp1 \
libgl1 \
libglib2.0-0 \
libsndfile1 \
libvulkan1 \
mesa-vulkan-drivers \
openssl \
rsync \
&& rm -rf /var/lib/apt/lists/*
RUN set -eux; \
curl -fsSL -o /tmp/python.tar.gz \
"https://github.com/astral-sh/python-build-standalone/releases/download/${PBS_RELEASE}/cpython-${PYTHON_VERSION}+${PBS_RELEASE}-x86_64-unknown-linux-gnu-install_only.tar.gz"; \
mkdir -p /opt/coderai; \
tar -xzf /tmp/python.tar.gz -C /opt/coderai; \
rm /tmp/python.tar.gz; \
/opt/coderai/python/bin/python3 --version
ENV PYTHONHOME=/opt/coderai/python \
PATH=/opt/coderai/python/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# BuildKit named context supplied by packaging/linux/build_oci_image.sh:
# --build-context local_bundle=/path/to/.packaging-cache/oci-venv-context
# The bundle contains the selected venv plus ldd-discovered native libraries from
# the local machine. GPU drivers are intentionally not bundled; NVIDIA Container
# Toolkit / host Vulkan ICDs remain the runtime contract.
COPY --from=local_bundle / /tmp/local-bundle/
# Merge the local bundle into the standalone Python installation:
#  - fail early (test -d) if the bundle has no site-packages for VENV_PYTHON_MINOR;
#  - mirror the venv's site-packages over the interpreter's (rsync --delete
#    removes anything that is not in the venv);
#  - copy regular files from venv/bin, except python* and activate*, into the
#    interpreter's bin directory;
#  - when present, copy bundled native libraries to /opt/coderai/local-libs and
#    helper binaries to /usr/local/bin (made executable);
#  - remove the staging copy and prune __pycache__/tests/test directories.
RUN set -eux; \
target_sp="/opt/coderai/python/lib/python${VENV_PYTHON_MINOR}/site-packages"; \
source_sp="/tmp/local-bundle/venv/lib/python${VENV_PYTHON_MINOR}/site-packages"; \
test -d "$source_sp"; \
mkdir -p "$target_sp"; \
rsync -a --delete "$source_sp/" "$target_sp/"; \
if [ -d /tmp/local-bundle/venv/bin ]; then \
find /tmp/local-bundle/venv/bin -maxdepth 1 -type f ! -name 'python*' ! -name 'activate*' -exec cp -a '{}' /opt/coderai/python/bin/ \;; \
fi; \
if [ -d /tmp/local-bundle/local-libs ]; then \
mkdir -p /opt/coderai/local-libs; \
rsync -a /tmp/local-bundle/local-libs/ /opt/coderai/local-libs/; \
fi; \
if [ -d /tmp/local-bundle/local-bin ]; then \
rsync -a /tmp/local-bundle/local-bin/ /usr/local/bin/; \
find /usr/local/bin -maxdepth 1 -type f -exec chmod +x '{}' \;; \
fi; \
rm -rf /tmp/local-bundle; \
find /opt/coderai/python -type d \( -name __pycache__ -o -name tests -o -name test \) -prune -exec rm -rf '{}' +
WORKDIR /opt/coderai/app
COPY . /opt/coderai/app
COPY --from=build_meta /build-manifest.json /opt/coderai/BUILD-MANIFEST.json
COPY packaging/linux/launcher/coderai-oci /usr/local/bin/coderai
RUN set -eux; \
chmod +x /usr/local/bin/coderai /opt/coderai/app/coderai; \
mkdir -p /config /models /cache /opt/coderai/app/models; \
rm -rf \
/opt/coderai/app/.git \
/opt/coderai/app/venv* \
/opt/coderai/app/.venv \
/opt/coderai/app/township_output \
/opt/coderai/app/offload \
/opt/coderai/app/dist \
/opt/coderai/app/.packaging-cache; \
find /opt/coderai/app -type d -name __pycache__ -prune -exec rm -rf '{}' +; \
/opt/coderai/python/bin/python3 - <<'PY'
import importlib.util
missing = [name for name in ("fastapi", "uvicorn", "torch") if importlib.util.find_spec(name) is None]
if missing:
raise SystemExit(f"venv image is missing required package(s): {', '.join(missing)}")
PY
VOLUME ["/config", "/models", "/cache"]
EXPOSE 8776
ENTRYPOINT ["/usr/local/bin/coderai"]
CoderAI Linux Tarball
=====================
Run:
./bin/coderai
Then open:
http://127.0.0.1:8776/admin
Default first-run credentials are created by the app:
admin / admin
State directories inside this bundle:
config/ app config and auth
models/ model storage / data path
cache/ Hugging Face, diffusers, and runtime caches
NVIDIA CUDA:
Install a compatible NVIDIA driver on the host. The bundle includes Python/CUDA
runtime packages from the source venv, but not the host GPU driver.
AMD/Intel Vulkan:
Install Vulkan runtime/ICD packages on the host, for example on Debian/Ubuntu:
sudo apt install libvulkan1 mesa-vulkan-drivers
CPU:
No GPU setup is required.
This diff is collapsed.
#!/usr/bin/env sh
set -eu

# Runtime launcher for the CoderAI OCI image.
# Keeps config/model/cache state in mounted volumes and exposes the server outside
# the container by default.
: "${CODERAI_CONFIG_DIR:=/config}"
: "${CODERAI_MODELS_DIR:=/models}"
: "${CODERAI_CACHE_DIR:=/cache}"
: "${CODERAI_HOST:=0.0.0.0}"
: "${CODERAI_PORT:=8776}"

export PYTHONHOME=/opt/coderai/python
export PATH="/opt/coderai/python/bin:$PATH"
export XDG_CONFIG_HOME="$CODERAI_CONFIG_DIR"
export XDG_DATA_HOME="$CODERAI_MODELS_DIR"
export XDG_CACHE_HOME="$CODERAI_CACHE_DIR"
export HF_HOME="${HF_HOME:-$CODERAI_CACHE_DIR/huggingface}"
export HUGGINGFACE_HUB_CACHE="${HUGGINGFACE_HUB_CACHE:-$HF_HOME/hub}"

# Library search path: the interpreter's own libs, the bundled local libs and
# every nvidia/*/lib directory found in site-packages. The Python minor version
# is globbed instead of hard-coded because PYTHON_VERSION is a build argument;
# an unmatched glob stays literal and is skipped by the directory test.
LIBS="/opt/coderai/python/lib:/opt/coderai/local-libs"
for d in /opt/coderai/python/lib/python3.*/site-packages/nvidia/*/lib; do
    if [ -d "$d" ]; then
        LIBS="$LIBS:$d"
    fi
done
export LD_LIBRARY_PATH="$LIBS${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

mkdir -p "$CODERAI_CONFIG_DIR/coderai" "$CODERAI_MODELS_DIR/coderai" "$CODERAI_CACHE_DIR/coderai"
CONFIG_DIR="$CODERAI_CONFIG_DIR/coderai"
CONFIG_FILE="$CONFIG_DIR/config.json"

# Ensure the container binds to all interfaces even when config.json was created
# by the app default, which uses 127.0.0.1 for local desktop installs.
# One script covers both the "no config yet" and "existing config" cases: a
# missing or unparsable file is treated as an empty configuration.
/opt/coderai/python/bin/python3 - "$CONFIG_FILE" "$CODERAI_HOST" "$CODERAI_PORT" <<'PY'
import json
import sys
from pathlib import Path

path = Path(sys.argv[1])
host = sys.argv[2]
port = int(sys.argv[3])

try:
    data = json.loads(path.read_text())
except Exception:
    data = {}
# Tolerate valid JSON with an unexpected shape instead of crashing the launcher.
if not isinstance(data, dict):
    data = {}

changed = False
server = data.get("server")
if not isinstance(server, dict):
    server = {}
    data["server"] = server
    changed = True

# Replace an unset/loopback host, or any host when a non-default host was
# explicitly requested; an explicit non-loopback host in the file is otherwise kept.
if server.get("host") in (None, "", "127.0.0.1", "localhost") or host != "0.0.0.0":
    if server.get("host") != host:
        server["host"] = host
        changed = True
if server.get("port") != port:
    server["port"] = port
    changed = True

if changed:
    path.write_text(json.dumps(data, indent=2) + "\n")
PY

exec /opt/coderai/python/bin/python3 /opt/coderai/app/coderai --config "$CONFIG_DIR" "$@"
#!/usr/bin/env sh
set -eu

# Runtime launcher for the relocatable CoderAI Linux tarball.
# All paths are resolved relative to the bundle root (the parent of this
# script's directory), so the extracted tree can live anywhere.
HERE="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"

: "${CODERAI_CONFIG_DIR:=$HERE/config}"
: "${CODERAI_MODELS_DIR:=$HERE/models}"
: "${CODERAI_CACHE_DIR:=$HERE/cache}"
# NOTE(review): CODERAI_HOST/CODERAI_PORT are defaulted here but not exported or
# otherwise used by this launcher - confirm whether the app reads them itself.
: "${CODERAI_HOST:=127.0.0.1}"
: "${CODERAI_PORT:=8776}"

export PYTHONHOME="$HERE/python"
export PATH="$HERE/python/bin:$HERE/bin:$PATH"
export XDG_CONFIG_HOME="$CODERAI_CONFIG_DIR"
export XDG_DATA_HOME="$CODERAI_MODELS_DIR"
export XDG_CACHE_HOME="$CODERAI_CACHE_DIR"
export HF_HOME="${HF_HOME:-$CODERAI_CACHE_DIR/huggingface}"
export HUGGINGFACE_HUB_CACHE="${HUGGINGFACE_HUB_CACHE:-$HF_HOME/hub}"

# Library search path: bundled interpreter libs, ldd-harvested local libs and
# every nvidia/*/lib directory in site-packages. The Python minor version is
# globbed rather than hard-coded because the bundled interpreter version is
# configurable at build time; an unmatched glob stays literal and is skipped.
LIBS="$HERE/python/lib:$HERE/local-libs"
for d in "$HERE"/python/lib/python3.*/site-packages/nvidia/*/lib; do
    if [ -d "$d" ]; then
        LIBS="$LIBS:$d"
    fi
done
export LD_LIBRARY_PATH="$LIBS${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

mkdir -p "$CODERAI_CONFIG_DIR/coderai" "$CODERAI_MODELS_DIR/coderai" "$CODERAI_CACHE_DIR/coderai"
exec "$HERE/python/bin/python3" "$HERE/app/coderai" --config "$CODERAI_CONFIG_DIR/coderai" "$@"
#!/usr/bin/env bash
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
VERSIONS_FILE="$ROOT_DIR/packaging/versions.env"
if [[ -f "$VERSIONS_FILE" ]]; then
# shellcheck disable=SC1090
source "$VERSIONS_FILE"
fi
VENV_PATH="${VIRTUAL_ENV:-}"
OUT_DIR="$ROOT_DIR/dist"
OUT_NAME=""
INCLUDE_LOCAL_LIBS=1
AUTO_LOCAL_BINS=1
LOCAL_BINARIES=()
LOCAL_BINARY_DIRS=()
PYTHON_VERSION="${PYTHON_VERSION:-3.13.5}"
PBS_RELEASE="${PBS_RELEASE:-20250612}"
UV_VERSION="${UV_VERSION:-0.7.13}"
CUDA_VERSION="${CUDA_VERSION:-12.4.1}"
UBUNTU_VERSION="${UBUNTU_VERSION:-22.04}"
usage() {
cat <<'EOF'
Usage:
packaging/linux/make_tarball_from_venv.sh --venv PATH
source venv_all/bin/activate && packaging/linux/make_tarball_from_venv.sh
Options:
--venv PATH Source virtualenv to package. Defaults to activated $VIRTUAL_ENV.
-o, --output PATH Output .tar.zst path. Defaults to dist/coderai-linux-x64-venv.tar.zst.
--no-local-libs Do not copy ldd-discovered native libraries from the venv.
--no-auto-local-bins Do not auto-include known locally compiled helper binaries.
--include-local-bin PATH
Copy an extra tested local binary into bin/, including its ldd libs.
--include-local-dir PATH
Copy executable files from a local build directory into bin/.
-h, --help Show this help.
EOF
}
while [[ $# -gt 0 ]]; do
case "$1" in
--venv)
[[ $# -ge 2 ]] || { echo "Error: --venv requires a path" >&2; exit 2; }
VENV_PATH="$2"; shift 2 ;;
-o|--output)
[[ $# -ge 2 ]] || { echo "Error: $1 requires a path" >&2; exit 2; }
OUT_NAME="$2"; shift 2 ;;
--no-local-libs) INCLUDE_LOCAL_LIBS=0; shift ;;
--no-auto-local-bins) AUTO_LOCAL_BINS=0; shift ;;
--include-local-bin)
[[ $# -ge 2 ]] || { echo "Error: --include-local-bin requires a path" >&2; exit 2; }
LOCAL_BINARIES+=("$2"); shift 2 ;;
--include-local-dir)
[[ $# -ge 2 ]] || { echo "Error: --include-local-dir requires a path" >&2; exit 2; }
LOCAL_BINARY_DIRS+=("$2"); shift 2 ;;
-h|--help) usage; exit 0 ;;
-*) echo "Error: unknown option: $1" >&2; usage >&2; exit 2 ;;
*) echo "Error: unexpected argument: $1" >&2; usage >&2; exit 2 ;;
esac
done
# Validate the source virtualenv and make sure its Python minor version matches
# the standalone interpreter that will be bundled (site-packages are copied
# verbatim, so compiled extensions must target the same minor version).
if [[ -z "$VENV_PATH" ]]; then
  echo "Error: pass --venv PATH or activate a virtualenv first" >&2
  exit 2
fi
[[ -d "$VENV_PATH" ]] || { echo "Error: venv directory does not exist: $VENV_PATH" >&2; exit 2; }
VENV_PATH="$(cd "$VENV_PATH" && pwd)"
[[ -x "$VENV_PATH/bin/python" ]] || { echo "Error: missing venv python: $VENV_PATH/bin/python" >&2; exit 2; }
# The interpreter path is quoted so venvs located under paths containing
# spaces work; prints e.g. "3.13".
VENV_PYTHON_MINOR="$("$VENV_PATH/bin/python" -c 'import sys; print("%d.%d" % sys.version_info[:2])')"
PBS_PYTHON_MINOR="${PYTHON_VERSION%.*}"
if [[ "$VENV_PYTHON_MINOR" != "$PBS_PYTHON_MINOR" ]]; then
  echo "Error: venv Python minor ($VENV_PYTHON_MINOR) does not match standalone Python minor ($PBS_PYTHON_MINOR)" >&2
  exit 2
fi
mkdir -p "$OUT_DIR" "$ROOT_DIR/.packaging-cache"
[[ -n "$OUT_NAME" ]] || OUT_NAME="$OUT_DIR/coderai-linux-x64-venv.tar.zst"
STAGE="$ROOT_DIR/.packaging-cache/tarball/coderai"
rm -rf "$ROOT_DIR/.packaging-cache/tarball"
mkdir -p "$STAGE/python" "$STAGE/app" "$STAGE/bin" "$STAGE/local-libs" "$STAGE/config" "$STAGE/models" "$STAGE/cache"
curl -fsSL -o "$ROOT_DIR/.packaging-cache/python.tar.gz" \
"https://github.com/astral-sh/python-build-standalone/releases/download/${PBS_RELEASE}/cpython-${PYTHON_VERSION}+${PBS_RELEASE}-x86_64-unknown-linux-gnu-install_only.tar.gz"
tar -xzf "$ROOT_DIR/.packaging-cache/python.tar.gz" -C "$STAGE"
source_sp="$VENV_PATH/lib/python${VENV_PYTHON_MINOR}/site-packages"
target_sp="$STAGE/python/lib/python${VENV_PYTHON_MINOR}/site-packages"
[[ -d "$source_sp" ]] || { echo "Error: venv site-packages not found: $source_sp" >&2; exit 2; }
rsync -a --delete "$source_sp/" "$target_sp/"
if [[ -d "$VENV_PATH/bin" ]]; then
find "$VENV_PATH/bin" -maxdepth 1 -type f ! -name 'python*' ! -name 'activate*' -exec cp -a '{}' "$STAGE/python/bin/" \;
fi
# add_local_binary PATH
# Append PATH (made absolute) to LOCAL_BINARIES if it is an executable regular
# file and not already listed. Non-existent or non-executable paths are
# silently ignored so callers can probe well-known locations.
add_local_binary() {
  local path="$1"
  local abs_path existing
  [[ -x "$path" && -f "$path" ]] || return 0
  abs_path="$(cd "$(dirname "$path")" && pwd)/$(basename "$path")"
  # ${arr[@]+...} keeps an empty array from tripping `set -u` on bash < 4.4.
  for existing in ${LOCAL_BINARIES[@]+"${LOCAL_BINARIES[@]}"}; do
    if [[ "$existing" == "$abs_path" ]]; then
      return 0
    fi
  done
  LOCAL_BINARIES+=("$abs_path")
}
# Auto-discover known locally compiled helper binaries (missing ones are skipped).
if [[ "$AUTO_LOCAL_BINS" == "1" ]]; then
  for p in "/usr/local/bin/whisper-server" "/usr/local/bin/whisper-cli" "$HOME/whisper.cpp/build/bin/whisper-server" "$HOME/whisper.cpp/build/bin/whisper-cli" "$HOME/whisper.cpp/build/bin/main" "$HOME/whisper.cpp/build/bin/server"; do
    add_local_binary "$p"
  done
fi

# Executables from user-supplied build directories (searched two levels deep).
# ${arr[@]+...} keeps empty arrays from tripping `set -u` on bash < 4.4.
for d in ${LOCAL_BINARY_DIRS[@]+"${LOCAL_BINARY_DIRS[@]}"}; do
  [[ -d "$d" ]] || { echo "Error: local binary directory does not exist: $d" >&2; exit 2; }
  while IFS= read -r -d '' found_bin; do
    add_local_binary "$found_bin"
  done < <(find "$d" -maxdepth 2 -type f -perm -111 -print0)
done

# Copy every selected binary into the bundle. -L dereferences symlinks so a
# symlinked binary is bundled as a real file rather than as a link that would
# dangle once the archive is extracted on another machine.
for b in ${LOCAL_BINARIES[@]+"${LOCAL_BINARIES[@]}"}; do
  [[ -f "$b" && -x "$b" ]] || { echo "Error: local binary is not an executable file: $b" >&2; exit 2; }
  cp -aL "$b" "$STAGE/bin/"
done
# Harvest non-system shared libraries that the venv and bundled binaries link
# against (as reported by ldd on this machine) into local-libs/.
if [[ "$INCLUDE_LOCAL_LIBS" == "1" ]]; then
  VENV_PATH_FOR_LDD="$VENV_PATH" LOCAL_BIN_DIR="$STAGE/bin" LOCAL_LIB_DIR="$STAGE/local-libs" python3 - <<'PY'
import os
import shutil
import subprocess
from pathlib import Path

venv = Path(os.environ['VENV_PATH_FOR_LDD'])
local_bin = Path(os.environ['LOCAL_BIN_DIR'])
local_lib = Path(os.environ['LOCAL_LIB_DIR'])

# Libraries expected from the target system (libc, OpenSSL, compression, ...).
skip_names = {'linux-vdso.so.1','libc.so.6','libdl.so.2','libm.so.6','libpthread.so.0','librt.so.1','libutil.so.1','libresolv.so.2','libselinux.so.1','libpcre2-8.so.0','libacl.so.1','libattr.so.1','libz.so.1','libzstd.so.1','liblzma.so.5','libbz2.so.1.0','libssl.so.3','libcrypto.so.3','libgcc_s.so.1','libstdc++.so.6'}
# GPU driver libraries must come from the host driver, never from the bundle.
skip_starts = ('libcuda.so', 'libnvidia-')
system_dirs = ('/lib/', '/lib64/', '/usr/lib/', '/usr/lib64/')

# Anything executable or shared-object-like under the venv and the bundled bin dir.
candidates = []
for root in (venv / 'lib', venv / 'bin', local_bin):
    if root.exists():
        for p in root.rglob('*'):
            if p.is_file() and (os.access(p, os.X_OK) or '.so' in p.name):
                candidates.append(p)

# Maps the file name the dynamic loader asks for -> path reported by ldd.
# Keying on the requested name (not the symlink-resolved real file name) is
# essential: the loader searches LD_LIBRARY_PATH for e.g. "libfoo.so.1", so a
# copy stored only as "libfoo.so.1.2.3" would never be found at run time.
libs = {}
for p in candidates:
    proc = subprocess.run(['ldd', str(p)], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, check=False)
    for line in proc.stdout.splitlines():
        line = line.strip()
        dep = None
        if '=>' in line:
            rhs = line.split('=>', 1)[1].strip()
            if rhs.startswith('/'):
                dep = rhs.split(' (', 1)[0]
        elif line.startswith('/'):
            dep = line.split(' (', 1)[0]
        if not dep:
            continue
        dp = Path(dep)
        name = dp.name
        if name in skip_names or any(name.startswith(s) for s in skip_starts):
            continue
        # System-directory libraries are left to the target host, unless they
        # live inside a site-packages tree.
        if str(dp).startswith(system_dirs) and 'site-packages' not in str(dp):
            continue
        if str(dp).startswith(('/lib/ld-linux', '/lib64/ld-linux')):
            continue
        if dp.exists():
            libs.setdefault(name, dp)

for name, src in sorted(libs.items()):
    dest = local_lib / name
    if not dest.exists():
        # copy2 follows symlinks: the real file's content is stored under the
        # name the loader will look up.
        shutil.copy2(src, dest)
print(f"Copied {len(libs)} ldd-discovered native libraries")
PY
fi
rsync -a --delete \
--exclude '.git' --exclude 'venv*' --exclude '.venv' --exclude '__pycache__' \
--exclude 'models' --exclude 'offload' --exclude 'township_output' --exclude 'dist' --exclude '.packaging-cache' \
"$ROOT_DIR/" "$STAGE/app/"
cp "$ROOT_DIR/packaging/linux/launcher/coderai-tarball" "$STAGE/bin/coderai"
cp "$ROOT_DIR/packaging/linux/README-RUN.txt" "$STAGE/README-RUN.txt"
chmod +x "$STAGE/bin/coderai"
MANIFEST_OUT="$STAGE/BUILD-MANIFEST.json" PROJECT_ROOT="$ROOT_DIR" MANIFEST_ARTIFACT="linux-tarball" MANIFEST_BUILD_MODE="venv" MANIFEST_PYTHON="$VENV_PATH/bin/python" MANIFEST_VENV="$VENV_PATH" MANIFEST_LOCAL_BINS="$(IFS=:; echo "${LOCAL_BINARIES[*]:-}")" PYTHON_VERSION="$PYTHON_VERSION" PBS_RELEASE="$PBS_RELEASE" UV_VERSION="$UV_VERSION" CUDA_VERSION="$CUDA_VERSION" UBUNTU_VERSION="$UBUNTU_VERSION" python3 "$ROOT_DIR/packaging/common/write_manifest.py"
find "$STAGE" -type d -name __pycache__ -prune -exec rm -rf '{}' +
tar --zstd -cf "$OUT_NAME" -C "$ROOT_DIR/.packaging-cache/tarball" coderai
sha256sum "$OUT_NAME" > "$OUT_NAME.sha256"
cat <<EOF
Created Linux tarball:
archive: $OUT_NAME
checksum: $OUT_NAME.sha256
Extract and run:
tar --zstd -xf "$OUT_NAME"
./coderai/bin/coderai
EOF
#!/usr/bin/env bash
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
VERSIONS_FILE="$ROOT_DIR/packaging/versions.env"
if [[ -f "$VERSIONS_FILE" ]]; then
# shellcheck disable=SC1090
source "$VERSIONS_FILE"
fi
DOCKER_BIN="${DOCKER:-docker}"
read -r -a DOCKER_CMD <<< "$DOCKER_BIN"
IMAGE_TAG="${OCI_IMAGE:-coderai:local}"
OUT_DIR="$ROOT_DIR/dist"
OUT_NAME=""
COMPRESS=1
usage() {
cat <<'EOF'
Usage:
packaging/linux/package_oci_image.sh [IMAGE_TAG]
packaging/linux/package_oci_image.sh -t IMAGE_TAG -o dist/name.tar.zst
Options:
-t, --tag TAG Image tag to export (default: coderai:local or OCI_IMAGE).
-o, --output PATH Output archive path. Defaults to dist/coderai-oci-<tag>.tar.zst.
--no-compress Write an uncompressed docker-save .tar.
-h, --help Show this help.
EOF
}
while [[ $# -gt 0 ]]; do
case "$1" in
-t|--tag)
[[ $# -ge 2 ]] || { echo "Error: $1 requires a tag" >&2; exit 2; }
IMAGE_TAG="$2"
shift 2
;;
-o|--output)
[[ $# -ge 2 ]] || { echo "Error: $1 requires a path" >&2; exit 2; }
OUT_NAME="$2"
shift 2
;;
--no-compress)
COMPRESS=0
shift
;;
-h|--help)
usage
exit 0
;;
-*)
echo "Error: unknown option: $1" >&2
usage >&2
exit 2
;;
*)
IMAGE_TAG="$1"
shift
;;
esac
done
if ! "${DOCKER_CMD[@]}" image inspect "$IMAGE_TAG" >/dev/null 2>&1; then
echo "Error: image not found: $IMAGE_TAG" >&2
echo "Build it first with ./build-oci.sh" >&2
exit 1
fi
safe_tag="${IMAGE_TAG//[^A-Za-z0-9_.-]/-}"
mkdir -p "$OUT_DIR"
if [[ -z "$OUT_NAME" ]]; then
if [[ "$COMPRESS" == "1" ]]; then
OUT_NAME="$OUT_DIR/coderai-oci-${safe_tag}.tar.zst"
else
OUT_NAME="$OUT_DIR/coderai-oci-${safe_tag}.tar"
fi
fi
# Export the image with `docker save`, optionally compressing the stream.
if [[ "$COMPRESS" == "1" ]]; then
  if ! command -v zstd >/dev/null 2>&1; then
    echo "Error: zstd is required for compressed export. Use --no-compress or install zstd." >&2
    exit 1
  fi
  # -f: overwrite an existing archive. Without it zstd refuses to replace the
  # output file when its input comes from a pipe (it cannot prompt), so a
  # second export to the same path would fail. This matches the uncompressed
  # branch, where `save -o` overwrites as well.
  "${DOCKER_CMD[@]}" save "$IMAGE_TAG" | zstd -T0 -19 -f -o "$OUT_NAME"
else
  "${DOCKER_CMD[@]}" save -o "$OUT_NAME" "$IMAGE_TAG"
fi
sha256sum "$OUT_NAME" > "$OUT_NAME.sha256"
cat <<EOF
Exported OCI image artifact:
archive: $OUT_NAME
checksum: $OUT_NAME.sha256
Load it with:
$DOCKER_BIN load -i "$OUT_NAME"
EOF
#!/usr/bin/env bash
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
VERSIONS_FILE="$ROOT_DIR/packaging/versions.env"
if [[ -f "$VERSIONS_FILE" ]]; then
# shellcheck disable=SC1090
source "$VERSIONS_FILE"
fi
ENGINE="${CONTAINER_ENGINE:-docker}"
IMAGE_TAG="${OCI_IMAGE:-coderai:local}"
MODE="cpu"
PORT="${CODERAI_PORT:-8776}"
DATA_ROOT="$PWD/coderai-runtime"
DETACH=0
NAME="coderai"
EXTRA_ARGS=()
usage() {
cat <<'EOF'
Usage:
packaging/linux/run_oci.sh [--cpu|--nvidia|--vulkan] [IMAGE_TAG]
Options:
--docker Use docker (default).
--podman Use podman.
--cpu CPU-only run mode (default).
--nvidia NVIDIA CUDA mode; adds --gpus all for Docker.
--vulkan Vulkan mode; adds --device /dev/dri.
-p, --port PORT Host port to expose (default: 8776).
--data-dir PATH Directory for config/models/cache (default: ./coderai-runtime).
--name NAME Container name (default: coderai).
-d, --detach Run in background.
-- ARGS Extra args passed to the container engine before the image name.
-h, --help Show this help.
EOF
}
while [[ $# -gt 0 ]]; do
case "$1" in
--docker) ENGINE=docker; shift ;;
--podman) ENGINE=podman; shift ;;
--cpu) MODE=cpu; shift ;;
--nvidia|--cuda) MODE=nvidia; shift ;;
--vulkan) MODE=vulkan; shift ;;
-p|--port)
[[ $# -ge 2 ]] || { echo "Error: $1 requires a port" >&2; exit 2; }
PORT="$2"; shift 2 ;;
--data-dir)
[[ $# -ge 2 ]] || { echo "Error: --data-dir requires a path" >&2; exit 2; }
DATA_ROOT="$2"; shift 2 ;;
--name)
[[ $# -ge 2 ]] || { echo "Error: --name requires a value" >&2; exit 2; }
NAME="$2"; shift 2 ;;
-d|--detach) DETACH=1; shift ;;
--)
shift
EXTRA_ARGS+=("$@")
break ;;
-h|--help) usage; exit 0 ;;
-*) echo "Error: unknown option: $1" >&2; usage >&2; exit 2 ;;
*) IMAGE_TAG="$1"; shift ;;
esac
done
# Create the host-side state directories and make DATA_ROOT absolute, since
# bind mounts require absolute host paths.
mkdir -p "$DATA_ROOT/config" "$DATA_ROOT/models" "$DATA_ROOT/cache"
DATA_ROOT="$(cd "$DATA_ROOT" && pwd)"

# Base arguments: the container always listens on 8776 internally; only the
# host-side port is configurable.
args=(run --rm --name "$NAME" --ipc=host -p "$PORT:8776" -e CODERAI_HOST=0.0.0.0 -e CODERAI_PORT=8776)
if [[ "$DETACH" == "1" ]]; then
  args+=(-d)
fi

# Accelerator-specific flags.
case "$MODE" in
  nvidia)
    if [[ "$ENGINE" == "docker" ]]; then
      args+=(--gpus all)
    else
      args+=(--hooks-dir=/usr/share/containers/oci/hooks.d)
    fi
    ;;
  vulkan)
    args+=(--device /dev/dri)
    ;;
  cpu) ;;
esac

# Podman gets the :Z volume option on each bind mount.
volume_suffix=""
if [[ "$ENGINE" == "podman" ]]; then
  volume_suffix=":Z"
fi
args+=(-v "$DATA_ROOT/config:/config$volume_suffix" -v "$DATA_ROOT/models:/models$volume_suffix" -v "$DATA_ROOT/cache:/cache$volume_suffix")
# ${arr[@]+...} keeps an empty EXTRA_ARGS from tripping `set -u` on bash < 4.4,
# where expanding an empty array counts as an unbound variable.
args+=(${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"} "$IMAGE_TAG")
cat <<EOF
Starting CoderAI OCI container
engine: $ENGINE
image: $IMAGE_TAG
mode: $MODE
url: http://127.0.0.1:$PORT/admin
data: $DATA_ROOT
EOF
exec "$ENGINE" "${args[@]}"
#!/usr/bin/env bash
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
VERSIONS_FILE="$ROOT_DIR/packaging/versions.env"
if [[ -f "$VERSIONS_FILE" ]]; then
# shellcheck disable=SC1090
source "$VERSIONS_FILE"
fi
DOCKER_BIN="${DOCKER:-docker}"
read -r -a DOCKER_CMD <<< "$DOCKER_BIN"
IMAGE_TAG="${OCI_IMAGE:-coderai:local}"
MODE="cpu"
PORT="${CODERAI_PORT:-18776}"
KEEP=0
TIMEOUT=45
CONTAINER_NAME="coderai-smoke-$$"
usage() {
cat <<'EOF'
Usage:
packaging/linux/smoke_test_oci.sh [IMAGE_TAG]
Options:
-t, --tag TAG Image tag to test (default: coderai:local or OCI_IMAGE).
--mode MODE cpu, nvidia, or vulkan (default: cpu).
--port PORT Host port for boot test (default: 18776).
--timeout SECONDS Server boot timeout (default: 45).
--keep Keep the container after failure for inspection.
-h, --help Show this help.
EOF
}
while [[ $# -gt 0 ]]; do
case "$1" in
-t|--tag)
[[ $# -ge 2 ]] || { echo "Error: $1 requires a tag" >&2; exit 2; }
IMAGE_TAG="$2"
shift 2
;;
--mode)
[[ $# -ge 2 ]] || { echo "Error: --mode requires cpu, nvidia, or vulkan" >&2; exit 2; }
MODE="$2"
shift 2
;;
--port)
[[ $# -ge 2 ]] || { echo "Error: --port requires a value" >&2; exit 2; }
PORT="$2"
shift 2
;;
--timeout)
[[ $# -ge 2 ]] || { echo "Error: --timeout requires seconds" >&2; exit 2; }
TIMEOUT="$2"
shift 2
;;
--keep)
KEEP=1
shift
;;
-h|--help)
usage
exit 0
;;
-*)
echo "Error: unknown option: $1" >&2
usage >&2
exit 2
;;
*)
IMAGE_TAG="$1"
shift
;;
esac
done
case "$MODE" in
cpu|nvidia|vulkan) ;;
*) echo "Error: --mode must be cpu, nvidia, or vulkan" >&2; exit 2 ;;
esac
# Exit handler: unless --keep was given, remove the boot-test container and the
# per-run scratch directory used for its /config, /models and /cache mounts.
# Both steps are best-effort so they never mask the script's real exit status.
cleanup() {
  if [[ "$KEEP" != "1" ]]; then
    "${DOCKER_CMD[@]}" rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true
    # tmp_dir is assigned later in the script; it is still unset when the
    # script exits early, hence the guarded expansion.
    if [[ -n "${tmp_dir:-}" ]]; then
      rm -rf "$tmp_dir" 2>/dev/null || true
    fi
  fi
}
trap cleanup EXIT
if ! "${DOCKER_CMD[@]}" image inspect "$IMAGE_TAG" >/dev/null 2>&1; then
echo "Error: image not found: $IMAGE_TAG" >&2
exit 1
fi
# Python program passed to `python3 -c` inside the image. It checks with
# importlib.util.find_spec whether each module can be located, records torch's
# CUDA availability and device count (or the error raised while querying
# them), prints the result as one JSON object, and exits non-zero when any
# required module is missing. Optional modules are reported but never fail the
# check. The loop, try/except and if bodies must stay indented: Python block
# structure is significant inside this string.
IMPORT_CHECK='import importlib.util, json
mods=["fastapi","uvicorn","torch","transformers","diffusers","accelerate","llama_cpp","PIL"]
optional=["stable_diffusion_cpp","whispercpp","bitsandbytes","onnxruntime"]
out={"required":{},"optional":{}}
missing=[]
for m in mods:
    ok=importlib.util.find_spec(m) is not None
    out["required"][m]=ok
    if not ok: missing.append(m)
for m in optional:
    out["optional"][m]=importlib.util.find_spec(m) is not None
try:
    import torch
    out["torch_cuda_available"]=bool(torch.cuda.is_available())
    out["torch_cuda_device_count"]=int(torch.cuda.device_count())
except Exception as e:
    out["torch_cuda_error"]=str(e)
print(json.dumps(out, sort_keys=True))
if missing:
    raise SystemExit("missing required imports: "+", ".join(missing))'
# Import check: run the image's bundled interpreter once in a throwaway
# container (--rm), passing through the accelerator for the selected mode.
run_args=(--rm)
if [[ "$MODE" == "nvidia" ]]; then
  run_args+=(--gpus all)
elif [[ "$MODE" == "vulkan" ]]; then
  run_args+=(--device /dev/dri)
fi

echo "Checking imports in $IMAGE_TAG..."
"${DOCKER_CMD[@]}" run "${run_args[@]}" \
  --entrypoint /opt/coderai/python/bin/python3 \
  "$IMAGE_TAG" -c "$IMPORT_CHECK"
# Boot test: start the image detached with fresh, empty host directories
# mounted at /config, /models and /cache, and publish container port 8776 on
# the chosen host port.
tmp_dir="$ROOT_DIR/.packaging-cache/smoke-$MODE-$$"
rm -rf "$tmp_dir" 2>/dev/null || true
mkdir -p "$tmp_dir/config" "$tmp_dir/models" "$tmp_dir/cache"

container_args=(
  -d
  --name "$CONTAINER_NAME"
  -p "$PORT:8776"
  -e CODERAI_HOST=0.0.0.0
  -e CODERAI_PORT=8776
  -v "$tmp_dir/config:/config"
  -v "$tmp_dir/models:/models"
  -v "$tmp_dir/cache:/cache"
)
# Accelerator flags first, then --ipc=host, which every supported mode
# (cpu, nvidia, vulkan) receives; MODE was validated earlier in the script.
case "$MODE" in
  nvidia) container_args+=(--gpus all) ;;
  vulkan) container_args+=(--device /dev/dri) ;;
esac
container_args+=(--ipc=host)

echo "Starting boot test container on http://127.0.0.1:$PORT ..."
"${DOCKER_CMD[@]}" run "${container_args[@]}" "$IMAGE_TAG" >/dev/null
# Poll /admin once per second until the server answers with a status that
# shows it is up (200, a redirect, or an auth challenge), giving up after
# TIMEOUT seconds and dumping the container logs to stderr.
#
# Each probe is bounded with --connect-timeout/--max-time: curl has no overall
# time limit by default, so a server that accepts the connection but never
# responds would otherwise block the loop forever and TIMEOUT would never be
# checked.
start=$SECONDS
until status=$(curl -sS --connect-timeout 2 --max-time 5 -o /dev/null -w '%{http_code}' "http://127.0.0.1:$PORT/admin" 2>/dev/null) && [[ "$status" =~ ^(200|301|302|401|403)$ ]]; do
  if (( SECONDS - start > TIMEOUT )); then
    echo "Server did not respond within ${TIMEOUT}s" >&2
    "${DOCKER_CMD[@]}" logs "$CONTAINER_NAME" >&2 || true
    exit 1
  fi
  sleep 1
done
# Final probe: /v1/models must answer 200, or 401/403 when authentication is
# required. A curl failure is tolerated here (|| true) so that the status
# check below reports it together with the container logs.
models_status=$(curl -sS -o /dev/null -w '%{http_code}' "http://127.0.0.1:$PORT/v1/models" 2>/dev/null || true)
if [[ ! "$models_status" =~ ^(200|401|403)$ ]]; then
  echo "Unexpected /v1/models status: $models_status" >&2
  # Use the split DOCKER_CMD array like every other docker call in this
  # script; quoting "$DOCKER_BIN" as a single word fails when it holds more
  # than one word.
  "${DOCKER_CMD[@]}" logs "$CONTAINER_NAME" >&2 || true
  exit 1
fi
echo "Smoke test passed for $IMAGE_TAG ($MODE)."
# CoderAI local distribution pins
# This file is sourced by packaging/linux/build_oci_image.sh and used as Docker build args.
# Because it is sourced as shell, keep every entry a plain NAME=value
# assignment: no spaces around `=`, and quote any value that contains spaces.
# python-build-standalone release 20250612 ships CPython 3.13.5. The app only
# requires the 3.13 minor line, while native wheels are built against the target
# interpreter during image assembly.
# PYTHON_VERSION and PBS_RELEASE describe the same interpreter build (see the
# note above), so update them together.
PYTHON_VERSION=3.13.5
PBS_RELEASE=20250612
UV_VERSION=0.7.13
CUDA_VERSION=12.6.3
UBUNTU_VERSION=24.04
# Space-separated list, quoted so it is assigned as a single value.
VULKAN_PACKAGES="libvulkan-dev vulkan-tools glslang-tools"
# Native module refs. Empty means use the resolver/package default from PyPI.
LLAMA_CPP_PYTHON_VERSION=
SD_CPP_PYTHON_VERSION=
# NOTE(review): `master` is presumably a branch name, which would make this
# the one entry that is not a fixed pin — confirm whether a tag or commit
# should be used for reproducible builds.
WHISPERCPP_REF=master
# Local image defaults.
OCI_IMAGE=coderai:local
OCI_BUILD_CONTEXT=.
#!/usr/bin/env bash
# Convenience wrapper: forwards all arguments to packaging/linux/run_oci.sh,
# located relative to this script's own directory so it works from any cwd.
set -euo pipefail
# Resolve the directory in its own assignment so that a failing `cd` aborts the
# script under `set -e`. Inside the exec argument the failure would be ignored
# and the script would try to exec "/packaging/linux/run_oci.sh" from the
# filesystem root.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
exec "$script_dir/packaging/linux/run_oci.sh" "$@"
#!/usr/bin/env bash
# Convenience wrapper: forwards all arguments to
# packaging/linux/smoke_test_oci.sh, located relative to this script's own
# directory so it works from any cwd.
set -euo pipefail
# Resolve the directory in its own assignment so that a failing `cd` aborts the
# script under `set -e`. Inside the exec argument the failure would be ignored
# and the script would try to exec "/packaging/linux/smoke_test_oci.sh" from
# the filesystem root.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
exec "$script_dir/packaging/linux/smoke_test_oci.sh" "$@"
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment