tasks: report live tokens/s for text generation

Add a `rate` field to the Task registry and publish step (tokens so far) + tokens/s from the text streaming loop every few tokens; the Tasks page shows "N tok · X.X tok/s" while a generation is running. The new field flows through the engine → front task aggregation unchanged (asdict serialization). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

tasks: report live tokens/s for text generation
Add a `rate` field to the Task registry and publish step (tokens so far) + tokens/s from the text streaming loop every few tokens; the Tasks page shows "N tok · X.X tok/s" while a generation is running. The new field flows through the engine → front task aggregation unchanged (asdict serialization). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
bc9a8352 · Stefy Lanza (nextime / spora ) · b297b25f · bc9a8352 · bc9a8352 · bc9a8352
Commit bc9a8352 authored Jun 18, 2026 by Stefy Lanza (nextime / spora )
Hide whitespace changes
Inline Side-by-side

Showing with 22 additions and 2 deletions

tasks.html codai/admin/templates/tasks.html +9 -2

text.py codai/api/text.py +12 -0

registry.py codai/tasks/registry.py +1 -0

No files found.
--- a/codai/admin/templates/tasks.html
+++ b/codai/admin/templates/tasks.html
@@ -96,12 +96,19 @@ const STATUS_BADGE = {

 function progressBar(t) {
  const total = t.total || 0, step = t.step || 0;
+  // Live throughput for text generation (tokens/s), shown while running.
+  const rate = (t.rate && t.status === 'running')
+    ? ` <span class="dim small">· ${t.rate} tok/s</span>` : '';
  if (!total) {
-    return t.status === 'running' ? '<span class="dim small">working…</span>' : '<span class="dim small">—</span>';
+    if (t.status === 'running') {
+      const tok = step ? `${step} tok` : 'working…';
+      return `<span class="dim small">${tok}</span>${rate}`;
+    }
+    return '<span class="dim small">—</span>';
  }
  const pct = Math.max(0, Math.min(100, Math.round(step / total * 100)));
  return `<div class="progress"><div class="progress-fill" style="width:${pct}%"></div></div>
-          <span class="dim small">${step}/${total} (${pct}%)</span>`;
+          <span class="dim small">${step}/${total} (${pct}%)</span>${rate}`;
 }

 function actions(t) {

--- a/codai/api/text.py
+++ b/codai/api/text.py
@@ -1517,6 +1517,7 @@ async def stream_chat_response(
    
    try:
        chunk_count = 0
+        _gen_t0 = None          # wall-clock of the first generated token (for tokens/s)
        # Buffer for withholding in-progress tool tags from the content stream.
        content_buffer = ""
        # Exact content deltas actually streamed to the client (post-format,
@@ -1560,6 +1561,17 @@ async def stream_chat_response(
            if task_registry.is_cancelled(_tid):
                break
            chunk_count += 1
+            # Publish live throughput (tokens/s) onto the task for the Tasks page.
+            # The streamer yields ~one token per chunk; refresh every few tokens to
+            # keep the registry lock cold.
+            if _gen_t0 is None:
+                _gen_t0 = time.time()
+            elif chunk_count % 8 == 0:
+                _elapsed = time.time() - _gen_t0
+                if _elapsed > 0:
+                    task_registry.update(
+                        _tid, step=chunk_count,
+                        rate=round(chunk_count / _elapsed, 1))
            # Always filter malformed content (regex-based, works per-chunk)
            filtered_chunk = filter_malformed_content(chunk)
            

--- a/codai/tasks/registry.py
+++ b/codai/tasks/registry.py
@@ -54,6 +54,7 @@ class Task:
    status: str = "queued"         # queued | running | done | error | cancelled
    step: int = 0
    total: int = 0
+    rate: float = 0.0              # throughput (tokens/s) for text generation
    message: str = ""
    job_id: Optional[str] = None   # link to a durable loras training job, if any
    created_at: float = field(default_factory=time.time)