tasks: report live tokens/s for text generation

Add a `rate` field to the Task registry and publish step (tokens so far) +
tokens/s from the text streaming loop every few tokens; the Tasks page shows
"N tok · X.X tok/s" while a generation is running. Flows through the engine→
front task aggregation unchanged (asdict serialization).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
parent b297b25f
......@@ -96,12 +96,19 @@ const STATUS_BADGE = {
function progressBar(t) {
const total = t.total || 0, step = t.step || 0;
// Live throughput for text generation (tokens/s), shown while running.
const rate = (t.rate && t.status === 'running')
? ` <span class="dim small">· ${t.rate} tok/s</span>` : '';
if (!total) {
return t.status === 'running' ? '<span class="dim small">working…</span>' : '<span class="dim small">—</span>';
if (t.status === 'running') {
const tok = step ? `${step} tok` : 'working…';
return `<span class="dim small">${tok}</span>${rate}`;
}
return '<span class="dim small">—</span>';
}
const pct = Math.max(0, Math.min(100, Math.round(step / total * 100)));
return `<div class="progress"><div class="progress-fill" style="width:${pct}%"></div></div>
<span class="dim small">${step}/${total} (${pct}%)</span>`;
<span class="dim small">${step}/${total} (${pct}%)</span>${rate}`;
}
function actions(t) {
......
......@@ -1517,6 +1517,7 @@ async def stream_chat_response(
try:
chunk_count = 0
_gen_t0 = None # wall-clock of the first generated token (for it/s)
# Buffer for withholding in-progress tool tags from the content stream.
content_buffer = ""
# Exact content deltas actually streamed to the client (post-format,
......@@ -1560,6 +1561,17 @@ async def stream_chat_response(
if task_registry.is_cancelled(_tid):
break
chunk_count += 1
# Publish live throughput (tokens/s) onto the task for the Tasks page.
# The streamer yields ~one token per chunk; refresh every few tokens to
# keep the registry lock cold.
if _gen_t0 is None:
_gen_t0 = time.time()
elif chunk_count % 8 == 0:
_elapsed = time.time() - _gen_t0
if _elapsed > 0:
task_registry.update(
_tid, step=chunk_count,
rate=round(chunk_count / _elapsed, 1))
# Always filter malformed content (regex-based, works per-chunk)
filtered_chunk = filter_malformed_content(chunk)
......
......@@ -54,6 +54,7 @@ class Task:
status: str = "queued" # queued | running | done | error | cancelled
step: int = 0
total: int = 0
rate: float = 0.0 # throughput (tokens/s) for text generation
message: str = ""
job_id: Optional[str] = None # link to a durable loras training job, if any
created_at: float = field(default_factory=time.time)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment