front: timestamped logs + AMD GPU marketing-name detection

- Prefix front/uvicorn and re-emitted engine log lines with [HH:MM:SS] so the
  front log format matches the engine ([HH:MM:SS][nvidia] …); preserve tqdm
  in-place progress and avoid double-timestamping already-tagged lines.
- gpu_detect: _amd_gpu_name() resolves a card's marketing name via amdgpu
  product_name sysfs, then lspci board/chip name, then vulkaninfo.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
parent 990f9471
...@@ -15,6 +15,7 @@ UI stays live even while an engine is busy loading a model. ...@@ -15,6 +15,7 @@ UI stays live even while an engine is busy loading a model.
""" """
import json import json
import sys
import time import time
from typing import Optional from typing import Optional
...@@ -518,8 +519,11 @@ def _front_log_config(debug_web: bool): ...@@ -518,8 +519,11 @@ def _front_log_config(debug_web: bool):
import uvicorn import uvicorn
lc = copy.deepcopy(uvicorn.config.LOGGING_CONFIG) lc = copy.deepcopy(uvicorn.config.LOGGING_CONFIG)
for fmt in lc.get("formatters", {}).values(): for fmt in lc.get("formatters", {}).values():
if "fmt" in fmt and not fmt["fmt"].startswith("[front]"): if "fmt" in fmt and not fmt["fmt"].startswith(("[front]", "[%(asctime)s]")):
fmt["fmt"] = "[front] " + fmt["fmt"] # Prefix each line with an HH:MM:SS timestamp + the [front] tag so it
# matches the engine log format ([HH:MM:SS][nvidia] …).
fmt["fmt"] = "[%(asctime)s][front] " + fmt["fmt"]
fmt["datefmt"] = "%H:%M:%S"
# Surface codai/broker logs (the broker now runs here) via uvicorn's handler. # Surface codai/broker logs (the broker now runs here) via uvicorn's handler.
lc.setdefault("loggers", {}) lc.setdefault("loggers", {})
lc["loggers"]["codai"] = {"handlers": ["default"], "level": "INFO", "propagate": False} lc["loggers"]["codai"] = {"handlers": ["default"], "level": "INFO", "propagate": False}
...@@ -663,8 +667,52 @@ def _serve_front(app, **uvicorn_kwargs) -> None: ...@@ -663,8 +667,52 @@ def _serve_front(app, **uvicorn_kwargs) -> None:
supervisor.stop_all(grace=5.0) supervisor.stop_all(grace=5.0)
class _TimestampedStdout:
    """Wrap a text stream so every new line begins with an ``[HH:MM:SS]`` tag.

    Lines that already carry a timestamp — engine lines re-emitted by the
    supervisor and uvicorn lines, both of which start with ``[`` + a digit — are
    passed through untouched (no double timestamp), as are in-place tqdm progress
    updates (which start with a carriage return). Splits only on ``\\n`` so a
    ``\\r`` inside a progress line is treated as ordinary content, preserving the
    single-line overwrite rendering. Unknown attributes (isatty/flush/fileno/…)
    delegate to the wrapped stream so TTY detection and flushing keep working.
    """

    def __init__(self, stream):
        """Remember the wrapped *stream*; output starts at the beginning of a line."""
        self._stream = stream
        # True while the next non-empty chunk would be the first text on a line.
        self._at_line_start = True

    def write(self, s):
        """Write *s* to the wrapped stream, tagging the start of each new line.

        Returns the number of characters of *s* consumed (``len(s)``), not the
        count written to the wrapped stream: the injected timestamps are an
        implementation detail, and callers of a text stream's ``write`` expect
        a value no larger than what they passed in.
        """
        if not s:
            return 0
        ts = f'[{time.strftime("%H:%M:%S")}]'
        buf = []
        for idx, part in enumerate(s.split('\n')):
            if idx > 0:
                # Every split boundary is a newline that was present in *s*.
                buf.append('\n')
                self._at_line_start = True
            if not part:
                continue
            if self._at_line_start:
                # "[<digit>" marks a line that is already timestamped; a leading
                # "\r" marks an in-place progress redraw. Neither gets a tag.
                already_ts = part[:1] == '[' and part[1:2].isdigit()
                if not part.startswith('\r') and not already_ts:
                    buf.append(ts)
                self._at_line_start = False
            buf.append(part)
        self._stream.write(''.join(buf))
        return len(s)

    def __getattr__(self, name):
        """Delegate any attribute not defined on the wrapper to the wrapped stream."""
        # __getattr__ only runs when normal lookup fails. If ``_stream`` itself is
        # missing (instance created without __init__, e.g. by copy/pickle),
        # touching self._stream here would recurse without end.
        if name == '_stream':
            raise AttributeError(name)
        return getattr(self._stream, name)
def run_front(config, args) -> None: def run_front(config, args) -> None:
"""Build the front app, start engine supervision, and serve on the public port.""" """Build the front app, start engine supervision, and serve on the public port."""
# Timestamp every terminal line the front emits (raw prints, uvicorn logs, and
# re-emitted engine output) at HH:MM:SS. Installed before uvicorn binds its log
# handlers so they write through the wrapped stream.
if not isinstance(sys.stdout, _TimestampedStdout):
sys.stdout = _TimestampedStdout(sys.stdout)
config_dir = getattr(args, "config", None) if args is not None else None config_dir = getattr(args, "config", None) if args is not None else None
app = build_app(config, config_dir=config_dir) app = build_app(config, config_dir=config_dir)
app.state.front.debug_engine = getattr(args, "debug_engine", False) app.state.front.debug_engine = getattr(args, "debug_engine", False)
......
...@@ -346,6 +346,7 @@ class EngineSupervisor: ...@@ -346,6 +346,7 @@ class EngineSupervisor:
to whole-percent changes so the log isn't flooded.""" to whole-percent changes so the log isn't flooded."""
pm = self._PROGRESS_RE.match(line) pm = self._PROGRESS_RE.match(line)
is_progress = bool(pm) is_progress = bool(pm)
ts = time.strftime('%H:%M:%S')
with self._log_lock: with self._log_lock:
tty = False tty = False
try: try:
...@@ -354,7 +355,7 @@ class EngineSupervisor: ...@@ -354,7 +355,7 @@ class EngineSupervisor:
pass pass
if is_progress and tty: if is_progress and tty:
# Overwrite the current line; pad to clear any longer previous one. # Overwrite the current line; pad to clear any longer previous one.
print(f"\r[{tag}] {line}\033[K", end="", flush=True) print(f"\r[{ts}][{tag}] {line}\033[K", end="", flush=True)
self._log_progress_tag = tag self._log_progress_tag = tag
return return
if is_progress: if is_progress:
...@@ -370,7 +371,7 @@ class EngineSupervisor: ...@@ -370,7 +371,7 @@ class EngineSupervisor:
if self._log_progress_tag is not None: if self._log_progress_tag is not None:
print(flush=True) print(flush=True)
self._log_progress_tag = None self._log_progress_tag = None
print(f"[{tag}] {line}", flush=True) print(f"[{ts}][{tag}] {line}", flush=True)
def _note_load_progress(self, engine, line): def _note_load_progress(self, engine, line):
"""Track model-load progress from the engine's log stream so the front can """Track model-load progress from the engine's log stream so the front can
......
...@@ -25,6 +25,7 @@ directory; nothing imports torch. ...@@ -25,6 +25,7 @@ directory; nothing imports torch.
import glob import glob
import os import os
import re
import shutil import shutil
import subprocess import subprocess
...@@ -224,6 +225,50 @@ def _nvidia_stats() -> list: ...@@ -224,6 +225,50 @@ def _nvidia_stats() -> list:
return cards return cards
def _amd_gpu_name(dev: str, base: str) -> str:
    """Best-effort marketing name for an AMD card given its sysfs ``device`` dir.

    Tries, in order:

    1. amdgpu's ``product_name`` sysfs attr (e.g. "Radeon RX 7900 XTX");
    2. ``lspci -mm`` for the card's exact PCI address — the board (subsystem)
       name unless it is a "Device ...." placeholder, otherwise the chip name;
    3. the name of the first AMD device reported by ``vulkan_devices()``.

    Falls back to ``AMD GPU (<base>)``, where *base* is the card's name as
    supplied by the caller. Every probe is best-effort: a failure in one step
    falls through to the next instead of raising.
    """
    # 1) amdgpu sysfs product_name (newer kernels expose a clean marketing name).
    try:
        with open(os.path.join(dev, "product_name"), encoding="utf-8") as f:
            name = f.read().strip()
    except (OSError, UnicodeError):
        # Missing/unreadable attribute, or bytes that don't decode: try lspci.
        name = ""
    if name:
        return name

    # 2) lspci by the card's exact PCI bus address (realpath of the device dir).
    #    The generic chip name (field 2) is shared across rebrands — e.g.
    #    "[Radeon RX 470/480/570/580/590]" — but the subsystem/board name (field 4)
    #    pins the actual model ("Radeon RX 580"), so prefer it when meaningful.
    pci_addr = os.path.basename(os.path.realpath(dev))
    lspci = shutil.which("lspci") if re.match(r"^[0-9a-fA-F]{4}:", pci_addr) else None
    if lspci:
        try:
            out = subprocess.run([lspci, "-mm", "-s", pci_addr],
                                 capture_output=True, text=True, timeout=5)
        except (OSError, subprocess.SubprocessError, ValueError):
            # Exec failure, timeout, or undecodable output: skip to vulkaninfo.
            fields = []
        else:
            # -mm quotes each name: "class" "vendor" "device" "subvendor" "subdevice"
            fields = re.findall(r'"([^"]*)"', out.stdout)
        board = fields[4] if len(fields) >= 5 else ""
        # Use the board name unless it's a placeholder ("Device 1234").
        if board and not re.match(r"^Device\b", board):
            return board
        if len(fields) >= 3 and fields[2]:
            # Fall back to the chip name, preferring its [bracketed] part.
            m = re.search(r"\[([^\]]+)\]", fields[2])
            return m.group(1) if m else fields[2]

    # 3) vulkaninfo device name (RADV/AMDVLK expose the marketing name).
    #    NOTE(review): this takes the first AMD entry and does not match it to
    #    *dev*, so on a multi-GPU host every card reaching this step gets the
    #    same name.
    for d in vulkan_devices():
        if d.get("vendor") == "amd" and d.get("name"):
            return d["name"]
    return f"AMD GPU ({base})"
def _amd_stats() -> list: def _amd_stats() -> list:
"""Per-GPU live stats for AMD cards from sysfs (amdgpu). Memory in GB.""" """Per-GPU live stats for AMD cards from sysfs (amdgpu). Memory in GB."""
import re import re
...@@ -259,7 +304,7 @@ def _amd_stats() -> list: ...@@ -259,7 +304,7 @@ def _amd_stats() -> list:
break break
cards.append({ cards.append({
"vendor": "amd", "index": int(base[4:]), "vendor": "amd", "index": int(base[4:]),
"name": f"AMD GPU ({base})", "name": _amd_gpu_name(dev, base),
"util": float(busy) if busy and busy.isdigit() else None, "util": float(busy) if busy and busy.isdigit() else None,
"mem_used": round(int(used) / 1e9, 2) if used and used.isdigit() else None, "mem_used": round(int(used) / 1e9, 2) if used and used.isdigit() else None,
"mem_total": round(int(total) / 1e9, 2) if total and total.isdigit() else None, "mem_total": round(int(total) / 1e9, 2) if total and total.isdigit() else None,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment