multi-engine: route gguf automatic alias (filename without .gguf)

A gguf model's assigned/loaded key is its file path, but /v1/models
advertises it — and clients address it — by the filename without the
.gguf suffix (the automatic alias). engine_for_assigned /
engine_for_model / _key_matches_path compared short names verbatim, so
the automatic alias never matched the .gguf key and routing fell through
(404 / wrong engine). Normalize both sides via _short_stem so the
automatic alias resolves to the owning engine with no manual alias.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
parent 79c2e44d
......@@ -370,7 +370,10 @@ class FrontProxy:
@staticmethod
def _key_matches_path(key: str, path: str) -> bool:
    """Return True when ``key`` refers to the model stored at ``path``.

    A match is any of:
      * ``key`` equals ``path``;
      * ``key`` ends with ``":" + path``;
      * ``key`` ends with the last ``/``-separated component of ``path``;
      * both normalize to the same short stem via ``_short_stem`` (filename
        with a trailing ``.gguf`` removed), so the automatic gguf alias
        matches the ``.gguf`` file-path key.
    """
    # Imported at call time, as in the original change.
    from codai.frontproxy.registry import _short_stem

    # Single return: an earlier verbatim-only return here would make the
    # stem comparison below unreachable.
    return (
        key == path
        or key.endswith(f":{path}")
        or key.endswith(path.split("/")[-1])
        or _short_stem(key) == _short_stem(path)
    )
def _engine_by_name(self, name: Optional[str]):
if not name:
......
......@@ -20,6 +20,19 @@ from dataclasses import dataclass, field
from typing import Dict, List, Optional, Set
def _short_stem(key: str) -> str:
"""Short name for a routable key, with a trailing ``.gguf`` stripped.
A gguf model's assigned/loaded key is its file path, but ``/v1/models``
advertises it (and clients address it) by the filename *without* ``.gguf`` —
the automatic alias. Normalizing both sides here lets that alias resolve to
the owning engine without the user setting an explicit alias."""
short = key.split("/")[-1].split(":")[-1]
if short.lower().endswith(".gguf"):
short = short[:-5]
return short
# Default model-format capabilities implied by an engine's backend:
# transformers — safetensors/HF models (CUDA only here)
# gguf — llama.cpp models (CUDA or Vulkan)
......@@ -149,13 +162,13 @@ class EngineRegistry:
the same fuzzy spirit the manager uses, but read-only over loaded keys."""
if not model_key:
return None
short = model_key.split("/")[-1]
short = _short_stem(model_key)
with self._lock:
for e in self._engines.values():
if not e.healthy or not e.can_serve(required_cap):
continue
for k in e.loaded_models:
if k == model_key or k.split("/")[-1] == short \
if k == model_key or _short_stem(k) == short \
or k.endswith(model_key) or model_key.endswith(k.split(":")[-1]):
return e
return None
......@@ -168,13 +181,13 @@ class EngineRegistry:
short-name / alias resolves to the owner."""
if not model_key:
return None
short = model_key.split("/")[-1]
short = _short_stem(model_key)
with self._lock:
for e in self._engines.values():
if not e.healthy:
continue
for k in e.assigned_models:
if (k == model_key or k.split("/")[-1] == short
if (k == model_key or _short_stem(k) == short
or k.endswith(model_key) or model_key.endswith(k.split("/")[-1])):
return e
return None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment