front/ui: stricter mmproj auto-pair; show loaded model ids, not aliases

- models page: the multimodal-projector select now defaults to None unless a
  projector is a strong, unambiguous name match. Scores only distinctive tokens
  (drops generic words + quant tokens, keeps size tokens like 14b), requires
  covering at least half the model's tokens, and rejects ties. Stops a lone
  shared family token from pairing the wrong-size projector.
- task page: the per-engine loaded-model hover now lists each model once by its
  canonical id instead of its aliases (auto gguf stem, explicit alias, type
  prefix). engines_list() resolves loaded keys via the pin index's new model_id.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
parent 84d085d7
......@@ -2944,9 +2944,9 @@ function _applyBackendCfgVisibility(m){
}
// Populate the mmproj (multimodal projector) select from the available component
// GGUF files. Auto-selects the projector that best matches this model's name when
// the config hasn't explicitly chosen one (mmproj is enabled by default when a
// matching projector exists — e.g. gemma vision).
// GGUF files. Defaults to None and only auto-pairs a projector when its name is a
// strong, unambiguous match for this model (e.g. gemma vision) — never on a lone
// shared family token, which previously paired the wrong-size projector.
function _populateMmprojSelect(m, s){
const sel = document.getElementById('cfg-mmproj');
if(!sel) return;
......@@ -2956,27 +2956,40 @@ function _populateMmprojSelect(m, s){
return role && role.key === 'mmproj';
});
const cur = (s && s.mmproj) || '';
// Score each projector by shared name tokens with the model, so the gemma
// projector pairs with the gemma model rather than an unrelated one.
// Score each projector by shared *distinctive* name tokens with the model, so
// the gemma projector pairs with the gemma model — not an unrelated one, and
// not the wrong-size sibling. Generic tokens (quant level, format, "mmproj"…)
// are dropped so they can't manufacture a false match.
const GENERIC = new Set(['gguf','mmproj','proj','model','instruct','chat','text',
'vision','base','tuned','finetune','merge','fp16','bf16','f16','f32']);
const isQuant = (t) => /^(iq|q)\d/.test(t) || /^\d+bpw$/.test(t)
|| ['k','km','ks','kl','kxl','km','kss','xs','xss'].includes(t);
const distinctive = (name) => name.split(/[^a-z0-9]+/)
.filter(t => t.length >= 3 && !GENERIC.has(t) && !isQuant(t));
const modelName = (_ggufBaseName(m.label || m.path || '') || '').toLowerCase();
const tokens = modelName.split(/[^a-z0-9]+/).filter(t => t.length >= 3);
const tokens = distinctive(modelName);
const score = (fname) => {
const n = fname.toLowerCase();
return tokens.reduce((acc, t) => acc + (n.includes(t) ? 1 : 0), 0);
};
let best = '', bestScore = 0;
// A real match must cover at least half the model's distinctive tokens (floor
// 1) — a lone shared family token among several isn't enough.
const need = Math.max(1, Math.ceil(tokens.length / 2));
let best = '', bestScore = 0, tie = false;
const opts = ['<option value="">None</option>'];
for(const f of projFiles){
const sc = score(f.filename);
if(sc > bestScore){ bestScore = sc; best = f.path; }
if(sc > bestScore){ bestScore = sc; best = f.path; tie = false; }
else if(sc === bestScore && sc > 0){ tie = true; }
opts.push(`<option value="${esc(f.path)}">${esc(f.filename)} (${fmtGB(f.size_gb)})</option>`);
}
sel.innerHTML = opts.join('');
// Selection priority: explicit config value → best name match (enabled by
// default) → None.
sel.value = cur || (bestScore > 0 ? best : '');
// Auto-pair only on a strong, unambiguous match; otherwise default to None.
const auto = (bestScore >= need && !tie) ? best : '';
// Selection priority: explicit config value → strong name match → None.
sel.value = cur || auto;
// If the stored value isn't among the current options (file removed), fall back.
if(cur && sel.value !== cur) sel.value = (bestScore > 0 ? best : '');
if(cur && sel.value !== cur) sel.value = auto;
_onMmprojChange();
}
......
......@@ -262,6 +262,10 @@ class FrontProxy:
rec = {"engine": (m.get("engine") or "").strip() or None,
"backend": (m.get("backend") or "").strip() or None,
"path": (m.get("path") or m.get("id") or "").strip() or None,
# Canonical model id (NOT the alias) — what the loaded-model
# list should display so each model shows once, by its id.
"model_id": (m.get("id") or m.get("path")
or m.get("alias") or "").strip() or None,
"engine_fallback": bool(m.get("engine_fallback"))}
for field_ in (m.get("path"), m.get("id"), m.get("alias")):
if not field_:
......@@ -350,6 +354,19 @@ class FrontProxy:
except Exception:
return False
def _canonical_loaded(self, keys) -> list:
    """Collapse an engine's loaded-model keys into unique canonical model ids.

    The same model may be resident under more than one key (its id/path, an
    auto-derived gguf stem, an explicit alias, or a type-prefixed key such
    as ``audio:``). Each key is looked up through ``self._model_info``; the
    record's ``model_id`` is used when it is truthy, otherwise the key itself
    is kept unchanged.

    Keys are visited in sorted order and duplicates are dropped on first
    sight, so the returned list follows the order in which each canonical id
    is first reached — it is not re-sorted by canonical id afterwards.
    """
    # dict.fromkeys keeps first-seen insertion order, which gives an ordered
    # de-duplication in a single pass.
    resolved = (
        self._model_info(key).get("model_id") or key
        for key in sorted(keys)
    )
    return list(dict.fromkeys(resolved))
def engines_list(self) -> list:
out = []
for e in self.registry.all():
......@@ -360,7 +377,8 @@ class FrontProxy:
out.append({"id": e.id, "name": e.name, "backend": e.backend,
"gpu": e.gpu, "healthy": e.healthy, "primary": e.primary,
"vram": e.vram, "cooling": bool(e.cooling),
"loaded_models": sorted(e.loaded_models), "pid": pid})
"loaded_models": self._canonical_loaded(e.loaded_models),
"pid": pid})
return out
async def model_loaded_status(self, request: Request):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment