Fix Vulkan backend to use llama.cpp's built-in chat template

- Use apply_chat_template() to properly format messages for each model
- This ensures Qwen3 and other models get their correct chat format
- Fall back to <|im_start|>/<|im_end|> format if apply_chat_template fails
- Fixes garbled output with <|system|> tags appearing in responses
parent d78e5bd7
...@@ -958,20 +958,39 @@ class VulkanBackend(ModelBackend): ...@@ -958,20 +958,39 @@ class VulkanBackend(ModelBackend):
raise raise
def format_messages(self, messages: List[ChatMessage]) -> str: def format_messages(self, messages: List[ChatMessage]) -> str:
"""Format messages into a prompt string suitable for chat models.""" """Format messages into a prompt string suitable for chat models.
formatted = []
Uses llama.cpp's built-in chat template support for proper formatting.
"""
# Convert to format expected by llama.cpp
chat_messages = []
for msg in messages: for msg in messages:
if msg.role == "system": chat_msg = {"role": msg.role}
formatted.append(f"<|system|>\n{msg.content}") if msg.content:
elif msg.role == "user": chat_msg["content"] = msg.content
formatted.append(f"<|user|>\n{msg.content}") if msg.tool_calls:
elif msg.role == "assistant": chat_msg["tool_calls"] = msg.tool_calls
content = msg.content or "" chat_messages.append(chat_msg)
formatted.append(f"<|assistant|>\n{content}")
# Use llama.cpp's apply_chat_template if available
formatted.append("<|assistant|>\n") try:
return "\n".join(formatted) prompt = self.model.apply_chat_template(chat_messages, tokenize=False, add_generation_prompt=True)
return prompt
except Exception as e:
# Fallback to manual formatting if apply_chat_template fails
print(f"Warning: apply_chat_template failed ({e}), using fallback formatting")
formatted = []
for msg in messages:
if msg.role == "system":
formatted.append(f"<|im_start|>system\n{msg.content}<|im_end|>")
elif msg.role == "user":
formatted.append(f"<|im_start|>user\n{msg.content}<|im_end|>")
elif msg.role == "assistant":
content = msg.content or ""
formatted.append(f"<|im_start|>assistant\n{content}<|im_end|>")
formatted.append("<|im_start|>assistant\n")
return "\n".join(formatted)
def generate(self, prompt: str, max_tokens: Optional[int] = None, def generate(self, prompt: str, max_tokens: Optional[int] = None,
temperature: float = 0.7, top_p: float = 1.0, temperature: float = 0.7, top_p: float = 1.0,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment