Fix Vulkan backend to use llama.cpp's built-in chat template

- Use apply_chat_template() to properly format messages for each model
- This ensures Qwen3 and other models get their correct chat format
- Fall back to the <|im_start|>/<|im_end|> format if apply_chat_template fails
- Fixes garbled output with <|system|> tags appearing in responses

Fix Vulkan backend to use llama.cpp's built-in chat template
eea67af6 · Stefy Lanza (nextime / spora ) · d78e5bd7 · eea67af6
Commit eea67af6 authored Feb 28, 2026 by Stefy Lanza (nextime / spora )
Hide whitespace changes
Inline Side-by-side

Showing with 31 additions and 12 deletions

coderai coderai +31 -12

No files found.
--- a/coderai
+++ b/coderai
@@ -958,20 +958,39 @@ class VulkanBackend(ModelBackend):
            raise
    def format_messages(self, messages: List[ChatMessage]) -> str:
-        """Format messages into a prompt string suitable for chat models."""
+        """Format messages into a prompt string suitable for chat models.
-        formatted = []
+        Uses llama.cpp's built-in chat template support for proper formatting.
+        """
+        # Convert to format expected by llama.cpp
+        chat_messages = []
        for msg in messages:
-            if msg.role == "system":
+            chat_msg = {"role": msg.role}
-                formatted.append(f"<|system|>\n{msg.content}")
+            if msg.content:
-            elif msg.role == "user":
+                chat_msg["content"] = msg.content
-                formatted.append(f"<|user|>\n{msg.content}")
+            if msg.tool_calls:
-            elif msg.role == "assistant":
+                chat_msg["tool_calls"] = msg.tool_calls
-                content = msg.content or ""
+            chat_messages.append(chat_msg)
-                formatted.append(f"<|assistant|>\n{content}")
+        # Use llama.cpp's apply_chat_template if available
-        formatted.append("<|assistant|>\n")
+        try:
-        return "\n".join(formatted)
+            prompt = self.model.apply_chat_template(chat_messages, tokenize=False, add_generation_prompt=True)
+            return prompt
+        except Exception as e:
+            # Fallback to manual formatting if apply_chat_template fails
+            print(f"Warning: apply_chat_template failed ({e}), using fallback formatting")
+            formatted = []
+            for msg in messages:
+                if msg.role == "system":
+                    formatted.append(f"<|im_start|>system\n{msg.content}<|im_end|>")
+                elif msg.role == "user":
+                    formatted.append(f"<|im_start|>user\n{msg.content}<|im_end|>")
+                elif msg.role == "assistant":
+                    content = msg.content or ""
+                    formatted.append(f"<|im_start|>assistant\n{content}<|im_end|>")
+            formatted.append("<|im_start|>assistant\n")
+            return "\n".join(formatted)
    def generate(self, prompt: str, max_tokens: Optional[int] = None,
                 temperature: float = 0.7, top_p: float = 1.0,