Commit 563f878e authored by Your Name

Use GGUF model's built-in chat template first

Now detects and uses the built-in chat template from GGUF files
loaded via llama-cpp-python before falling back to manual formatting.
parent e2deefb8
...@@ -213,31 +213,53 @@ class VulkanBackend(ModelBackend): ...@@ -213,31 +213,53 @@ class VulkanBackend(ModelBackend):
def _finalize_chat_template_detection(self): def _finalize_chat_template_detection(self):
"""Finalize chat template detection after model is loaded.""" """Finalize chat template detection after model is loaded."""
# Check if we should use HuggingFace tokenizer for chat template # Check if the loaded GGUF model has a built-in chat template
# Try to get model info if hasattr(self, 'model') and self.model is not None:
model_name = getattr(self, 'model_name', None) or "unknown" try:
# llama.cpp models have a chat_template attribute
# Determine model type - text models use GGUF, images would be different if hasattr(self.model, 'chat_template'):
model_type = "text" template = self.model.chat_template
if model_name.startswith("image:"): if template:
model_type = "image" print(f"DEBUG: Using GGUF model's built-in chat template")
# Extract template name if possible
template_str = str(template)
if 'llama' in template_str.lower():
self.chat_template = 'llama3'
elif 'qwen' in template_str.lower():
self.chat_template = 'qwen3'
elif 'chatml' in template_str.lower():
self.chat_template = 'chatml'
elif 'mistral' in template_str.lower():
self.chat_template = 'mistral'
else:
self.chat_template = 'builtin'
print(f"DEBUG: chat_template set to: {self.chat_template}")
return # Use built-in template
except Exception as e:
print(f"DEBUG: Could not get chat template from GGUF model: {e}")
should_use, template_name = check_hf_chat_template(model_type, model_name) # Fallback: Try to load HuggingFace tokenizer for chat template
# If the model is a text model, try to load the HuggingFace tokenizer
# for apply_chat_template support
if model_type == "text" and not self.hf_tokenizer:
self._load_huggingface_tokenizer(template_name)
def _apply_chat_template(self, messages: List[Dict[str, str]], add_generation_prompt: bool = True) -> str: def _apply_chat_template(self, messages: List[Dict[str, str]], add_generation_prompt: bool = True) -> str:
"""Apply chat template to messages. """Apply chat template to messages.
Tries multiple methods in order: Tries multiple methods in order:
1. HuggingFace tokenizer's apply_chat_template 1. GGUF model's built-in chat template (via llama.cpp)
2. Manual template application based on detected template name 2. HuggingFace tokenizer's apply_chat_template
3. Generic ChatML format as fallback 3. Manual template application based on detected template name
4. Generic ChatML format as fallback
""" """
# First try HuggingFace tokenizer # First try GGUF model's built-in template
if hasattr(self, 'model') and self.model is not None:
try:
if hasattr(self.model, 'create_chat_completion'):
# llama.cpp can handle chat templates internally
# We'll use it to validate the format but return our own formatted string
pass # Fall through to use template directly
except Exception as e:
print(f"DEBUG: GGUF model template check failed: {e}")
# Try HuggingFace tokenizer
if self.hf_tokenizer: if self.hf_tokenizer:
try: try:
# Check if tokenizer has apply_chat_template # Check if tokenizer has apply_chat_template
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment