Commit 31b6480e authored by Your Name

Make --hf-chat-template repeatable per model

- Changed --hf-chat-template from boolean to action=append
- Added check_hf_chat_template() function for model-specific checking
- Updated _finalize_chat_template_detection to use new function
- Updated README with new syntax
parent 3121fb85
...@@ -202,7 +202,7 @@ options: ...@@ -202,7 +202,7 @@ options:
--vulkan-single-gpu Force Vulkan to use only the specified GPU (prevents layer distribution across multiple GPUs) --vulkan-single-gpu Force Vulkan to use only the specified GPU (prevents layer distribution across multiple GPUs)
--vulkan-list-devices List available Vulkan GPU devices and exit --vulkan-list-devices List available Vulkan GPU devices and exit
--reply-filters Enable filtering of model replies. Can be repeated. See "Reply Filters" section for details. --reply-filters Enable filtering of model replies. Can be repeated. See "Reply Filters" section for details.
--hf-chat-template Use HuggingFace transformers apply_chat_template for GGUF models instead of llama.cpp built-in --hf-chat-template Use HuggingFace transformers apply_chat_template. Can be repeated. See "HuggingFace Chat Template" section for details.
``` ```
### Reply Filters ### Reply Filters
...@@ -261,15 +261,30 @@ The `--hf-chat-template` option enables using HuggingFace's `apply_chat_template ...@@ -261,15 +261,30 @@ The `--hf-chat-template` option enables using HuggingFace's `apply_chat_template
**Usage:** **Usage:**
```bash ```bash
# Use HuggingFace chat template (requires transformers) # Use HuggingFace chat template for ALL text models
coderai --hf-chat-template --model llama-3.1-8b-instruct-q4_k_m.gguf coderai --hf-chat-template text --model llama-3.1-8b-instruct-q4_k_m.gguf
# Use HuggingFace chat template for SPECIFIC model
coderai --hf-chat-template text:llama-3.1 --model llama-3.1-8b-instruct-q4_k_m.gguf
# Different chat templates for different models
coderai --hf-chat-template text:llama-3.1 --hf-chat-template text:phi-3 --model llama-3.1-8b-instruct-q4_k_m.gguf
# Or with Vulkan backend # Or with Vulkan backend
coderai --backend vulkan --hf-chat-template --model llama-3.1-8b-instruct-q4_k_m.gguf coderai --backend vulkan --hf-chat-template text --model llama-3.1-8b-instruct-q4_k_m.gguf
``` ```
**Syntax:**
| Syntax | Applies To |
|--------|------------|
| `--hf-chat-template text` | All text models |
| `--hf-chat-template image` | All image models |
| `--hf-chat-template text:model_name` | Specific text model |
| `--hf-chat-template image:model_name` | Specific image model |
**How it works:** **How it works:**
1. When `--hf-chat-template` is specified, the server attempts to load a HuggingFace tokenizer 1. When `--hf-chat-template` is specified for a model, the server attempts to load a HuggingFace tokenizer
2. It first checks for a local `tokenizer_config.json` in the model directory 2. It first checks for a local `tokenizer_config.json` in the model directory
3. If not found locally, it tries to infer the model name from the GGUF filename and load from HuggingFace Hub 3. If not found locally, it tries to infer the model name from the GGUF filename and load from HuggingFace Hub
4. The tokenizer's `apply_chat_template` method is then used for formatting chat messages 4. The tokenizer's `apply_chat_template` method is then used for formatting chat messages
......
...@@ -1435,9 +1435,15 @@ class VulkanBackend(ModelBackend): ...@@ -1435,9 +1435,15 @@ class VulkanBackend(ModelBackend):
def _finalize_chat_template_detection(self): def _finalize_chat_template_detection(self):
"""Finalize chat template detection after model is loaded.""" """Finalize chat template detection after model is loaded."""
# Check if we should use HuggingFace tokenizer for chat template # Check if we should use HuggingFace tokenizer for chat template
hf_chat_template = getattr(global_args, 'hf_chat_template', False) # Try to get model info
model_name = getattr(self, 'model_name', None) or "unknown"
# Determine model type - text models use GGUF, images would be different
model_type = "text"
if model_name.startswith("image:"):
model_type = "image"
if hf_chat_template: if check_hf_chat_template(model_type, model_name):
self._load_huggingface_tokenizer() self._load_huggingface_tokenizer()
return return
...@@ -3113,6 +3119,60 @@ def check_single_filter(filter_spec: str, filter_type: str, model_type: str, mod ...@@ -3113,6 +3119,60 @@ def check_single_filter(filter_spec: str, filter_type: str, model_type: str, mod
# Simple filter: "malformed" or "all" - applies to all models # Simple filter: "malformed" or "all" - applies to all models
return filter_spec == 'all' or filter_spec == filter_type return filter_spec == 'all' or filter_spec == filter_type
def check_hf_chat_template(model_type: str = "text", model_name: str = None) -> bool:
    """
    Check if HuggingFace chat template should be used for the model.

    Reads the specs collected from the repeatable ``--hf-chat-template`` flag
    (``global_args.hf_chat_template``) and returns True as soon as one spec
    applies to the given model type / model name.

    Args:
        model_type: The model type ('text', 'image', etc.)
        model_name: The specific model name (optional)

    Returns:
        True if HF chat template should be used, False otherwise.

    Syntax:
        # Apply to all text models
        --hf-chat-template text
        # Apply to specific model
        --hf-chat-template text:llama-3.1
        --hf-chat-template image:sd-xl
        # Multiple models
        --hf-chat-template text:llama-3.1 --hf-chat-template text:phi-3
        # '*' in the type position matches every model type
        --hf-chat-template '*'
        --hf-chat-template '*:llama-3.1'
    """
    specs = getattr(global_args, 'hf_chat_template', []) or []

    # A bare string would otherwise be iterated character by character.
    if isinstance(specs, str):
        specs = [specs]

    for spec in specs:
        # Split on the FIRST colon only, so a model name that itself contains
        # ':' (e.g. "org/model:q4") is kept whole instead of being truncated.
        spec_model_type, separator, spec_model_name = spec.partition(':')

        # Check if model type matches
        if spec_model_type != model_type and spec_model_type != '*':
            continue

        # No model name given ("text" or "text:") - applies to all of this type
        if not separator or spec_model_name == '':
            return True

        # NOTE(review): the name comparison is exact equality; confirm that
        # callers pass model_name in the same form users write on the CLI.
        if spec_model_name == model_name:
            return True

    return False
# Global system prompt (set via --system-prompt flag) # Global system prompt (set via --system-prompt flag)
# None = don't inject, True = use default, string = use custom text # None = don't inject, True = use default, string = use custom text
global_system_prompt = None global_system_prompt = None
...@@ -5359,8 +5419,9 @@ def parse_args(): ...@@ -5359,8 +5419,9 @@ def parse_args():
) )
parser.add_argument( parser.add_argument(
"--hf-chat-template", "--hf-chat-template",
action="store_true", action="append",
help="Use HuggingFace transformers apply_chat_template for GGUF models instead of llama.cpp built-in (requires transformers library)", default=[],
help="Use HuggingFace transformers apply_chat_template for specific model(s). Format: --hf-chat-template text:model_name or --hf-chat-template text (all text models)",
) )
parser.add_argument( parser.add_argument(
"--system-prompt", "--system-prompt",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment