Commit 329a0042 authored by Your Name

Fix force_reasoning_args None check in mock reasoning logic

- Add truthy check before 'in' operator to prevent TypeError when
  force_reasoning_args is None (when --force-reasoning is not specified)
- Fixes: name 'force_reasoning_args' is not defined error
parent 1266f46a
""" """
Agentic Template Manager - Automates prompt injection for agentic behavior. Agentic Template Manager for forcing reasoning in LLM agents.
Supports the 'Big 10' with specific triggers for tool-calling.
Uses Prompt Seeding technique to force reasoning in LLM models: Provides:
- Ends prompt with thought tag (<think>, <thought>, Thought:) to force reasoning - Prompt Seeding: Ends prompts with thought tags (<think>, <thought>, Thought:) to force reasoning
- Uses raw completion instead of chat API to bypass validation - Uses raw completion instead of chat API to bypass validation
- Provides family-specific stop tokens for reasoning extraction - Provides family-specific stop tokens for reasoning extraction
""" """
...@@ -49,6 +48,60 @@ class AgenticTemplateManager: ...@@ -49,6 +48,60 @@ class AgenticTemplateManager:
"generic": ["</think>", "</thought>", "Thought:"] "generic": ["</think>", "</thought>", "Thought:"]
} }
# Per-family tool-call markup. Each entry maps a model family key to:
#   "start"       - the opening marker of a tool call
#   "end"         - the closing marker, or None when the format has no closer
#   "json_format" - a literal example of a complete call in that family's syntax
# Rows are (family, start, end, json_format); the comprehension expands each row
# into its own dict so no two families share a mutable value.
TOOL_CALL_TAGS = {
    family: {"start": opener, "end": closer, "json_format": example}
    for family, opener, closer, example in (
        (
            "qwen",
            "<|tool_call|>",
            "<|tool_call_end|>",
            '<|tool_call|>{"name": "tool_name", "arguments": {}}',
        ),
        (
            "deepseek",
            "<tool_call>",
            "</tool_call>",
            '<tool_call>{"name": "tool_name", "arguments": {}}</tool_call>',
        ),
        (
            "llama3",
            "<tool_call>",
            "</tool_call>",
            '<tool_call>{"name": "tool_name", "arguments": {}}</tool_call>',
        ),
        (
            "mistral",
            "Action:",
            None,
            "Action: tool_name\nAction Input: {}",
        ),
        (
            "anthropic",
            "<tool_call>",
            "</tool_call>",
            '<tool_call>{"name": "tool_name", "arguments": {}}</tool_call>',
        ),
        (
            "gemma",
            "<tool_call>",
            "</tool_call>",
            '<tool_call>{"name": "tool_name", "arguments": {}}</tool_call>',
        ),
        (
            "phi3",
            "<|tool_call|>",
            "<|tool_call_end|>",
            '<|tool_call|>{"name": "tool_name", "arguments": {}}',
        ),
        (
            "yi",
            "<|tool_call|>",
            "<|tool_call_end|>",
            '<|tool_call|>{"name": "tool_name", "arguments": {}}',
        ),
        (
            "cohere",
            "<tool_call>",
            "</tool_call>",
            '<tool_call>{"name": "tool_name", "arguments": {}}</tool_call>',
        ),
        (
            "generic",
            "<tool_call>",
            "</tool_call>",
            '<tool_call>{"name": "tool_name", "arguments": {}}</tool_call>',
        ),
    )
}
# Original FAMILIES config for backward compatibility # Original FAMILIES config for backward compatibility
FAMILIES = { FAMILIES = {
"qwen": {"name": "Qwen", "prefix": "<|im_start|>", "suffix": "<|im_end|>\n", "thought_tag": "<|thought|>", "call_tag": "<tool_call>"}, "qwen": {"name": "Qwen", "prefix": "<|im_start|>", "suffix": "<|im_end|>\n", "thought_tag": "<|thought|>", "call_tag": "<tool_call>"},
...@@ -250,7 +303,8 @@ class AgenticTemplateManager: ...@@ -250,7 +303,8 @@ class AgenticTemplateManager:
def format_for_raw_completion(self, system_prompt: str, user_message: str, def format_for_raw_completion(self, system_prompt: str, user_message: str,
inject_system: bool = True, inject_system: bool = True,
force_reasoning: bool = True) -> Tuple[str, List[str]]: force_reasoning: bool = True,
tools: Optional[List[Dict]] = None) -> Tuple[str, List[str]]:
""" """
Format prompt for raw completion (bypassing chat API). Format prompt for raw completion (bypassing chat API).
...@@ -259,15 +313,44 @@ class AgenticTemplateManager: ...@@ -259,15 +313,44 @@ class AgenticTemplateManager:
user_message: User message/query user_message: User message/query
inject_system: If True, injects agentic system instructions inject_system: If True, injects agentic system instructions
force_reasoning: If True, seeds prompt with thought tag to force reasoning force_reasoning: If True, seeds prompt with thought tag to force reasoning
tools: Optional list of tool definitions to include in the prompt
Returns: Returns:
Tuple of (formatted_prompt, stop_tokens) Tuple of (formatted_prompt, stop_tokens)
""" """
effective_system = system_prompt effective_system = system_prompt
# Check if there's a custom system prompt (not just default)
has_custom_system = system_prompt and len(system_prompt.strip()) > 0 and system_prompt.strip() not in ("You are a helpful assistant.", "You are a helpful AI assistant.", "")
# Get tool call tags for this model family
tool_tags = self.TOOL_CALL_TAGS.get(self.family_key, self.TOOL_CALL_TAGS["generic"])
# Add tool descriptions to system prompt if tools are provided AND no custom system prompt exists
# (don't override client's custom system prompt with tool instructions)
if tools and not has_custom_system:
import json
tool_descriptions = []
for tool in tools:
func = tool.get('function', {})
name = func.get('name', 'unknown')
desc = f"Tool: {name}"
if func.get('description'):
desc += f"\nDescription: {func['description']}"
if func.get('parameters'):
desc += f"\nParameters: {json.dumps(func['parameters'], indent=2)}"
tool_descriptions.append(desc)
tools_text = "You have access to the following tools:\n\n" + "\n\n".join(tool_descriptions)
tools_text += f"\n\nIMPORTANT: When you need to use a tool, you MUST format your response EXACTLY as:\n"
tools_text += tool_tags["json_format"]
# Prepend tools to system prompt
effective_system = f"{tools_text}\n\n{effective_system}" if effective_system else tools_text
# Inject system prompt if requested # Inject system prompt if requested
if inject_system: if inject_system:
effective_system = self.get_agent_system_prompt(system_prompt) effective_system = self.get_agent_system_prompt(effective_system)
if force_reasoning: if force_reasoning:
# Use prompt seeding to force reasoning # Use prompt seeding to force reasoning
...@@ -321,6 +404,6 @@ def create_reasoning_prompt(model_name: str, system_prompt: str, user_question: ...@@ -321,6 +404,6 @@ def create_reasoning_prompt(model_name: str, system_prompt: str, user_question:
inject_system=False, force_reasoning=True) inject_system=False, force_reasoning=True)
""" """
manager = AgenticTemplateManager(model_name) manager = AgenticTemplateManager(model_name)
return manager.format_for_raw_completion(system_prompt, user_question, return manager.format_for_raw_completion(system_prompt, user_message,
inject_system=inject_system, inject_system=inject_system,
force_reasoning=force_reasoning) force_reasoning=force_reasoning)
...@@ -2110,8 +2110,9 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request ...@@ -2110,8 +2110,9 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
if global_debug: if global_debug:
print(f"STOP: Added reasoning stop tokens: {additional_stops}") print(f"STOP: Added reasoning stop tokens: {additional_stops}")
# Format messages with tools if provided # Format messages with tools if provided - BUT SKIP for raw mode
if request.tools: # (raw mode handles tools separately via format_for_raw_completion)
if request.tools and "raw" not in force_reasoning_args:
messages = format_tools_for_prompt(request.tools, messages) messages = format_tools_for_prompt(request.tools, messages)
# Get the tool_parser from the current manager # Get the tool_parser from the current manager
...@@ -2214,7 +2215,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request ...@@ -2214,7 +2215,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
system_prompt=system_prompt, system_prompt=system_prompt,
user_message=user_message, user_message=user_message,
inject_system=True, inject_system=True,
force_reasoning=True force_reasoning=True,
tools=request.tools # Pass tools for family-specific formatting
) )
use_raw_mode = True use_raw_mode = True
...@@ -2413,7 +2415,7 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request ...@@ -2413,7 +2415,7 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
# But only if we DON'T already have real reasoning from extraction # But only if we DON'T already have real reasoning from extraction
has_real_reasoning = reasoning_text and len(reasoning_text.strip()) > 10 has_real_reasoning = reasoning_text and len(reasoning_text.strip()) > 10
if "mock" in force_reasoning_args and formatted_response and not has_real_reasoning: if force_reasoning_args and "mock" in force_reasoning_args and formatted_response and not has_real_reasoning:
# Add fake reasoning tokens to trigger VSCode plugin stats # Add fake reasoning tokens to trigger VSCode plugin stats
mock_reasoning_tokens = 50 mock_reasoning_tokens = 50
...@@ -2490,6 +2492,7 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request ...@@ -2490,6 +2492,7 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
current_manager, current_manager,
tool_parser, tool_parser,
request.response_format, request.response_format,
force_reasoning_args,
) )
async def stream_chat_response( async def stream_chat_response(
...@@ -2816,6 +2819,7 @@ async def generate_chat_response( ...@@ -2816,6 +2819,7 @@ async def generate_chat_response(
current_manager: ModelManager, current_manager: ModelManager,
tool_parser: ToolCallParser, tool_parser: ToolCallParser,
response_format: Optional[Dict] = None, response_format: Optional[Dict] = None,
force_reasoning_args: Optional[List[str]] = None,
) -> Dict: ) -> Dict:
"""Generate non-streaming chat completion response.""" """Generate non-streaming chat completion response."""
completion_id = f"chatcmpl-{uuid.uuid4().hex}" completion_id = f"chatcmpl-{uuid.uuid4().hex}"
...@@ -2932,7 +2936,7 @@ async def generate_chat_response( ...@@ -2932,7 +2936,7 @@ async def generate_chat_response(
if "message" in choice: if "message" in choice:
existing_reasoning = choice["message"].get("reasoning") existing_reasoning = choice["message"].get("reasoning")
if "mock" in force_reasoning_args and formatted_response and not existing_reasoning: if force_reasoning_args and "mock" in force_reasoning_args and formatted_response and not existing_reasoning:
# Add fake reasoning tokens to trigger VSCode plugin stats # Add fake reasoning tokens to trigger VSCode plugin stats
mock_reasoning_tokens = 50 mock_reasoning_tokens = 50
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment