Commit bcd9bc55 authored by Your Name

Fix force-reasoning bugs: duplicate tools, reasoning duplication, tool extraction

- Bug 1: Skip format_tools_for_prompt in raw mode (the condition for this was already in place)
- Bug 2: Use final_text (after reasoning) instead of generated_text for formatter
- Bug 3: Pass final_text to ModelParserAdapter instead of generated_text

This prevents reasoning from appearing in both content AND reasoning fields,
and allows the tool parser to properly extract tool calls without being
confused by reasoning tags.
parent 017c0399
...@@ -2429,18 +2429,21 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request ...@@ -2429,18 +2429,21 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
print(f"DEBUG: Error converting tool in raw mode: {e}, tool type: {type(t)}") print(f"DEBUG: Error converting tool in raw mode: {e}, tool type: {type(t)}")
continue continue
# Step 1: Use ModelParserAdapter to extract tool calls from generated text # Step 1: Use ModelParserAdapter to extract tool calls from final_text (NOT generated_text which includes reasoning)
# This fixes Bug 2 and Bug 3: reasoning was appearing in both content AND reasoning fields
# because the parser was receiving the full generated_text including reasoning
extracted_tool_calls = None extracted_tool_calls = None
clean_text = generated_text clean_text = final_text # Use final_text (after reasoning) instead of generated_text (which includes reasoning)
if tools_list: if tools_list:
adapter = ModelParserAdapter(model_name=response_model_name) adapter = ModelParserAdapter(model_name=response_model_name)
extracted_tool_calls = adapter.extract_tool_calls(generated_text, tools_list) # Extract tool calls from final_text only (after reasoning is done)
extracted_tool_calls = adapter.extract_tool_calls(final_text, tools_list)
if extracted_tool_calls: if extracted_tool_calls:
# Strip tool calls from the text # Strip tool calls from the text
clean_text = adapter.strip_tool_calls_from_content(generated_text) clean_text = adapter.strip_tool_calls_from_content(final_text)
if global_debug: if global_debug:
print(f"RAW: Extracted {len(extracted_tool_calls)} tool calls from generated text") print(f"RAW: Extracted {len(extracted_tool_calls)} tool calls from final_text (after reasoning)")
# Estimate token counts # Estimate token counts
prompt_tokens = len(raw_prompt_for_generation.split()) prompt_tokens = len(raw_prompt_for_generation.split())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment