Commit ed8397a0 authored by Your Name

Add --force-reasoning CLI flag for reasoning/thinking mode

- Added --force-reasoning argument to enable reasoning mode for models
  that support it (Qwen3, DeepSeek R1, etc.)
- Modified the chat_completions endpoint to check both the API parameter
  enable_thinking and the CLI flag force_reasoning
- When either is true, the endpoint injects the agentic template to enable thinking
parent 11526eee
...@@ -5418,8 +5418,10 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request ...@@ -5418,8 +5418,10 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
# Insert system message at the beginning # Insert system message at the beginning
messages = [ChatMessage(role="system", content=system_text)] + list(messages) messages = [ChatMessage(role="system", content=system_text)] + list(messages)
# Enable thinking/reasoning mode if requested # Enable thinking/reasoning mode if requested via API parameter OR CLI flag
if getattr(request, 'enable_thinking', False): force_reasoning = getattr(global_args, 'force_reasoning', False) if global_args else False
enable_thinking = getattr(request, 'enable_thinking', False) or force_reasoning
if enable_thinking:
from codai.models.templates import AgenticTemplateManager from codai.models.templates import AgenticTemplateManager
template_manager = AgenticTemplateManager(request.model) template_manager = AgenticTemplateManager(request.model)
# Get the current system prompt if exists # Get the current system prompt if exists
...@@ -6471,6 +6473,11 @@ def parse_args(): ...@@ -6471,6 +6473,11 @@ def parse_args():
choices=["auto", "litellm"], choices=["auto", "litellm"],
help="Tool call parser to use: 'auto' for internal parser, 'litellm' for LiteLLM's parser. Default: auto", help="Tool call parser to use: 'auto' for internal parser, 'litellm' for LiteLLM's parser. Default: auto",
) )
parser.add_argument(
"--force-reasoning",
action="store_true",
help="Force reasoning/thinking mode for models that support it (e.g., Qwen3, DeepSeek R1). Enables extraction of reasoning content.",
)
return parser.parse_args() return parser.parse_args()
def main(): def main():
"""Main entry point.""" """Main entry point."""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment