Commit ca6f9841 authored by Your Name

Make 'raw' mutually exclusive with 'prompt' and 'inject'

When 'raw' is used, skip the 'prompt', 'inject', and 'stop' handlers,
since raw mode handles everything separately. Previously, these handlers
also ran in raw mode, which caused double assistant headers and corrupted prompts.
parent 750d433f
...@@ -1946,7 +1946,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request ...@@ -1946,7 +1946,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
print(f"CHAT: Reasoning API param enabled") print(f"CHAT: Reasoning API param enabled")
# Handle 'inject' - system prompt injection # Handle 'inject' - system prompt injection
if "inject" in force_reasoning_args: # Skip for 'raw' mode since it handles everything separately
if "raw" not in force_reasoning_args and "inject" in force_reasoning_args:
from codai.models.templates import AgenticTemplateManager from codai.models.templates import AgenticTemplateManager
template_manager = AgenticTemplateManager(request.model) template_manager = AgenticTemplateManager(request.model)
...@@ -1981,7 +1982,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request ...@@ -1981,7 +1982,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
print(f"--- END SYSTEM PROMPT ---") print(f"--- END SYSTEM PROMPT ---")
# Handle 'prompt' - prompt seeding (ends with thought tag) # Handle 'prompt' - prompt seeding (ends with thought tag)
if "prompt" in force_reasoning_args: # Note: 'prompt' and 'raw' are mutually exclusive - raw bypasses this
if "prompt" in force_reasoning_args and "raw" not in force_reasoning_args:
from codai.models.templates import AgenticTemplateManager from codai.models.templates import AgenticTemplateManager
template_manager = AgenticTemplateManager(request.model) template_manager = AgenticTemplateManager(request.model)
...@@ -2021,7 +2023,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request ...@@ -2021,7 +2023,8 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
stop_sequences = list(request.stop) stop_sequences = list(request.stop)
# Handle 'stop' - add reasoning stop tokens (also done for 'inject' and 'prompt') # Handle 'stop' - add reasoning stop tokens (also done for 'inject' and 'prompt')
if "stop" in force_reasoning_args or "inject" in force_reasoning_args or "prompt" in force_reasoning_args: # Skip for 'raw' mode since it handles stop tokens separately
if "raw" not in force_reasoning_args and ("stop" in force_reasoning_args or "inject" in force_reasoning_args or "prompt" in force_reasoning_args):
_, _, additional_stops = get_reasoning_stop_tokens(model_family) _, _, additional_stops = get_reasoning_stop_tokens(model_family)
for stop_token in additional_stops: for stop_token in additional_stops:
if stop_token not in stop_sequences: if stop_token not in stop_sequences:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment