Add support for tools and tool_choice with retry on tool call errors

- Add tools and tool_choice fields to ChatCompletionRequest model
- Update OpenAIProviderHandler to accept and pass tools/tool_choice parameters
- Update handlers to pass tools/tool_choice from request to provider
- Treat tool call errors during streaming as provider failures
- Record failure and re-raise to trigger retry with next model in rotation
- Enable proper tool/function calling through the proxy
- Resolve the 'Tool choice is none, but model called a tool' error by retrying with another model
parent 9840590a
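For context, here is a minimal sketch of the kind of tool-calling request this commit lets the proxy forward, using the standard openai Python client. The base URL, API key, model name, and tool schema are illustrative placeholders, not values from this repository:

    from openai import OpenAI

    # The proxy exposes an OpenAI-compatible endpoint; URL and key are placeholders.
    client = OpenAI(base_url="http://localhost:8000/v1", api_key="proxy-key")

    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Return the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }]

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "What's the weather in Paris?"}],
        tools=tools,          # now forwarded to the provider by this commit
        tool_choice="auto",   # previously dropped by the proxy
    )
    print(response.choices[0].message.tool_calls)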
@@ -83,7 +83,9 @@ class RequestHandler:
                 messages=request_data['messages'],
                 max_tokens=request_data.get('max_tokens'),
                 temperature=request_data.get('temperature', 1.0),
-                stream=request_data.get('stream', False)
+                stream=request_data.get('stream', False),
+                tools=request_data.get('tools'),
+                tool_choice=request_data.get('tool_choice')
             )
             logger.info(f"Response received from provider")
             handler.record_success()
@@ -120,7 +122,9 @@ class RequestHandler:
                 messages=request_data['messages'],
                 max_tokens=request_data.get('max_tokens'),
                 temperature=request_data.get('temperature', 1.0),
-                stream=True
+                stream=True,
+                tools=request_data.get('tools'),
+                tool_choice=request_data.get('tool_choice')
             )
             for chunk in response:
                 try:
@@ -134,11 +138,23 @@
                     yield f"data: {json.dumps(chunk_dict)}\n\n".encode('utf-8')
                 except Exception as chunk_error:
                     # Handle errors during chunk serialization (e.g., tool calls without tool_choice)
-                    logger.warning(f"Error serializing chunk: {str(chunk_error)}")
-                    logger.warning(f"Chunk type: {type(chunk)}")
-                    logger.warning(f"Chunk content: {chunk}")
-                    # Skip this chunk and continue with the next one
-                    continue
+                    # This is a critical error - the model is trying to call tools without proper configuration
+                    # We should treat this as a provider failure
+                    error_msg = str(chunk_error)
+                    if "tool" in error_msg.lower():
+                        logger.error(f"Tool call error during streaming: {error_msg}")
+                        logger.error(f"Chunk type: {type(chunk)}")
+                        logger.error(f"Chunk content: {chunk}")
+                        # Record this as a provider failure
+                        handler.record_failure()
+                        # Re-raise to trigger retry in rotation handler
+                        raise
+                    else:
+                        logger.warning(f"Error serializing chunk: {error_msg}")
+                        logger.warning(f"Chunk type: {type(chunk)}")
+                        logger.warning(f"Chunk content: {chunk}")
+                        # Skip this chunk and continue with the next one
+                        continue
             handler.record_success()
         except Exception as e:
             handler.record_failure()
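The handler.record_failure() plus raise above is what drives failover: the exception escapes the streaming generator and reaches the rotation logic, which moves on to the next model. A hedged sketch of that loop follows; the names (try_models_in_rotation, handler.model) are hypothetical, since the actual RotationHandler internals are not shown in full in this diff:

    # Hedged sketch of the rotation retry that the re-raised tool error feeds into.
    async def try_models_in_rotation(handlers, request_data):
        last_error = None
        for handler in handlers:  # one handler per model in the rotation
            try:
                return await handler.handle_request(
                    model=handler.model,
                    messages=request_data['messages'],
                    tools=request_data.get('tools'),
                    tool_choice=request_data.get('tool_choice'),
                )
            except Exception as e:  # includes the tool call error re-raised above
                last_error = e      # failure already recorded by the handler
                continue            # rotate to the next model
        raise last_error            # every model in the rotation failed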
@@ -348,7 +364,9 @@ class RotationHandler:
                 messages=request_data['messages'],
                 max_tokens=request_data.get('max_tokens'),
                 temperature=request_data.get('temperature', 1.0),
-                stream=request_data.get('stream', False)
+                stream=request_data.get('stream', False),
+                tools=request_data.get('tools'),
+                tool_choice=request_data.get('tool_choice')
             )
             logger.info(f"Response received from provider")
             handler.record_success()
@@ -675,11 +693,20 @@ class AutoselectHandler:
                     yield f"data: {json.dumps(chunk_dict)}\n\n".encode('utf-8')
                 except Exception as chunk_error:
                     # Handle errors during chunk serialization (e.g., tool calls without tool_choice)
-                    logger.warning(f"Error serializing chunk: {str(chunk_error)}")
-                    logger.warning(f"Chunk type: {type(chunk)}")
-                    logger.warning(f"Chunk content: {chunk}")
-                    # Skip this chunk and continue with the next one
-                    continue
+                    # This is a critical error - the model is trying to call tools without proper configuration
+                    error_msg = str(chunk_error)
+                    if "tool" in error_msg.lower():
+                        logger.error(f"Tool call error during streaming: {error_msg}")
+                        logger.error(f"Chunk type: {type(chunk)}")
+                        logger.error(f"Chunk content: {chunk}")
+                        # Re-raise to trigger retry in rotation handler
+                        raise
+                    else:
+                        logger.warning(f"Error serializing chunk: {error_msg}")
+                        logger.warning(f"Chunk type: {type(chunk)}")
+                        logger.warning(f"Chunk content: {chunk}")
+                        # Skip this chunk and continue with the next one
+                        continue
         except Exception as e:
             logger.error(f"Error in streaming response: {str(e)}")
             import json
@@ -36,6 +36,8 @@ class ChatCompletionRequest(BaseModel):
     max_tokens: Optional[int] = None
     temperature: Optional[float] = 1.0
     stream: Optional[bool] = False
+    tools: Optional[List[Dict]] = None
+    tool_choice: Optional[Union[str, Dict]] = None

 class ChatCompletionResponse(BaseModel):
     id: str
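As a quick sanity check of the extended request model, both accepted shapes of tool_choice validate: a mode string or a forced-function object. This snippet assumes the model also declares the usual model and messages fields, which the hunk above does not show:

    req = ChatCompletionRequest(
        model="gpt-4o",
        messages=[{"role": "user", "content": "hi"}],
        tools=[{"type": "function", "function": {"name": "noop", "parameters": {}}}],
        tool_choice="auto",  # mode string: "auto", "none", or "required"
    )
    forced = ChatCompletionRequest(
        model="gpt-4o",
        messages=[{"role": "user", "content": "hi"}],
        tools=req.tools,
        tool_choice={"type": "function", "function": {"name": "noop"}},  # forced call
    )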
@@ -204,7 +204,8 @@ class OpenAIProviderHandler(BaseProviderHandler):
         self.client = OpenAI(base_url=config.providers[provider_id].endpoint, api_key=api_key)

     async def handle_request(self, model: str, messages: List[Dict], max_tokens: Optional[int] = None,
-                             temperature: Optional[float] = 1.0, stream: Optional[bool] = False) -> Union[Dict, object]:
+                             temperature: Optional[float] = 1.0, stream: Optional[bool] = False,
+                             tools: Optional[List[Dict]] = None, tool_choice: Optional[Union[str, Dict]] = None) -> Union[Dict, object]:
         if self.is_rate_limited():
             raise Exception("Provider rate limited")
@@ -212,17 +213,28 @@
         import logging
         logging.info(f"OpenAIProviderHandler: Handling request for model {model}")
         logging.info(f"OpenAIProviderHandler: Messages: {messages}")
+        logging.info(f"OpenAIProviderHandler: Tools: {tools}")
+        logging.info(f"OpenAIProviderHandler: Tool choice: {tool_choice}")

         # Apply rate limiting
         await self.apply_rate_limit()

-        response = self.client.chat.completions.create(
-            model=model,
-            messages=[{"role": msg["role"], "content": msg["content"]} for msg in messages],
-            max_tokens=max_tokens,
-            temperature=temperature,
-            stream=stream
-        )
+        # Build request parameters
+        request_params = {
+            "model": model,
+            "messages": [{"role": msg["role"], "content": msg["content"]} for msg in messages],
+            "max_tokens": max_tokens,
+            "temperature": temperature,
+            "stream": stream
+        }
+        # Add tools and tool_choice if provided
+        if tools is not None:
+            request_params["tools"] = tools
+        if tool_choice is not None:
+            request_params["tool_choice"] = tool_choice
+        response = self.client.chat.completions.create(**request_params)
         logging.info(f"OpenAIProviderHandler: Response received: {response}")
         self.record_success()
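A note on the request_params pattern above: tools and tool_choice are only added when the client actually supplied them, since sending explicit nulls for these keys can be rejected by OpenAI-compatible backends. One caveat visible in the diff: the messages mapping keeps only role and content, so assistant tool_calls or tool-role results sent back by a client on a follow-up turn would be stripped before reaching the provider. An equivalent one-step form of the conditional key insertion, shown purely to make the pattern explicit:

    # Equivalent to the two if-blocks above: add only the non-None optional keys.
    optional = {"tools": tools, "tool_choice": tool_choice}
    request_params.update({k: v for k, v in optional.items() if v is not None})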