Add support for tools and tool_choice with retry on tool call errors

- Add tools and tool_choice fields to ChatCompletionRequest model
- Update OpenAIProviderHandler to accept and pass tools/tool_choice parameters
- Update handlers to pass tools/tool_choice from request to provider
- Treat tool call errors during streaming as provider failures
- Record failure and re-raise to trigger retry with next model in rotation
- Enable proper tool/function calling through the proxy
- Resolve the 'Tool choice is none, but model called a tool' error by retrying with another model
parent 9840590a
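For context, here is a minimal sketch of the kind of tool-calling request this commit lets the proxy forward, using the standard openai Python client. The base URL, API key, model name, and tool schema are illustrative placeholders, not values from this repository:

    from openai import OpenAI

    # The proxy exposes an OpenAI-compatible endpoint; URL and key are placeholders.
    client = OpenAI(base_url="http://localhost:8000/v1", api_key="proxy-key")

    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Return the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }]

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "What's the weather in Paris?"}],
        tools=tools,          # now forwarded to the provider by this commit
        tool_choice="auto",   # previously dropped by the proxy
    )
    print(response.choices[0].message.tool_calls)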
@@ -83,7 +83,9 @@ class RequestHandler:
                 messages=request_data['messages'],
                 max_tokens=request_data.get('max_tokens'),
                 temperature=request_data.get('temperature', 1.0),
-                stream=request_data.get('stream', False)
+                stream=request_data.get('stream', False),
+                tools=request_data.get('tools'),
+                tool_choice=request_data.get('tool_choice')
             )
             logger.info(f"Response received from provider")
             handler.record_success()
@@ -120,7 +122,9 @@ class RequestHandler:
                 messages=request_data['messages'],
                 max_tokens=request_data.get('max_tokens'),
                 temperature=request_data.get('temperature', 1.0),
-                stream=True
+                stream=True,
+                tools=request_data.get('tools'),
+                tool_choice=request_data.get('tool_choice')
             )
             for chunk in response:
                 try:
@@ -134,11 +138,23 @@
                     yield f"data: {json.dumps(chunk_dict)}\n\n".encode('utf-8')
                 except Exception as chunk_error:
                     # Handle errors during chunk serialization (e.g., tool calls without tool_choice)
-                    logger.warning(f"Error serializing chunk: {str(chunk_error)}")
-                    logger.warning(f"Chunk type: {type(chunk)}")
-                    logger.warning(f"Chunk content: {chunk}")
-                    # Skip this chunk and continue with the next one
-                    continue
+                    # This is a critical error - the model is trying to call tools without proper configuration
+                    # We should treat this as a provider failure
+                    error_msg = str(chunk_error)
+                    if "tool" in error_msg.lower():
+                        logger.error(f"Tool call error during streaming: {error_msg}")
+                        logger.error(f"Chunk type: {type(chunk)}")
+                        logger.error(f"Chunk content: {chunk}")
+                        # Record this as a provider failure
+                        handler.record_failure()
+                        # Re-raise to trigger retry in rotation handler
+                        raise
+                    else:
+                        logger.warning(f"Error serializing chunk: {error_msg}")
+                        logger.warning(f"Chunk type: {type(chunk)}")
+                        logger.warning(f"Chunk content: {chunk}")
+                        # Skip this chunk and continue with the next one
+                        continue
             handler.record_success()
         except Exception as e:
             handler.record_failure()
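The handler.record_failure() plus raise above is what drives failover: the exception escapes the streaming generator and reaches the rotation logic, which moves on to the next model. A hedged sketch of that loop follows; the names (try_models_in_rotation, handler.model) are hypothetical, since the actual RotationHandler internals are not shown in full in this diff:

    # Hedged sketch of the rotation retry that the re-raised tool error feeds into.
    async def try_models_in_rotation(handlers, request_data):
        last_error = None
        for handler in handlers:  # one handler per model in the rotation
            try:
                return await handler.handle_request(
                    model=handler.model,
                    messages=request_data['messages'],
                    tools=request_data.get('tools'),
                    tool_choice=request_data.get('tool_choice'),
                )
            except Exception as e:  # includes the tool call error re-raised above
                last_error = e      # failure already recorded by the handler
                continue            # rotate to the next model
        raise last_error            # every model in the rotation failed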
@@ -348,7 +364,9 @@ class RotationHandler:
                 messages=request_data['messages'],
                 max_tokens=request_data.get('max_tokens'),
                 temperature=request_data.get('temperature', 1.0),
-                stream=request_data.get('stream', False)
+                stream=request_data.get('stream', False),
+                tools=request_data.get('tools'),
+                tool_choice=request_data.get('tool_choice')
             )
             logger.info(f"Response received from provider")
             handler.record_success()
@@ -675,11 +693,20 @@ class AutoselectHandler:
                     yield f"data: {json.dumps(chunk_dict)}\n\n".encode('utf-8')
                 except Exception as chunk_error:
                     # Handle errors during chunk serialization (e.g., tool calls without tool_choice)
-                    logger.warning(f"Error serializing chunk: {str(chunk_error)}")
-                    logger.warning(f"Chunk type: {type(chunk)}")
-                    logger.warning(f"Chunk content: {chunk}")
-                    # Skip this chunk and continue with the next one
-                    continue
+                    # This is a critical error - the model is trying to call tools without proper configuration
+                    error_msg = str(chunk_error)
+                    if "tool" in error_msg.lower():
+                        logger.error(f"Tool call error during streaming: {error_msg}")
+                        logger.error(f"Chunk type: {type(chunk)}")
+                        logger.error(f"Chunk content: {chunk}")
+                        # Re-raise to trigger retry in rotation handler
+                        raise
+                    else:
+                        logger.warning(f"Error serializing chunk: {error_msg}")
+                        logger.warning(f"Chunk type: {type(chunk)}")
+                        logger.warning(f"Chunk content: {chunk}")
+                        # Skip this chunk and continue with the next one
+                        continue
         except Exception as e:
             logger.error(f"Error in streaming response: {str(e)}")
             import json
@@ -36,6 +36,8 @@ class ChatCompletionRequest(BaseModel):
     max_tokens: Optional[int] = None
     temperature: Optional[float] = 1.0
     stream: Optional[bool] = False
+    tools: Optional[List[Dict]] = None
+    tool_choice: Optional[Union[str, Dict]] = None

 class ChatCompletionResponse(BaseModel):
     id: str
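As a quick sanity check of the extended request model, both accepted shapes of tool_choice validate: a mode string or a forced-function object. This snippet assumes the model also declares the usual model and messages fields, which the hunk above does not show:

    req = ChatCompletionRequest(
        model="gpt-4o",
        messages=[{"role": "user", "content": "hi"}],
        tools=[{"type": "function", "function": {"name": "noop", "parameters": {}}}],
        tool_choice="auto",  # mode string: "auto", "none", or "required"
    )
    forced = ChatCompletionRequest(
        model="gpt-4o",
        messages=[{"role": "user", "content": "hi"}],
        tools=req.tools,
        tool_choice={"type": "function", "function": {"name": "noop"}},  # forced call
    )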
@@ -204,7 +204,8 @@ class OpenAIProviderHandler(BaseProviderHandler):
         self.client = OpenAI(base_url=config.providers[provider_id].endpoint, api_key=api_key)

     async def handle_request(self, model: str, messages: List[Dict], max_tokens: Optional[int] = None,
-                             temperature: Optional[float] = 1.0, stream: Optional[bool] = False) -> Union[Dict, object]:
+                             temperature: Optional[float] = 1.0, stream: Optional[bool] = False,
+                             tools: Optional[List[Dict]] = None, tool_choice: Optional[Union[str, Dict]] = None) -> Union[Dict, object]:
         if self.is_rate_limited():
             raise Exception("Provider rate limited")
@@ -212,17 +213,28 @@
         import logging
         logging.info(f"OpenAIProviderHandler: Handling request for model {model}")
         logging.info(f"OpenAIProviderHandler: Messages: {messages}")
+        logging.info(f"OpenAIProviderHandler: Tools: {tools}")
+        logging.info(f"OpenAIProviderHandler: Tool choice: {tool_choice}")

         # Apply rate limiting
         await self.apply_rate_limit()

-        response = self.client.chat.completions.create(
-            model=model,
-            messages=[{"role": msg["role"], "content": msg["content"]} for msg in messages],
-            max_tokens=max_tokens,
-            temperature=temperature,
-            stream=stream
-        )
+        # Build request parameters
+        request_params = {
+            "model": model,
+            "messages": [{"role": msg["role"], "content": msg["content"]} for msg in messages],
+            "max_tokens": max_tokens,
+            "temperature": temperature,
+            "stream": stream
+        }
+        # Add tools and tool_choice if provided
+        if tools is not None:
+            request_params["tools"] = tools
+        if tool_choice is not None:
+            request_params["tool_choice"] = tool_choice
+        response = self.client.chat.completions.create(**request_params)
         logging.info(f"OpenAIProviderHandler: Response received: {response}")
         self.record_success()
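A note on the request_params pattern above: tools and tool_choice are only added when the client actually supplied them, since sending explicit nulls for these keys can be rejected by OpenAI-compatible backends. One caveat visible in the diff: the messages mapping keeps only role and content, so assistant tool_calls or tool-role results sent back by a client on a follow-up turn would be stripped before reaching the provider. An equivalent one-step form of the conditional key insertion, shown purely to make the pattern explicit:

    # Equivalent to the two if-blocks above: add only the non-None optional keys.
    optional = {"tools": tools, "tool_choice": tool_choice}
    request_params.update({k: v for k, v in optional.items() if v is not None})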