Now it works!

e5494efd · Stefy Lanza (nextime / spora ) · 49e14347 · e5494efd · e5494efd · e5494efd
Commit e5494efd authored Feb 07, 2026 by Stefy Lanza (nextime / spora )
Showing with 574 additions and 303 deletions

handlers.py aisbf/handlers.py +417 -175

providers.py aisbf/providers.py +79 -5

main.py main.py +31 -123

start_proxy.sh start_proxy.sh +2 -0

test_google.sh test_google.sh +45 -0

No files found.
--- a/aisbf/handlers.py
+++ b/aisbf/handlers.py
--- a/aisbf/providers.py
+++ b/aisbf/providers.py
@@ -44,6 +44,8 @@ class BaseProviderHandler:
        self.error_tracking = config.error_tracking[provider_id]
        self.last_request_time = 0
        self.rate_limit = config.providers[provider_id].rate_limit
+        # Add model-level rate limit tracking
+        self.model_last_request_time = {}  # {model_name: timestamp}
    def is_rate_limited(self) -> bool:
        if self.error_tracking['disabled_until'] and self.error_tracking['disabled_until'] > time.time():
@@ -65,6 +67,25 @@ class BaseProviderHandler:
            self.last_request_time = time.time()
+    async def apply_model_rate_limit(self, model: str, rate_limit: Optional[float] = None):
+        """Wait as needed so requests for one model are spaced by ``rate_limit`` seconds.
+
+        Args:
+            model: Key under which the last request time is tracked in
+                ``self.model_last_request_time``.
+            rate_limit: Minimum number of seconds between requests for
+                ``model``. When ``None``, ``self.rate_limit`` is used. A falsy
+                or non-positive value skips both the wait and the bookkeeping.
+        """
+        # Fall back to the handler-wide limit when no per-model limit is given
+        if rate_limit is None:
+            rate_limit = self.rate_limit
+        if rate_limit and rate_limit > 0:
+            current_time = time.time()
+            # A model with no recorded request defaults to timestamp 0
+            last_time = self.model_last_request_time.get(model, 0)
+            time_since_last_request = current_time - last_time
+            required_wait = rate_limit - time_since_last_request
+            if required_wait > 0:
+                import logging
+                logger = logging.getLogger(__name__)
+                logger.info(f"Model-level rate limiting: waiting {required_wait:.2f}s for model {model}")
+                await asyncio.sleep(required_wait)
+            # Stamp after any sleep so the next interval is measured from the
+            # moment this request is actually allowed to proceed.
+            # NOTE(review): no lock is taken here; coroutines awaiting at the same
+            # time for the same model read the same last_time - confirm whether
+            # callers serialize requests per model.
+            self.model_last_request_time[model] = time.time()
    def record_failure(self):
        import logging
        logger = logging.getLogger(__name__)
@@ -188,6 +209,14 @@ class GoogleProviderHandler(BaseProviderHandler):
                logging.info(f"GoogleProviderHandler: Response received: {response}")
                self.record_success()
+                # Dump raw response if AISBF_DEBUG is enabled
+                if AISBF_DEBUG:
+                    logging.info(f"=== RAW GOOGLE RESPONSE ===")
+                    logging.info(f"Raw response type: {type(response)}")
+                    logging.info(f"Raw response: {response}")
+                    logging.info(f"Raw response dir: {dir(response)}")
+                    logging.info(f"=== END RAW GOOGLE RESPONSE ===")
                # Extract content from the nested response structure
                # The response has candidates[0].content.parts
                response_text = ""
@@ -394,6 +423,13 @@ class GoogleProviderHandler(BaseProviderHandler):
                # Pydantic validation might be causing serialization issues
                logging.info(f"GoogleProviderHandler: Returning response dict (no validation)")
                logging.info(f"Response dict keys: {openai_response.keys()}")
+                # Dump final response if AISBF_DEBUG is enabled
+                if AISBF_DEBUG:
+                    logging.info(f"=== FINAL GOOGLE RESPONSE DICT ===")
+                    logging.info(f"Final response: {openai_response}")
+                    logging.info(f"=== END FINAL GOOGLE RESPONSE DICT ===")
                return openai_response
        except Exception as e:
            import logging
@@ -496,10 +532,18 @@ class OpenAIProviderHandler(BaseProviderHandler):
            logging.info(f"OpenAIProviderHandler: Response received: {response}")
            self.record_success()
-            # Return Stream object directly for streaming, otherwise dump to dict
+            # Dump raw response if AISBF_DEBUG is enabled
-            if stream:
+            if AISBF_DEBUG:
+                logging.info(f"=== RAW OPENAI RESPONSE ===")
+                logging.info(f"Raw response type: {type(response)}")
+                logging.info(f"Raw response: {response}")
+                logging.info(f"=== END RAW OPENAI RESPONSE ===")
+            # Return raw response without any parsing or modification
+            # For streaming: return the Stream object as-is
+            # For non-streaming: return the response object as-is
+            logging.info(f"OpenAIProviderHandler: Returning raw response without parsing")
            return response
-            return response.model_dump()
        except Exception as e:
            import logging
            logging.error(f"OpenAIProviderHandler: Error: {str(e)}", exc_info=True)
@@ -554,6 +598,14 @@ class AnthropicProviderHandler(BaseProviderHandler):
            logging.info(f"AnthropicProviderHandler: Response received: {response}")
            self.record_success()
+            # Dump raw response if AISBF_DEBUG is enabled
+            if AISBF_DEBUG:
+                logging.info(f"=== RAW ANTHROPIC RESPONSE ===")
+                logging.info(f"Raw response type: {type(response)}")
+                logging.info(f"Raw response: {response}")
+                logging.info(f"Raw response dir: {dir(response)}")
+                logging.info(f"=== END RAW ANTHROPIC RESPONSE ===")
            logging.info(f"=== ANTHROPIC RESPONSE PARSING START ===")
            logging.info(f"Response type: {type(response)}")
            logging.info(f"Response attributes: {dir(response)}")
@@ -681,6 +733,13 @@ class AnthropicProviderHandler(BaseProviderHandler):
            # Pydantic validation might be causing serialization issues
            logging.info(f"AnthropicProviderHandler: Returning response dict (no validation)")
            logging.info(f"Response dict keys: {openai_response.keys()}")
+            # Dump final response dict if AISBF_DEBUG is enabled
+            if AISBF_DEBUG:
+                logging.info(f"=== FINAL ANTHROPIC RESPONSE DICT ===")
+                logging.info(f"Final response: {openai_response}")
+                logging.info(f"=== END FINAL ANTHROPIC RESPONSE DICT ===")
            return openai_response
        except Exception as e:
            import logging
@@ -818,10 +877,17 @@ class OllamaProviderHandler(BaseProviderHandler):
            logger.info(f"Final response: {response_json}")
            self.record_success()
+            # Dump raw response if AISBF_DEBUG is enabled
+            if AISBF_DEBUG:
+                logging.info(f"=== RAW OLLAMA RESPONSE ===")
+                logging.info(f"Raw response JSON: {response_json}")
+                logging.info(f"=== END RAW OLLAMA RESPONSE ===")
            logger.info(f"=== OllamaProviderHandler.handle_request END ===")
            # Convert Ollama response to OpenAI-style format
-            return {
+            openai_response = {
                "id": f"ollama-{model}-{int(time.time())}",
                "object": "chat.completion",
                "created": int(time.time()),
@@ -840,6 +906,14 @@ class OllamaProviderHandler(BaseProviderHandler):
                    "total_tokens": response_json.get("prompt_eval_count", 0) + response_json.get("eval_count", 0)
                }
            }
+            # Dump final response dict if AISBF_DEBUG is enabled
+            if AISBF_DEBUG:
+                logging.info(f"=== FINAL OLLAMA RESPONSE DICT ===")
+                logging.info(f"Final response: {openai_response}")
+                logging.info(f"=== END FINAL OLLAMA RESPONSE DICT ===")
+            return openai_response
        except Exception as e:
            self.record_failure()
            raise e

--- a/main.py
+++ b/main.py
@@ -51,6 +51,9 @@ def setup_logging():
    # Create log directory if it doesn't exist
    log_dir.mkdir(parents=True, exist_ok=True)
+    # Check if debug mode is enabled
+    AISBF_DEBUG = os.environ.get('AISBF_DEBUG', '').lower() in ('true', '1', 'yes')
    # Setup rotating file handler for general logs
    log_file = log_dir / 'aisbf.log'
    file_handler = RotatingFileHandler(
@@ -76,8 +79,15 @@ def setup_logging():
    error_handler.setLevel(logging.ERROR)
    error_handler.setFormatter(file_formatter)
-    # Setup console handler
+    # Setup console handler - use DEBUG level if AISBF_DEBUG is enabled
    console_handler = logging.StreamHandler(sys.stdout)
+    if AISBF_DEBUG:
+        console_handler.setLevel(logging.DEBUG)
+        print("=== AISBF DEBUG MODE ENABLED ===")
+        print("All debug messages will be shown in console")
+        print("Raw responses from providers will be logged")
+        print("=== END AISBF DEBUG MODE ===")
+    else:
        console_handler.setLevel(logging.INFO)
    console_formatter = logging.Formatter(
        '%(asctime)s - %(levelname)s - %(message)s'
@@ -115,7 +125,7 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
    print(f"Request method: {request.method}")
    print(f"Request headers: {dict(request.headers)}")
-    # Try to get the raw body
+    # Try to get raw body
    try:
        raw_body = await request.body()
        print(f"Raw request body: {raw_body.decode('utf-8')}")
@@ -130,7 +140,7 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
    logger.error(f"Request method: {request.method}")
    logger.error(f"Request headers: {dict(request.headers)}")
-    # Try to get the raw body
+    # Try to get raw body
    try:
        raw_body = await request.body()
        logger.error(f"Raw request body: {raw_body.decode('utf-8')}")
@@ -187,7 +197,12 @@ async def list_rotations():
 @app.post("/api/rotations/chat/completions")
 async def rotation_chat_completions(request: Request, body: ChatCompletionRequest):
-    """Handle chat completions for rotations using model name to select rotation"""
+    """
+    Handle chat completions for rotations using model name to select rotation.
+    The RotationHandler handles streaming internally based on the selected
+    provider's type (google vs others), so we just pass through the response.
+    """
    logger.info(f"=== ROTATION CHAT COMPLETION REQUEST START ===")
    logger.info(f"Request path: {request.url.path}")
    logger.info(f"Model requested: {body.model}")
@@ -210,117 +225,10 @@ async def rotation_chat_completions(request: Request, body: ChatCompletionReques
    logger.debug("Handling rotation request")
    try:
-        if body.stream:
+        # The rotation handler handles streaming internally and returns
-            logger.debug("Handling streaming rotation request")
+        # a StreamingResponse for streaming requests or a dict for non-streaming
-            rotation_config = config.get_rotation(body.model)
-            if not rotation_config:
-                raise HTTPException(status_code=400, detail=f"Rotation {body.model} not found")
-            # Check if this is a Google streaming response
-            async def stream_generator():
-                import time  # Import time module
-                try:
-                    response = await rotation_handler.handle_rotation_request(body.model, body_dict)
-                    # Check if this is a generator (sync iterator) response
-                    if hasattr(response, '__iter__') and not hasattr(response, '__aiter__'):
-                        logger.debug("Handling synchronous generator stream response")
-                        # This is likely a Google streaming response
-                        chunk_id = 0
-                        for chunk in response:
-                            try:
-                                logger.debug(f"Chunk type: {type(chunk)}")
-                                logger.debug(f"Chunk: {chunk}")
-                                # Extract text from Google chunk
-                                chunk_text = ""
-                                try:
-                                    if hasattr(chunk, 'candidates') and chunk.candidates:
-                                        candidate = chunk.candidates[0] if chunk.candidates else None
-                                        if candidate and hasattr(candidate, 'content') and candidate.content:
-                                            if hasattr(candidate.content, 'parts') and candidate.content.parts:
-                                                for part in candidate.content.parts:
-                                                    if hasattr(part, 'text') and part.text:
-                                                        chunk_text += part.text
-                                except Exception as e:
-                                    logger.error(f"Error extracting text from chunk: {e}")
-                                # Create OpenAI-compatible chunk
-                                openai_chunk = {
-                                    "id": f"google-{body.model}-{int(time.time())}-chunk-{chunk_id}",
-                                    "object": "chat.completion.chunk",
-                                    "created": int(time.time()),
-                                    "model": body.model,
-                                    "choices": [{
-                                        "index": 0,
-                                        "delta": {
-                                            "content": chunk_text
-                                        },
-                                        "finish_reason": None
-                                    }]
-                                }
-                                chunk_id += 1
-                                logger.debug(f"OpenAI chunk: {openai_chunk}")
-                                import json
-                                yield f"data: {json.dumps(openai_chunk)}\n\n".encode('utf-8')
-                            except Exception as chunk_error:
-                                logger.error(f"Error processing chunk: {str(chunk_error)}")
-                                continue
-                    elif hasattr(response, '__aiter__'):
-                       # Handle OpenAI/Anthropic streaming responses (async iterators)
-                       chunk_id = 0
-                       async for chunk in response:
-                           try:
-                               # Extract text from Google chunk
-                               chunk_text = ""
-                               try:
-                                   if hasattr(chunk, 'candidates') and chunk.candidates:
-                                       candidate = chunk.candidates[0] if chunk.candidates else None
-                                       if candidate and hasattr(candidate, 'content') and candidate.content:
-                                           if hasattr(candidate.content, 'parts') and candidate.content.parts:
-                                               for part in candidate.content.parts:
-                                                   if hasattr(part, 'text') and part.text:
-                                                       chunk_text += part.text
-                               except Exception as e:
-                                   logger.error(f"Error extracting text from chunk: {e}")
-                               # Create OpenAI-compatible chunk
-                               chunk_dict = {
-                                   "id": f"google-{body.model}-{int(time.time())}-chunk-{chunk_id}",
-                                   "object": "chat.completion.chunk",
-                                   "created": int(time.time()),
-                                   "model": body.model,
-                                   "choices": [{
-                                       "index": 0,
-                                       "delta": {
-                                           "content": chunk_text
-                                       },
-                                       "finish_reason": None
-                                   }]
-                               }
-                               chunk_id += 1
-                               import json
-                               yield f"data: {json.dumps(chunk_dict)}\n\n".encode('utf-8')
-                           except Exception as chunk_error:
-                               logger.warning(f"Error serializing chunk: {str(chunk_error)}")
-                               continue
-                    else:
-                        # Handle other types of responses
-                        logger.warning(f"Unknown response type: {type(response)}")
-                        import json
-                        yield f"data: {json.dumps({'error': 'Unknown response type'})}\n\n".encode('utf-8')
-                except Exception as e:
-                    logger.error(f"Error in streaming response: {str(e)}")
-                    import json
-                    yield f"data: {json.dumps({'error': str(e)})}\n\n".encode('utf-8')
-            return StreamingResponse(stream_generator(), media_type="text/event-stream")
-        else:
-            logger.debug("Handling non-streaming rotation request")
        result = await rotation_handler.handle_rotation_request(body.model, body_dict)
-            logger.debug(f"Rotation response result: {result}")
+        logger.debug(f"Rotation response result type: {type(result)}")
        return result
    except Exception as e:
        logger.error(f"Error handling rotation chat_completions: {str(e)}", exc_info=True)

--- a/start_proxy.sh
+++ b/start_proxy.sh
@@ -21,6 +21,8 @@
 # AISBF - AI Service Broker Framework || AI Should Be Free
 # DEVELOPMENT START SCRIPT - For development use only
 # For production use, install with: python setup.py install
+export AISBF_DEBUG=true
 # Get the directory where this script is located
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

--- a/test_google.sh
+++ b/test_google.sh
+#!/bin/bash
+# Copyright (C) 2026 Stefy Lanza <stefy@nexlab.net>
+#
+# Test script for AISBF proxy
+#
+# Sends two streaming chat-completion requests (models "googletest" and
+# "kilotest") to the rotations endpoint of the proxy at PROXY_URL and prints
+# whatever the server returns. The script makes no assertions about the
+# responses; the output is meant to be inspected by eye.
+# NOTE(review): presumably "googletest" and "kilotest" are rotation names from
+# the local configuration - confirm they exist before running.
+#
+# Base URL of the proxy under test
+PROXY_URL="http://127.0.0.1:17765"
+# Test 1: Streaming request to rotations endpoint with googletest model
+echo "Test 1: Streaming request to rotations endpoint with googletest model"
+echo "----------------------------------------"
+echo "Note: Streaming responses will appear as data: lines"
+echo ""
+# stderr is discarded (2>/dev/null): this hides curl's progress meter, and
+# also any curl error message such as a refused connection.
+curl -X POST "${PROXY_URL}/api/rotations/chat/completions" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "googletest",
+    "messages": [
+      {"role": "user", "content": "Hello, how are you?"}
+    ],
+    "stream": true
+  }' \
+  2>/dev/null
+echo ""
+echo ""
+# Test 2: Streaming request to rotations endpoint with kilotest model
+echo "Test 2: Streaming request to rotations endpoint with kilotest model"
+echo "----------------------------------------"
+echo "Note: Streaming responses will appear as data: lines"
+echo ""
+# Same request as Test 1 with a different model name; stderr is discarded
+# here as well.
+curl -X POST "${PROXY_URL}/api/rotations/chat/completions" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "kilotest",
+    "messages": [
+      {"role": "user", "content": "Hello, how are you?"}
+    ],
+    "stream": true
+  }' \
+  2>/dev/null
+echo ""
+echo ""