Now it works!

parent 49e14347
@@ -44,6 +44,8 @@ class BaseProviderHandler:
         self.error_tracking = config.error_tracking[provider_id]
         self.last_request_time = 0
         self.rate_limit = config.providers[provider_id].rate_limit
+        # Add model-level rate limit tracking
+        self.model_last_request_time = {}  # {model_name: timestamp}
 
     def is_rate_limited(self) -> bool:
         if self.error_tracking['disabled_until'] and self.error_tracking['disabled_until'] > time.time():
@@ -65,6 +67,25 @@ class BaseProviderHandler:
         self.last_request_time = time.time()
 
+    async def apply_model_rate_limit(self, model: str, rate_limit: Optional[float] = None):
+        """Apply rate limiting for a specific model"""
+        if rate_limit is None:
+            rate_limit = self.rate_limit
+
+        if rate_limit and rate_limit > 0:
+            current_time = time.time()
+            last_time = self.model_last_request_time.get(model, 0)
+            time_since_last_request = current_time - last_time
+            required_wait = rate_limit - time_since_last_request
+
+            if required_wait > 0:
+                import logging
+                logger = logging.getLogger(__name__)
+                logger.info(f"Model-level rate limiting: waiting {required_wait:.2f}s for model {model}")
+                await asyncio.sleep(required_wait)
+
+        self.model_last_request_time[model] = time.time()
+
     def record_failure(self):
         import logging
         logger = logging.getLogger(__name__)
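A note on the new helper: apply_model_rate_limit treats rate_limit as the minimum number of seconds between requests to the same model, falling back to the provider-wide self.rate_limit when no per-model value is passed. The call site is not part of this commit; a minimal sketch of how a handler might use it (handle_request and _send are hypothetical names):

    # Hypothetical call site, not part of this commit.
    async def handle_request(self, model: str, payload: dict):
        # Wait out the per-model window; falls back to self.rate_limit.
        await self.apply_model_rate_limit(model)
        return await self._send(model, payload)  # _send is a placeholder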
@@ -188,6 +209,14 @@ class GoogleProviderHandler(BaseProviderHandler):
             logging.info(f"GoogleProviderHandler: Response received: {response}")
             self.record_success()
 
+            # Dump raw response if AISBF_DEBUG is enabled
+            if AISBF_DEBUG:
+                logging.info("=== RAW GOOGLE RESPONSE ===")
+                logging.info(f"Raw response type: {type(response)}")
+                logging.info(f"Raw response: {response}")
+                logging.info(f"Raw response dir: {dir(response)}")
+                logging.info("=== END RAW GOOGLE RESPONSE ===")
+
             # Extract content from the nested response structure
             # The response has candidates[0].content.parts
             response_text = ""
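None of these hunks show where the AISBF_DEBUG flag is defined on the Python side; presumably it is a module-level boolean derived from the environment variable that the development start script exports further down. A minimal sketch under that assumption (the exact truthy-string handling is a guess):

    import os

    # Assumed definition: treat common truthy strings as enabling debug dumps.
    AISBF_DEBUG = os.environ.get("AISBF_DEBUG", "").strip().lower() in ("1", "true", "yes")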
@@ -394,6 +423,13 @@ class GoogleProviderHandler(BaseProviderHandler):
             # Pydantic validation might be causing serialization issues
             logging.info("GoogleProviderHandler: Returning response dict (no validation)")
             logging.info(f"Response dict keys: {openai_response.keys()}")
+
+            # Dump final response if AISBF_DEBUG is enabled
+            if AISBF_DEBUG:
+                logging.info("=== FINAL GOOGLE RESPONSE DICT ===")
+                logging.info(f"Final response: {openai_response}")
+                logging.info("=== END FINAL GOOGLE RESPONSE DICT ===")
+
             return openai_response
         except Exception as e:
             import logging
@@ -496,10 +532,18 @@ class OpenAIProviderHandler(BaseProviderHandler):
             logging.info(f"OpenAIProviderHandler: Response received: {response}")
             self.record_success()
 
-            # Return Stream object directly for streaming, otherwise dump to dict
-            if stream:
-                return response
-            return response.model_dump()
+            # Dump raw response if AISBF_DEBUG is enabled
+            if AISBF_DEBUG:
+                logging.info("=== RAW OPENAI RESPONSE ===")
+                logging.info(f"Raw response type: {type(response)}")
+                logging.info(f"Raw response: {response}")
+                logging.info("=== END RAW OPENAI RESPONSE ===")
+
+            # Return raw response without any parsing or modification
+            # For streaming: return the Stream object as-is
+            # For non-streaming: return the response object as-is
+            logging.info("OpenAIProviderHandler: Returning raw response without parsing")
+            return response
         except Exception as e:
             import logging
             logging.error(f"OpenAIProviderHandler: Error: {str(e)}", exc_info=True)
@@ -554,6 +598,14 @@ class AnthropicProviderHandler(BaseProviderHandler):
             logging.info(f"AnthropicProviderHandler: Response received: {response}")
             self.record_success()
 
+            # Dump raw response if AISBF_DEBUG is enabled
+            if AISBF_DEBUG:
+                logging.info("=== RAW ANTHROPIC RESPONSE ===")
+                logging.info(f"Raw response type: {type(response)}")
+                logging.info(f"Raw response: {response}")
+                logging.info(f"Raw response dir: {dir(response)}")
+                logging.info("=== END RAW ANTHROPIC RESPONSE ===")
+
             logging.info("=== ANTHROPIC RESPONSE PARSING START ===")
             logging.info(f"Response type: {type(response)}")
             logging.info(f"Response attributes: {dir(response)}")
@@ -681,6 +733,13 @@ class AnthropicProviderHandler(BaseProviderHandler):
             # Pydantic validation might be causing serialization issues
             logging.info("AnthropicProviderHandler: Returning response dict (no validation)")
             logging.info(f"Response dict keys: {openai_response.keys()}")
+
+            # Dump final response dict if AISBF_DEBUG is enabled
+            if AISBF_DEBUG:
+                logging.info("=== FINAL ANTHROPIC RESPONSE DICT ===")
+                logging.info(f"Final response: {openai_response}")
+                logging.info("=== END FINAL ANTHROPIC RESPONSE DICT ===")
+
             return openai_response
         except Exception as e:
             import logging
@@ -818,10 +877,17 @@ class OllamaProviderHandler(BaseProviderHandler):
             logger.info(f"Final response: {response_json}")
             self.record_success()
 
+            # Dump raw response if AISBF_DEBUG is enabled
+            if AISBF_DEBUG:
+                logging.info("=== RAW OLLAMA RESPONSE ===")
+                logging.info(f"Raw response JSON: {response_json}")
+                logging.info("=== END RAW OLLAMA RESPONSE ===")
+
             logger.info("=== OllamaProviderHandler.handle_request END ===")
 
             # Convert Ollama response to OpenAI-style format
-            return {
+            openai_response = {
                 "id": f"ollama-{model}-{int(time.time())}",
                 "object": "chat.completion",
                 "created": int(time.time()),
@@ -840,6 +906,14 @@ class OllamaProviderHandler(BaseProviderHandler):
                     "total_tokens": response_json.get("prompt_eval_count", 0) + response_json.get("eval_count", 0)
                 }
             }
+
+            # Dump final response dict if AISBF_DEBUG is enabled
+            if AISBF_DEBUG:
+                logging.info("=== FINAL OLLAMA RESPONSE DICT ===")
+                logging.info(f"Final response: {openai_response}")
+                logging.info("=== END FINAL OLLAMA RESPONSE DICT ===")
+
+            return openai_response
         except Exception as e:
             self.record_failure()
             raise e
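The converted dict follows the standard OpenAI chat-completion shape, so a client reads the assistant text from choices[0].message.content. An illustrative reader (extract_content is a made-up helper, not part of this commit):

    # Illustrative only: pull the assistant text out of the dict built above.
    def extract_content(openai_response: dict) -> str:
        choices = openai_response.get("choices", [])
        if not choices:
            return ""
        return choices[0].get("message", {}).get("content", "")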
@@ -21,6 +21,8 @@
 # AISBF - AI Service Broker Framework || AI Should Be Free
 # DEVELOPMENT START SCRIPT - For development use only
 # For production use, install with: python setup.py install
 
+export AISBF_DEBUG=true
+
 # Get the directory where this script is located
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
#!/bin/bash
# Copyright (C) 2026 Stefy Lanza <stefy@nexlab.net>
#
# Test script for AISBF proxy
#
PROXY_URL="http://127.0.0.1:17765"
# Test 1: Streaming request to rotations endpoint with googletest model
echo "Test 1: Streaming request to rotations endpoint with googletest model"
echo "----------------------------------------"
echo "Note: Streaming responses will appear as data: lines"
echo ""
curl -X POST "${PROXY_URL}/api/rotations/chat/completions" \
    -H "Content-Type: application/json" \
    -d '{
        "model": "googletest",
        "messages": [
            {"role": "user", "content": "Hello, how are you?"}
        ],
        "stream": true
    }' \
    2>/dev/null
echo ""
echo ""
# Test 2: Streaming request to rotations endpoint with kilotest model
echo "Test 2: Streaming request to rotations endpoint with kilotest model"
echo "----------------------------------------"
echo "Note: Streaming responses will appear as data: lines"
echo ""
curl -X POST "${PROXY_URL}/api/rotations/chat/completions" \
    -H "Content-Type: application/json" \
    -d '{
        "model": "kilotest",
        "messages": [
            {"role": "user", "content": "Hello, how are you?"}
        ],
        "stream": true
    }' \
    2>/dev/null
echo ""
echo ""