Now it works!

parent 49e14347
This diff is collapsed.
......@@ -44,6 +44,8 @@ class BaseProviderHandler:
self.error_tracking = config.error_tracking[provider_id]
self.last_request_time = 0
self.rate_limit = config.providers[provider_id].rate_limit
# Add model-level rate limit tracking
self.model_last_request_time = {} # {model_name: timestamp}
def is_rate_limited(self) -> bool:
if self.error_tracking['disabled_until'] and self.error_tracking['disabled_until'] > time.time():
......@@ -65,6 +67,25 @@ class BaseProviderHandler:
self.last_request_time = time.time()
async def apply_model_rate_limit(self, model: str, rate_limit: Optional[float] = None):
    """Sleep as needed to honor a per-model minimum interval between requests.

    Args:
        model: Name of the model whose last-request timestamp is tracked in
            ``self.model_last_request_time``.
        rate_limit: Minimum number of seconds between requests to this model.
            Falls back to the provider-level ``self.rate_limit`` when omitted;
            a falsy or non-positive value disables limiting.

    Side effects:
        Updates ``self.model_last_request_time[model]`` with the current time
        after any required wait, even when limiting is disabled (so enabling a
        rate limit later starts from an accurate baseline).
    """
    # Function-local import keeps this hunk self-contained; hoisted out of the
    # wait branch so it is not re-resolved on every throttled call.
    import logging
    logger = logging.getLogger(__name__)

    if rate_limit is None:
        rate_limit = self.rate_limit
    if rate_limit and rate_limit > 0:
        # 0 default means "never requested", making the elapsed time huge and
        # the required wait negative, i.e. no sleep on the first request.
        elapsed = time.time() - self.model_last_request_time.get(model, 0)
        required_wait = rate_limit - elapsed
        if required_wait > 0:
            # Lazy %-args: the message is only formatted if INFO is enabled.
            logger.info(
                "Model-level rate limiting: waiting %.2fs for model %s",
                required_wait, model,
            )
            await asyncio.sleep(required_wait)
    self.model_last_request_time[model] = time.time()
def record_failure(self):
import logging
logger = logging.getLogger(__name__)
......@@ -188,6 +209,14 @@ class GoogleProviderHandler(BaseProviderHandler):
logging.info(f"GoogleProviderHandler: Response received: {response}")
self.record_success()
# Dump raw response if AISBF_DEBUG is enabled
if AISBF_DEBUG:
logging.info(f"=== RAW GOOGLE RESPONSE ===")
logging.info(f"Raw response type: {type(response)}")
logging.info(f"Raw response: {response}")
logging.info(f"Raw response dir: {dir(response)}")
logging.info(f"=== END RAW GOOGLE RESPONSE ===")
# Extract content from the nested response structure
# The response has candidates[0].content.parts
response_text = ""
......@@ -394,6 +423,13 @@ class GoogleProviderHandler(BaseProviderHandler):
# Pydantic validation might be causing serialization issues
logging.info(f"GoogleProviderHandler: Returning response dict (no validation)")
logging.info(f"Response dict keys: {openai_response.keys()}")
# Dump final response if AISBF_DEBUG is enabled
if AISBF_DEBUG:
logging.info(f"=== FINAL GOOGLE RESPONSE DICT ===")
logging.info(f"Final response: {openai_response}")
logging.info(f"=== END FINAL GOOGLE RESPONSE DICT ===")
return openai_response
except Exception as e:
import logging
......@@ -496,10 +532,18 @@ class OpenAIProviderHandler(BaseProviderHandler):
logging.info(f"OpenAIProviderHandler: Response received: {response}")
self.record_success()
# Return Stream object directly for streaming, otherwise dump to dict
if stream:
# Dump raw response if AISBF_DEBUG is enabled
if AISBF_DEBUG:
logging.info(f"=== RAW OPENAI RESPONSE ===")
logging.info(f"Raw response type: {type(response)}")
logging.info(f"Raw response: {response}")
logging.info(f"=== END RAW OPENAI RESPONSE ===")
# Return raw response without any parsing or modification
# For streaming: return the Stream object as-is
# For non-streaming: return the response object as-is
logging.info(f"OpenAIProviderHandler: Returning raw response without parsing")
return response
return response.model_dump()
except Exception as e:
import logging
logging.error(f"OpenAIProviderHandler: Error: {str(e)}", exc_info=True)
......@@ -554,6 +598,14 @@ class AnthropicProviderHandler(BaseProviderHandler):
logging.info(f"AnthropicProviderHandler: Response received: {response}")
self.record_success()
# Dump raw response if AISBF_DEBUG is enabled
if AISBF_DEBUG:
logging.info(f"=== RAW ANTHROPIC RESPONSE ===")
logging.info(f"Raw response type: {type(response)}")
logging.info(f"Raw response: {response}")
logging.info(f"Raw response dir: {dir(response)}")
logging.info(f"=== END RAW ANTHROPIC RESPONSE ===")
logging.info(f"=== ANTHROPIC RESPONSE PARSING START ===")
logging.info(f"Response type: {type(response)}")
logging.info(f"Response attributes: {dir(response)}")
......@@ -681,6 +733,13 @@ class AnthropicProviderHandler(BaseProviderHandler):
# Pydantic validation might be causing serialization issues
logging.info(f"AnthropicProviderHandler: Returning response dict (no validation)")
logging.info(f"Response dict keys: {openai_response.keys()}")
# Dump final response dict if AISBF_DEBUG is enabled
if AISBF_DEBUG:
logging.info(f"=== FINAL ANTHROPIC RESPONSE DICT ===")
logging.info(f"Final response: {openai_response}")
logging.info(f"=== END FINAL ANTHROPIC RESPONSE DICT ===")
return openai_response
except Exception as e:
import logging
......@@ -818,10 +877,17 @@ class OllamaProviderHandler(BaseProviderHandler):
logger.info(f"Final response: {response_json}")
self.record_success()
# Dump raw response if AISBF_DEBUG is enabled
if AISBF_DEBUG:
logging.info(f"=== RAW OLLAMA RESPONSE ===")
logging.info(f"Raw response JSON: {response_json}")
logging.info(f"=== END RAW OLLAMA RESPONSE ===")
logger.info(f"=== OllamaProviderHandler.handle_request END ===")
# Convert Ollama response to OpenAI-style format
return {
openai_response = {
"id": f"ollama-{model}-{int(time.time())}",
"object": "chat.completion",
"created": int(time.time()),
......@@ -840,6 +906,14 @@ class OllamaProviderHandler(BaseProviderHandler):
"total_tokens": response_json.get("prompt_eval_count", 0) + response_json.get("eval_count", 0)
}
}
# Dump final response dict if AISBF_DEBUG is enabled
if AISBF_DEBUG:
logging.info(f"=== FINAL OLLAMA RESPONSE DICT ===")
logging.info(f"Final response: {openai_response}")
logging.info(f"=== END FINAL OLLAMA RESPONSE DICT ===")
return openai_response
except Exception as e:
self.record_failure()
raise e
......
......@@ -51,6 +51,9 @@ def setup_logging():
# Create log directory if it doesn't exist
log_dir.mkdir(parents=True, exist_ok=True)
# Check if debug mode is enabled
AISBF_DEBUG = os.environ.get('AISBF_DEBUG', '').lower() in ('true', '1', 'yes')
# Setup rotating file handler for general logs
log_file = log_dir / 'aisbf.log'
file_handler = RotatingFileHandler(
......@@ -76,8 +79,15 @@ def setup_logging():
error_handler.setLevel(logging.ERROR)
error_handler.setFormatter(file_formatter)
# Setup console handler
# Setup console handler - use DEBUG level if AISBF_DEBUG is enabled
console_handler = logging.StreamHandler(sys.stdout)
if AISBF_DEBUG:
console_handler.setLevel(logging.DEBUG)
print("=== AISBF DEBUG MODE ENABLED ===")
print("All debug messages will be shown in console")
print("Raw responses from providers will be logged")
print("=== END AISBF DEBUG MODE ===")
else:
console_handler.setLevel(logging.INFO)
console_formatter = logging.Formatter(
'%(asctime)s - %(levelname)s - %(message)s'
......@@ -115,7 +125,7 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
print(f"Request method: {request.method}")
print(f"Request headers: {dict(request.headers)}")
# Try to get the raw body
# Try to get raw body
try:
raw_body = await request.body()
print(f"Raw request body: {raw_body.decode('utf-8')}")
......@@ -130,7 +140,7 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
logger.error(f"Request method: {request.method}")
logger.error(f"Request headers: {dict(request.headers)}")
# Try to get the raw body
# Try to get raw body
try:
raw_body = await request.body()
logger.error(f"Raw request body: {raw_body.decode('utf-8')}")
......@@ -187,7 +197,12 @@ async def list_rotations():
@app.post("/api/rotations/chat/completions")
async def rotation_chat_completions(request: Request, body: ChatCompletionRequest):
"""Handle chat completions for rotations using model name to select rotation"""
"""
Handle chat completions for rotations using model name to select rotation.
The RotationHandler handles streaming internally based on the selected
provider's type (google vs others), so we just pass through the response.
"""
logger.info(f"=== ROTATION CHAT COMPLETION REQUEST START ===")
logger.info(f"Request path: {request.url.path}")
logger.info(f"Model requested: {body.model}")
......@@ -210,117 +225,10 @@ async def rotation_chat_completions(request: Request, body: ChatCompletionReques
logger.debug("Handling rotation request")
try:
if body.stream:
logger.debug("Handling streaming rotation request")
rotation_config = config.get_rotation(body.model)
if not rotation_config:
raise HTTPException(status_code=400, detail=f"Rotation {body.model} not found")
# Check if this is a Google streaming response
async def stream_generator():
import time # Import time module
try:
response = await rotation_handler.handle_rotation_request(body.model, body_dict)
# Check if this is a generator (sync iterator) response
if hasattr(response, '__iter__') and not hasattr(response, '__aiter__'):
logger.debug("Handling synchronous generator stream response")
# This is likely a Google streaming response
chunk_id = 0
for chunk in response:
try:
logger.debug(f"Chunk type: {type(chunk)}")
logger.debug(f"Chunk: {chunk}")
# Extract text from Google chunk
chunk_text = ""
try:
if hasattr(chunk, 'candidates') and chunk.candidates:
candidate = chunk.candidates[0] if chunk.candidates else None
if candidate and hasattr(candidate, 'content') and candidate.content:
if hasattr(candidate.content, 'parts') and candidate.content.parts:
for part in candidate.content.parts:
if hasattr(part, 'text') and part.text:
chunk_text += part.text
except Exception as e:
logger.error(f"Error extracting text from chunk: {e}")
# Create OpenAI-compatible chunk
openai_chunk = {
"id": f"google-{body.model}-{int(time.time())}-chunk-{chunk_id}",
"object": "chat.completion.chunk",
"created": int(time.time()),
"model": body.model,
"choices": [{
"index": 0,
"delta": {
"content": chunk_text
},
"finish_reason": None
}]
}
chunk_id += 1
logger.debug(f"OpenAI chunk: {openai_chunk}")
import json
yield f"data: {json.dumps(openai_chunk)}\n\n".encode('utf-8')
except Exception as chunk_error:
logger.error(f"Error processing chunk: {str(chunk_error)}")
continue
elif hasattr(response, '__aiter__'):
# Handle OpenAI/Anthropic streaming responses (async iterators)
chunk_id = 0
async for chunk in response:
try:
# Extract text from Google chunk
chunk_text = ""
try:
if hasattr(chunk, 'candidates') and chunk.candidates:
candidate = chunk.candidates[0] if chunk.candidates else None
if candidate and hasattr(candidate, 'content') and candidate.content:
if hasattr(candidate.content, 'parts') and candidate.content.parts:
for part in candidate.content.parts:
if hasattr(part, 'text') and part.text:
chunk_text += part.text
except Exception as e:
logger.error(f"Error extracting text from chunk: {e}")
# Create OpenAI-compatible chunk
chunk_dict = {
"id": f"google-{body.model}-{int(time.time())}-chunk-{chunk_id}",
"object": "chat.completion.chunk",
"created": int(time.time()),
"model": body.model,
"choices": [{
"index": 0,
"delta": {
"content": chunk_text
},
"finish_reason": None
}]
}
chunk_id += 1
import json
yield f"data: {json.dumps(chunk_dict)}\n\n".encode('utf-8')
except Exception as chunk_error:
logger.warning(f"Error serializing chunk: {str(chunk_error)}")
continue
else:
# Handle other types of responses
logger.warning(f"Unknown response type: {type(response)}")
import json
yield f"data: {json.dumps({'error': 'Unknown response type'})}\n\n".encode('utf-8')
except Exception as e:
logger.error(f"Error in streaming response: {str(e)}")
import json
yield f"data: {json.dumps({'error': str(e)})}\n\n".encode('utf-8')
return StreamingResponse(stream_generator(), media_type="text/event-stream")
else:
logger.debug("Handling non-streaming rotation request")
# The rotation handler handles streaming internally and returns
# a StreamingResponse for streaming requests or a dict for non-streaming
result = await rotation_handler.handle_rotation_request(body.model, body_dict)
logger.debug(f"Rotation response result: {result}")
logger.debug(f"Rotation response result type: {type(result)}")
return result
except Exception as e:
logger.error(f"Error handling rotation chat_completions: {str(e)}", exc_info=True)
......
......@@ -21,6 +21,8 @@
# AISBF - AI Service Broker Framework || AI Should Be Free
# DEVELOPMENT START SCRIPT - For development use only
# For production use, install with: python setup.py install
export AISBF_DEBUG=true
# Get the directory where this script is located
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
......
#!/bin/bash
# Copyright (C) 2026 Stefy Lanza <stefy@nexlab.net>
#
# Test script for AISBF proxy
#
# Sends streaming chat-completion requests to the rotations endpoint and
# prints the raw SSE ("data:") lines returned by the proxy.

PROXY_URL="http://127.0.0.1:17765"

# run_streaming_test <test-number> <model-name>
# Sends one streaming request for the given rotation model and prints the
# raw response.  curl stderr is discarded so only the SSE stream is shown.
run_streaming_test() {
    local test_num="$1"
    local model="$2"

    echo "Test ${test_num}: Streaming request to rotations endpoint with ${model} model"
    echo "----------------------------------------"
    echo "Note: Streaming responses will appear as data: lines"
    echo ""
    curl -X POST "${PROXY_URL}/api/rotations/chat/completions" \
        -H "Content-Type: application/json" \
        -d "{
        \"model\": \"${model}\",
        \"messages\": [
            {\"role\": \"user\", \"content\": \"Hello, how are you?\"}
        ],
        \"stream\": true
    }" \
        2>/dev/null
    echo ""
    echo ""
}

run_streaming_test 1 googletest
run_streaming_test 2 kilotest
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment