Analytics system

fe8b625a · Your Name · 823291c7 · fe8b625a · fe8b625a · fe8b625a
Commit fe8b625a authored Apr 17, 2026 by Your Name
20 changed files
--- a/aisbf/__init__.py
+++ b/aisbf/__init__.py
@@ -54,7 +54,7 @@ from .auth.qwen import QwenOAuth2
 from .handlers import RequestHandler, RotationHandler, AutoselectHandler
 from .utils import count_messages_tokens, split_messages_into_chunks, get_max_request_tokens_for_model

-__version__ = "0.99.28"
+__version__ = "0.99.29"
 __all__ = [
    # Config
    "config",

--- a/aisbf/analytics.py
+++ b/aisbf/analytics.py
--- a/aisbf/auth/claude.py
+++ b/aisbf/auth/claude.py
@@ -27,6 +27,7 @@ import hashlib
 import base64
 import webbrowser
 import time
+import asyncio
 import httpx
 from pathlib import Path
 from typing import Optional, Dict
@@ -223,7 +224,7 @@ class ClaudeAuth:
        )
        return response
    
-    def refresh_token(self, max_retries: int = 3) -> bool:
+    async def refresh_token(self, max_retries: int = 3) -> bool:
        """
        Use the refresh token to get a new access token without logging in.
        
@@ -270,7 +271,7 @@ class ClaudeAuth:
                    # Rate limited - wait and retry with exponential backoff
                    wait_time = (2 ** attempt) * 5  # 5, 10, 20 seconds
                    logger.warning(f"Rate limited (429). Waiting {wait_time} seconds before retry {attempt + 1}/{max_retries}")
-                    time.sleep(wait_time)
+                    await asyncio.sleep(wait_time)
                    continue
                else:
                    logger.error(f"Token refresh failed: {response.status_code} - {response.text}")
@@ -280,14 +281,14 @@ class ClaudeAuth:
                if attempt < max_retries - 1:
                    wait_time = (2 ** attempt) * 5
                    logger.info(f"Retrying in {wait_time} seconds...")
-                    time.sleep(wait_time)
+                    await asyncio.sleep(wait_time)
                    continue
                return False
        
        logger.error(f"Token refresh failed after {max_retries} attempts")
        return False
    
-    def get_valid_token(self, auto_login: bool = False) -> str:
+    async def get_valid_token(self, auto_login: bool = False) -> str:
        """
        Get a valid access token, refreshing it if necessary.
        
@@ -311,7 +312,7 @@ class ClaudeAuth:
        # Refresh if less than 5 minutes remain
        if time.time() > (self.tokens.get('expires_at', 0) - 300):
            logger.info("Token expiring soon, refreshing...")
-            if not self.refresh_token():
+            if not await self.refresh_token():
                if not auto_login:
                    logger.error("Token refresh failed and auto_login is disabled")
                    raise Exception("Claude token refresh failed. Please re-authenticate via /dashboard/claude/auth/start or MCP tool.")
@@ -540,7 +541,7 @@ class ClaudeAuth:
        
        logger.info("OAuth2 login flow completed successfully")
    
-    def exchange_code_for_tokens(self, code: str, state: str, verifier: str = None, max_retries: int = 3) -> bool:
+    async def exchange_code_for_tokens(self, code: str, state: str, verifier: str = None, max_retries: int = 3) -> bool:
        """
        Exchange authorization code for access tokens.
        Matches CLIProxyAPI implementation exactly.
@@ -621,7 +622,7 @@ class ClaudeAuth:
                    # Rate limited - wait and retry with exponential backoff
                    wait_time = (2 ** attempt) * 5  # 5, 10, 20 seconds
                    logger.warning(f"Rate limited (429). Waiting {wait_time} seconds before retry {attempt + 1}/{max_retries}")
-                    time.sleep(wait_time)
+                    await asyncio.sleep(wait_time)
                    continue
                else:
                    logger.error(f"Token exchange failed: {response.status_code} - {response.text}")
@@ -631,7 +632,7 @@ class ClaudeAuth:
                if attempt < max_retries - 1:
                    wait_time = (2 ** attempt) * 5
                    logger.info(f"Retrying in {wait_time} seconds...")
-                    time.sleep(wait_time)
+                    await asyncio.sleep(wait_time)
                    continue
                return False
        
@@ -652,14 +653,16 @@ class ClaudeAuth:

 # Example usage
 if __name__ == "__main__":
+    import asyncio
    logging.basicConfig(level=logging.INFO)

+    async def main():
        auth = ClaudeAuth()
        token = auth.get_valid_token()

        # Use the token for an API call
-    client = httpx.Client()
-    response = client.post(
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
                "https://api.anthropic.com/v1/messages",
                headers={
                    "Authorization": f"Bearer {token}",
@@ -674,3 +677,5 @@ if __name__ == "__main__":
                }
            )
            print(response.json())
+
+    asyncio.run(main())
--- a/aisbf/auth/qwen.py
+++ b/aisbf/auth/qwen.py
@@ -152,13 +152,14 @@ class QwenOAuth2:
        
        return code_verifier, code_challenge
    
-    def _acquire_lock(self, max_attempts: int = 20) -> bool:
+    async def _acquire_lock(self, max_attempts: int = 20) -> bool:
        """
        Acquire a file lock to prevent concurrent token refreshes.

        Returns:
            True if lock acquired, False otherwise.
        """
+        import asyncio
        lock_id = str(uuid.uuid4())
        interval = 0.1

@@ -182,7 +183,7 @@ class QwenOAuth2:
                    # Lock might have been removed by another process
                    continue

-                time.sleep(interval)
+                await asyncio.sleep(interval)
                interval = min(interval * 1.5, 2.0)  # Exponential backoff

        return False
@@ -461,7 +462,7 @@ class QwenOAuth2:
        logger.info("QwenOAuth2: Refreshing access token...")
        
        # Acquire lock to prevent concurrent refreshes
-        if not self._acquire_lock():
+        if not await self._acquire_lock():
            logger.error("QwenOAuth2: Failed to acquire lock for token refresh")
            return False
        

--- a/aisbf/context.py
+++ b/aisbf/context.py
@@ -534,7 +534,7 @@ class ContextManager:
                    "max_tokens": 1000,
                    "stream": False
                }
-                response = await self._rotation_handler.handle_rotation_request(self._rotation_id, condensation_request)
+                response = await self._rotation_handler.handle_rotation_request(self._rotation_id, condensation_request, None, None)
                if isinstance(response, dict):
                    summary_content = response.get('choices', [{}])[0].get('message', {}).get('content', '')
            else:
@@ -642,7 +642,7 @@ Provide only the relevant information in a concise format."""
                    "max_tokens": 2000,
                    "stream": False
                }
-                response = await self._rotation_handler.handle_rotation_request(self._rotation_id, condensation_request)
+                response = await self._rotation_handler.handle_rotation_request(self._rotation_id, condensation_request, None, None)
                if isinstance(response, dict):
                    pruned_content = response.get('choices', [{}])[0].get('message', {}).get('content', '')
            else:

--- a/aisbf/cost_extractor.py
+++ b/aisbf/cost_extractor.py
+"""
+Copyleft (C) 2026 Stefy Lanza <stefy@nexlab.net>
+
+AISBF - AI Service Broker Framework || AI Should Be Free
+
+Cost extraction utilities for provider responses.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <https://www.gnu.org/licenses/>.
+"""
+
+import logging
+from typing import Dict, Optional, Any
+
+logger = logging.getLogger(__name__)
+
+
+def extract_cost_from_response(response: Dict[str, Any], provider_id: str) -> Optional[float]:
+    """
+    Extract the actual cost reported by a provider response, if any.
+
+    Candidate fields are tried in priority order and the first one holding
+    a usable number wins:
+
+    1. ``usage.cost`` for AWS Bedrock style providers
+       (``amazon``, ``bedrock``, ``aws``).
+    2. For each of ``cost``, ``price``, ``amount``, ``total_cost``: the
+       top-level field first, then the same field inside ``usage``.
+
+    Cohere (``meta.billed_units``) and Replicate (``metrics.predict_time``)
+    report billing units rather than a price, so no cost is read from those
+    fields here.
+
+    A candidate that is missing, ``None``, a boolean, or not convertible to
+    ``float`` is skipped rather than aborting the whole lookup, so one
+    malformed field cannot hide a valid cost stored in another field.
+
+    Args:
+        response: Provider response dictionary
+        provider_id: Provider identifier
+
+    Returns:
+        Cost in USD if found, None otherwise
+    """
+    if not response or not isinstance(response, dict):
+        return None
+
+    # Look the usage object up once; anything that is not a dict is treated
+    # as "no usage information".
+    usage = response.get('usage')
+    if not isinstance(usage, dict):
+        usage = {}
+
+    candidates = []
+
+    # AWS Bedrock - may include cost in usage; it takes priority over the
+    # generic top-level fields for these providers.
+    if provider_id in ('amazon', 'bedrock', 'aws'):
+        candidates.append(usage.get('cost'))
+
+    # Generic cost fields that some providers might use
+    for cost_field in ('cost', 'price', 'amount', 'total_cost'):
+        candidates.append(response.get(cost_field))
+        candidates.append(usage.get(cost_field))
+
+    for raw_cost in candidates:
+        # bool is a subclass of int: float(True) would be reported as a 1.0 cost
+        if raw_cost is None or isinstance(raw_cost, bool):
+            continue
+        try:
+            return float(raw_cost)
+        except (TypeError, ValueError, OverflowError) as e:
+            logger.debug(f"Error extracting cost from {provider_id} response: {e}")
+
+    return None
+
+
+def extract_cost_from_streaming_chunk(chunk: Dict[str, Any], provider_id: str) -> Optional[float]:
+    """
+    Extract cost from a streaming response chunk if available.
+
+    Most providers don't include cost in streaming chunks, but some might
+    include it in the final chunk. ``usage.cost`` is tried first, then the
+    top-level ``cost`` field. A candidate that is missing, ``None``, a
+    boolean, or not convertible to ``float`` is skipped, so a malformed
+    ``usage.cost`` does not hide a valid top-level ``cost``.
+
+    Args:
+        chunk: Streaming chunk dictionary
+        provider_id: Provider identifier (only used in the debug log message)
+
+    Returns:
+        Cost in USD if found, None otherwise
+    """
+    if not chunk or not isinstance(chunk, dict):
+        return None
+
+    candidates = []
+
+    # A final chunk may carry usage/cost info
+    usage = chunk.get('usage')
+    if isinstance(usage, dict):
+        candidates.append(usage.get('cost'))
+
+    # Some providers might include cost at top level in the final chunk
+    candidates.append(chunk.get('cost'))
+
+    for raw_cost in candidates:
+        # bool is a subclass of int: float(True) would be reported as a 1.0 cost
+        if raw_cost is None or isinstance(raw_cost, bool):
+            continue
+        try:
+            return float(raw_cost)
+        except (TypeError, ValueError, OverflowError) as e:
+            logger.debug(f"Error extracting cost from {provider_id} streaming chunk: {e}")
+
+    return None
--- a/aisbf/database.py
+++ b/aisbf/database.py
--- a/aisbf/handlers.py
+++ b/aisbf/handlers.py
--- a/aisbf/mcp.py
+++ b/aisbf/mcp.py
@@ -855,12 +855,12 @@ class MCPServer:
            if stream:
                return {"error": "Streaming not supported in MCP, use SSE endpoint instead"}
            else:
-                return await handler.handle_autoselect_request(actual_model, request_data)
+                return await handler.handle_autoselect_request(actual_model, request_data, user_id, None)
        elif provider_id == "rotation":
            handler = get_user_handler('rotation', user_id)
            if actual_model not in self.config.rotations and (not user_id or actual_model not in handler.user_rotations):
                raise HTTPException(status_code=400, detail=f"Rotation '{actual_model}' not found")
-            return await handler.handle_rotation_request(actual_model, request_data)
+            return await handler.handle_rotation_request(actual_model, request_data, user_id, None)
        else:
            handler = get_user_handler('request', user_id)
            if provider_id not in self.config.providers and (not user_id or provider_id not in handler.user_providers):

--- a/aisbf/providers/claude.py
+++ b/aisbf/providers/claude.py
@@ -159,7 +159,7 @@ class ClaudeProviderHandler(BaseProviderHandler):
        logger.info("ClaudeProviderHandler: Initializing session for quota tracking")
        
        try:
-            headers = self._get_auth_headers(stream=False)
+            headers = await self._get_auth_headers(stream=False)
            
            payload = {
                'model': 'claude-haiku-4-5-20251001',
@@ -257,12 +257,12 @@ class ClaudeProviderHandler(BaseProviderHandler):
            if old_util != new_util:
                logger.debug(f"ClaudeProviderHandler: Quota utilization updated: {old_util} -> {new_util}")
    
-    def _get_sdk_client(self):
+    async def _get_sdk_client(self):
        """Get or create an Anthropic SDK client configured with OAuth2 auth token."""
        import logging
        logger = logging.getLogger(__name__)

-        access_token = self.auth.get_valid_token()
+        access_token = await self.auth.get_valid_token()
        
        if not access_token:
            logger.error("ClaudeProviderHandler: No OAuth2 access token available")
@@ -277,14 +277,14 @@ class ClaudeProviderHandler(BaseProviderHandler):
        logger.info("ClaudeProviderHandler: Created SDK client with OAuth2 auth token")
        return self._sdk_client
    
-    def _get_auth_headers(self, stream: bool = False):
+    async def _get_auth_headers(self, stream: bool = False):
        """Get HTTP headers with OAuth2 Bearer token."""
        import logging
        import uuid
        import platform
        logger = logging.getLogger(__name__)

-        access_token = self.auth.get_valid_token()
+        access_token = await self.auth.get_valid_token()
        
        if not self.session_state.get('session_id'):
            self.session_state['session_id'] = str(uuid.uuid4())
@@ -849,7 +849,7 @@ class ClaudeProviderHandler(BaseProviderHandler):
            if anthropic_tool_choice:
                payload['tool_choice'] = anthropic_tool_choice
        
-        headers = self._get_auth_headers(stream=stream)
+        headers = await self._get_auth_headers(stream=stream)
        api_url = 'https://api.anthropic.com/v1/messages?beta=true'
        
        logger.info(f"ClaudeProviderHandler: Request payload keys: {list(payload.keys())}")
@@ -1640,7 +1640,7 @@ class ClaudeProviderHandler(BaseProviderHandler):
            try:
                logging.info("ClaudeProviderHandler: [1/3] Attempting primary API endpoint...")

-                headers = self._get_auth_headers(stream=False)
+                headers = await self._get_auth_headers(stream=False)
                
                api_endpoint = 'https://api.anthropic.com/v1/models'
                logging.info(f"ClaudeProviderHandler: Calling API endpoint: {api_endpoint}")

--- a/aisbf/providers/kilo.py
+++ b/aisbf/providers/kilo.py
@@ -170,7 +170,7 @@ class KiloProviderHandler(BaseProviderHandler):
                "token": self.api_key
            }

-        token = self.oauth2.get_valid_token()
+        token = await self.oauth2.get_valid_token()

        if token:
            logger.info("KiloProviderHandler: Using existing OAuth2 token")
@@ -182,7 +182,7 @@ class KiloProviderHandler(BaseProviderHandler):
        # Try to reload credentials one more time - this handles the case where credentials
        # were saved by another process/handler instance after this handler was created
        self.oauth2._load_credentials()
-        token = self.oauth2.get_valid_token()
+        token = await self.oauth2.get_valid_token()
        
        if token:
            logger.info("KiloProviderHandler: Found OAuth2 token after reloading credentials")

--- a/aisbf/providers/qwen.py
+++ b/aisbf/providers/qwen.py
@@ -103,7 +103,7 @@ class QwenProviderHandler(BaseProviderHandler):
        logging.getLogger(__name__).info(f"QwenProviderHandler: Falling back to file-based credentials for user {self.user_id}")
        return QwenOAuth2(credentials_file=credentials_file)
    
-    def _get_sdk_client(self):
+    async def _get_sdk_client(self):
        """Get or create an OpenAI SDK client configured with authentication (OAuth2 or API key)."""
        import logging
        logger = logging.getLogger(__name__)
@@ -122,7 +122,7 @@ class QwenProviderHandler(BaseProviderHandler):
            base_url = self._get_region_endpoint(qwen_config)
        else:
            # Use OAuth2 authentication
-            access_token = self.auth.get_valid_token()
+            access_token = await self.auth.get_valid_token()

            if not access_token:
                logger.error("QwenProviderHandler: No OAuth2 access token available")
@@ -221,7 +221,7 @@ class QwenProviderHandler(BaseProviderHandler):
        await self.apply_rate_limit()
        
        # Get SDK client with current OAuth token
-        client = self._get_sdk_client()
+        client = await self._get_sdk_client()
        
        # Build request parameters
        request_params = {
@@ -308,7 +308,7 @@ class QwenProviderHandler(BaseProviderHandler):
                if refresh_success:
                    logger.info("QwenProviderHandler: Token refreshed, retrying request")
                    # Retry with new token
-                    client = self._get_sdk_client()
+                    client = await self._get_sdk_client()
                    
                    if stream:
                        return self._handle_streaming_request(client, request_params, model)
@@ -472,7 +472,7 @@ class QwenProviderHandler(BaseProviderHandler):

        try:
            # Get SDK client with API key authentication
-            client = self._get_sdk_client()
+            client = await self._get_sdk_client()

            # List models using OpenAI SDK
            models_response = await client.models.list()

--- a/main.py
+++ b/main.py
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "aisbf"
-version = "0.99.28"
+version = "0.99.29"
 description = "AISBF - AI Service Broker Framework || AI Should Be Free - A modular proxy server for managing multiple AI provider integrations"
 readme = "README.md"
 license = "GPL-3.0-or-later"

--- a/setup.py
+++ b/setup.py
@@ -49,7 +49,7 @@ class InstallCommand(_install):

 setup(
    name="aisbf",
-    version="0.99.28",
+    version="0.99.29",
    author="AISBF Contributors",
    author_email="stefy@nexlab.net",
    description="AISBF - AI Service Broker Framework || AI Should Be Free - A modular proxy server for managing multiple AI provider integrations",

--- a/templates/base.html
+++ b/templates/base.html
@@ -25,7 +25,7 @@ along with this program.  If not, see <https://www.gnu.org/licenses/>.
    <style>
        * { margin: 0; padding: 0; box-sizing: border-box; }
        body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif; background: #1a1a2e; color: #e0e0e0; }
-        .container { max-width: 1320px; margin: 0 auto; padding: 20px; }
+        .container { max-width: 1452px; margin: 0 auto; padding: 20px; }
        .header { background: #16213e; color: white; padding: 20px 0; margin-bottom: 30px; border-bottom: 2px solid #0f3460; }
        .header h1 { font-size: 24px; font-weight: 600; display: inline-block; }
        .header-actions { float: right; }

--- a/templates/dashboard/analytics.html
+++ b/templates/dashboard/analytics.html
--- a/templates/dashboard/pricing.html
+++ b/templates/dashboard/pricing.html
@@ -21,7 +21,7 @@
 <!-- Paid Tiers -->
 <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 20px; margin-bottom: 40px;">
    {% for tier in tiers %}
-    {% if tier.is_active and not tier.is_default %}
+    {% if not tier.is_default %}
    <div class="pricing-card {% if tier.is_recommended %}recommended{% endif %}">
        {% if tier.is_recommended %}
        <div style="background: #4a9eff; color: white; padding: 8px 15px; border-radius: 20px; text-align: center; margin-bottom: 20px; font-weight: bold; text-transform: uppercase; font-size: 12px;">

--- a/templates/dashboard/user_index.html
+++ b/templates/dashboard/user_index.html
@@ -2,6 +2,21 @@

 {% block title %}User Dashboard - AISBF{% endblock %}

+{# format_tokens(value): render a token count in compact form.
+   none or 0 renders as "0"; values >= 1e9 / 1e6 / 1e3 are divided down and
+   shown with two decimals and a B / M / K suffix; anything smaller is
+   printed unchanged. The value is converted with the `float` filter only
+   for the comparisons and divisions. #}
+{% macro format_tokens(value) %}
+{% if value is none or value == 0 %}0{% else %}
+{% set val = value | float %}
+{% if val >= 1000000000 %}
+{{ "%.2f"|format(val / 1000000000) }}B
+{% elif val >= 1000000 %}
+{{ "%.2f"|format(val / 1000000) }}M
+{% elif val >= 1000 %}
+{{ "%.2f"|format(val / 1000) }}K
+{% else %}
+{{ value }}
+{% endif %}
+{% endif %}
+{% endmacro %}
+
 {% block content %}
 <div class="container">
    <h1>User Dashboard</h1>
@@ -98,7 +113,7 @@
        <div class="stats-grid">
            <div class="stat-item">
                <h3>Total Tokens Used</h3>
-                <p class="stat-value">{{ usage_stats.total_tokens|default(0) }}</p>
+                <p class="stat-value">{{ format_tokens(usage_stats.total_tokens|default(0)) }}</p>
            </div>
            <div class="stat-item">
                <h3>Requests Today</h3>

--- a/templates/dashboard/users.html
+++ b/templates/dashboard/users.html
@@ -229,23 +229,60 @@ function updateUserTier(userId, tierId) {
    .then(response => response.json())
    .then(data => {
        if (data.success) {
-            // Show success message briefly
-            const msg = document.createElement('div');
-            msg.style.cssText = 'position: fixed; top: 20px; right: 20px; background: #4ade80; color: #000; padding: 15px 20px; border-radius: 5px; z-index: 9999;';
-            msg.textContent = 'Tier updated successfully';
-            document.body.appendChild(msg);
-            setTimeout(() => msg.remove(), 2000);
+            // Show success notification
+            showNotification('Tier updated successfully', 'success');
        } else {
-            alert(data.error || 'Failed to update tier');
+            // Show error notification
+            showNotification(data.error || 'Failed to update tier', 'error');
            location.reload(); // Reload to reset dropdown
        }
    })
    .catch(error => {
-        alert('Error: ' + error);
+        showNotification('Error: ' + error, 'error');
        location.reload(); // Reload to reset dropdown
    });
 }

+/**
+ * Show a transient toast in the top-right corner of the page.
+ *
+ * Any toast already on screen is removed first, so at most one is visible.
+ * The toast slides in, stays for 3 seconds, then slides out and is removed.
+ *
+ * @param {string} message - Text to display. It is inserted as plain text,
+ *     never as HTML, because it may come from a server response or an
+ *     exception message.
+ * @param {string} type - 'success' for the success style; any other value
+ *     is rendered with the error style.
+ */
+function showNotification(message, type) {
+    const isSuccess = type === 'success';
+
+    // Remove any existing notifications
+    document.querySelectorAll('.notification-toast').forEach(toast => toast.remove());
+
+    // Create new notification
+    const notification = document.createElement('div');
+    notification.className = `notification-toast alert alert-${isSuccess ? 'success' : 'error'}`;
+    notification.style.cssText = `
+        position: fixed;
+        top: 20px;
+        right: 20px;
+        z-index: 9999;
+        min-width: 300px;
+        max-width: 500px;
+        box-shadow: 0 4px 12px rgba(0,0,0,0.5);
+        border: none;
+        animation: slideIn 0.3s ease-out;
+    `;
+
+    // Build the content with DOM nodes instead of innerHTML so that markup
+    // contained in `message` is displayed literally and cannot inject HTML.
+    const row = document.createElement('div');
+    row.style.cssText = 'display: flex; align-items: center; gap: 10px;';
+
+    const icon = document.createElement('i');
+    icon.className = `fas fa-${isSuccess ? 'check-circle' : 'exclamation-triangle'}`;
+    icon.style.fontSize = '18px';
+
+    const text = document.createElement('span');
+    text.textContent = String(message);
+
+    row.appendChild(icon);
+    row.appendChild(text);
+    notification.appendChild(row);
+    document.body.appendChild(notification);
+
+    // Auto-remove after 3 seconds; the 300 ms delay lets slideOut finish
+    setTimeout(() => {
+        notification.style.animation = 'slideOut 0.3s ease-in';
+        setTimeout(() => {
+            if (notification.parentNode) {
+                notification.parentNode.removeChild(notification);
+            }
+        }, 300);
+    }, 3000);
+}
+
 // Close modal when clicking outside
 window.onclick = function(event) {
    const modal = document.getElementById('edit-modal');
@@ -269,6 +306,43 @@ th {
    background: #0f3460;
    font-weight: 600;
 }
+
+/* Notification animations.
+   Referenced by name from the inline styles set in showNotification():
+   slideIn when the toast is created, slideOut just before it is removed.
+   Both move the toast horizontally by its own width while fading. */
+@keyframes slideIn {
+    from {
+        transform: translateX(100%);
+        opacity: 0;
+    }
+    to {
+        transform: translateX(0);
+        opacity: 1;
+    }
+}
+
+@keyframes slideOut {
+    from {
+        transform: translateX(0);
+        opacity: 1;
+    }
+    to {
+        transform: translateX(100%);
+        opacity: 0;
+    }
+}
+
+/* Custom alert styles for dark theme.
+   showNotification() gives each toast the classes "alert alert-success" or
+   "alert alert-error", so these rules colour the toasts.
+   NOTE(review): !important presumably overrides alert styles defined
+   elsewhere (e.g. the base template) - confirm before removing. */
+.alert-success {
+    background-color: #10b981 !important;
+    color: #ffffff !important;
+    border-color: #059669 !important;
+}
+
+.alert-error {
+    background-color: #ef4444 !important;
+    color: #ffffff !important;
+    border-color: #dc2626 !important;
+}
+
 /* Prevent browser autofill from overriding dark theme */
 input:-webkit-autofill,
 input:-webkit-autofill:hover,