Fix LiteLLM

6c2c0afc · Your Name · 8280060e · 6c2c0afc · 6c2c0afc · 6c2c0afc
Commit 6c2c0afc authored Mar 16, 2026 by Your Name
6 changed files
--- a/codai/__init__.py
+++ b/codai/__init__.py
@@ -16,8 +16,6 @@ from .models.parser import (

 from .models.templates import AgenticTemplateManager

-# OpenAI-compatible backends
-
 __all__ = [
    'ModelParserDispatcher',
    'BaseParser',

--- a/codai/__init__.py~
+++ b/codai/__init__.py~
+# codai module - AI model parsing utilities
+#
+# NOTE(review): the diff header names this file `codai/__init__.py~`; the
+# trailing `~` looks like an editor backup copy - confirm it is meant to be
+# committed.
+
+# Parser classes re-exported from .models.parser.
+from .models.parser import (
+    ModelParserDispatcher,
+    BaseParser,
+    QwenParser,
+    DeepSeekParser,
+    LlamaParser,
+    MistralParser,
+    ClaudeParser,
+    CommandRParser,
+    GemmaParser,
+    GrokParser,
+    PhiParser,
+    ApexBig50Parser,
+)
+
+from .models.templates import AgenticTemplateManager
+
+# OpenAI-compatible backends
+from .openai.litellm import (
+    LiteLLMBackend,
+    get_litellm_backend,
+    set_litellm_backend,
+    LITELLM_AVAILABLE,
+)
+
+# Names exported by `from codai import *`; this list mirrors, one for one,
+# the names imported above.
+__all__ = [
+    'ModelParserDispatcher',
+    'BaseParser',
+    'QwenParser',
+    'DeepSeekParser',
+    'LlamaParser',
+    'MistralParser',
+    'ClaudeParser',
+    'CommandRParser',
+    'GemmaParser',
+    'GrokParser',
+    'PhiParser',
+    'ApexBig50Parser',
+    'AgenticTemplateManager',
+    'LiteLLMBackend',
+    'get_litellm_backend',
+    'set_litellm_backend',
+    'LITELLM_AVAILABLE',
+]
--- a/codai/models/parsers.py
+++ b/codai/models/parsers.py
 import time
 import uuid

+# Try to import litellm for response formatting
+# Fall back to plain dicts if litellm is not available or doesn't export these
+try:
+    from litellm import ModelResponse, ChatCompletionChunk
+    LITELLM_AVAILABLE = True
+except ImportError:
+    LITELLM_AVAILABLE = False
+    ModelResponse = None
+    ChatCompletionChunk = None
+

 class OpenAIFormatter:
    """Formatter for standardizing chat completion responses in OpenAI format.
@@ -123,3 +133,74 @@ class OpenAIFormatter:
            chunk["usage"] = usage
            
        return chunk
+    
+    def format_litellm_full(self, text: str, prompt_tokens: int, completion_tokens: int, tool_calls=None) -> dict:
+        """Build a non-streaming completion through litellm's ModelResponse.
+
+        Delegates to ``format_full`` with the same arguments when litellm
+        (or its ``ModelResponse`` export) is unavailable, and also when
+        constructing or dumping the litellm objects raises.
+
+        Args:
+            text: The generated text content; dropped (``None``) when
+                ``tool_calls`` is truthy.
+            prompt_tokens: Number of tokens in the prompt
+            completion_tokens: Number of tokens in the completion
+            tool_calls: Optional list of tool calls to include
+
+        Returns:
+            ``ModelResponse(...).model_dump()`` on success, otherwise the
+            value returned by ``format_full``.
+        """
+        litellm_ready = LITELLM_AVAILABLE and ModelResponse is not None
+        if not litellm_ready:
+            return self.format_full(text, prompt_tokens, completion_tokens, tool_calls)
+
+        try:
+            from litellm import Choices, Message, Usage
+
+            created_at = int(time.time())
+            wants_tools = bool(tool_calls)
+
+            # A tool-call turn carries no text content.
+            assistant_message = Message(
+                content=None if wants_tools else text,
+                role="assistant",
+                tool_calls=tool_calls,
+            )
+            only_choice = Choices(
+                finish_reason="tool_calls" if wants_tools else "stop",
+                index=0,
+                message=assistant_message,
+            )
+            token_usage = Usage(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=prompt_tokens + completion_tokens,
+            )
+            response = ModelResponse(
+                id=self.id,
+                model=self.model_name,
+                object="chat.completion",
+                created=created_at,
+                choices=[only_choice],
+                usage=token_usage,
+            )
+            return response.model_dump()
+        except Exception:
+            # Best-effort: any litellm failure degrades to the plain dict.
+            return self.format_full(text, prompt_tokens, completion_tokens, tool_calls)
+    
+    def format_litellm_chunk(self, delta_text: str, is_final: bool = False, usage: dict = None) -> dict:
+        """Format streaming chunk using litellm's ChatCompletionChunk if available.
+
+        Falls back to ``format_chunk`` with the same arguments when litellm
+        (or its ``ChatCompletionChunk`` export) is unavailable, or when
+        building/dumping the litellm objects raises.
+
+        Args:
+            delta_text: The incremental text content for this chunk
+            is_final: Whether this is the final chunk
+            usage: Optional usage information. It is attached only when
+                ``is_final`` is true; on non-final chunks it is ignored.
+
+        Returns:
+            Dictionary representation of ChatCompletionChunk, or the
+            ``format_chunk`` dict on fallback.
+        """
+        if not LITELLM_AVAILABLE or ChatCompletionChunk is None:
+            return self.format_chunk(delta_text, is_final, usage)
+
+        # Only the final chunk may carry usage. Without this gate the litellm
+        # path would attach usage to every chunk it is passed for.
+        # NOTE(review): intended to mirror the plain-dict `format_chunk`,
+        # which gates usage on `is_final` - confirm against that method.
+        final_usage = usage if (usage and is_final) else None
+
+        try:
+            from litellm import StreamingChoices, Delta, Usage
+
+            return ChatCompletionChunk(
+                id=self.id,
+                model=self.model_name,
+                object="chat.completion.chunk",
+                created=int(time.time()),
+                choices=[StreamingChoices(
+                    finish_reason="stop" if is_final else None,
+                    index=0,
+                    delta=Delta(content=delta_text, role="assistant")
+                )],
+                usage=Usage(**final_usage) if final_usage else None
+            ).model_dump()
+        except Exception:
+            # Fall back to plain dict if litellm fails
+            return self.format_chunk(delta_text, is_final, usage)
--- a/codai/models/parsers.py~
+++ b/codai/models/parsers.py~
+import time
+import uuid
+
+# Try to import litellm for response formatting
+# Fall back to plain dicts if litellm is not available or doesn't export these
+try:
+    from litellm import ModelResponse, ChatCompletionChunk
+    LITELLM_AVAILABLE = True
+except ImportError:
+    LITELLM_AVAILABLE = False
+    ModelResponse = None
+    ChatCompletionChunk = None
+
+
+class OpenAIFormatter:
+    """Formatter for standardizing chat completion responses in OpenAI format.
+
+    This class provides final sanitization of responses before sending them
+    to clients. It processes the output of the internal parser and formats
+    them into proper OpenAI-compatible responses.
+
+    Two families of methods are offered: ``format_full`` / ``format_chunk`` /
+    ``format_final_chunk`` build plain dicts, while the ``format_litellm_*``
+    variants go through litellm's response types and fall back to the plain
+    builders when litellm is unavailable or raises.
+    """
+
+    def __init__(self, model_name: str):
+        """Remember the model name and mint one completion id.
+
+        Args:
+            model_name: Value reported in the ``model`` field of every
+                response built by this instance.
+        """
+        self.model_name = model_name
+        # Generated once, so every response/chunk from this instance shares it.
+        self.id = f"chatcmpl-{uuid.uuid4()}"
+
+    def format_full(self, text: str, prompt_tokens: int, completion_tokens: int, tool_calls=None) -> dict:
+        """Format a standard (non-streaming) response.
+
+        Args:
+            text: The generated text content; replaced by ``None`` when
+                ``tool_calls`` is truthy.
+            prompt_tokens: Number of tokens in the prompt
+            completion_tokens: Number of tokens in the completion
+            tool_calls: Optional list of tool calls to include
+
+        Returns:
+            Dictionary representation of the response
+        """
+        message = {
+            "role": "assistant",
+            "content": text if not tool_calls else None,
+        }
+        if tool_calls:
+            message["tool_calls"] = tool_calls
+
+        choice = {
+            "index": 0,
+            "message": message,
+            "finish_reason": "tool_calls" if tool_calls else "stop",
+        }
+
+        return {
+            "id": self.id,
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": self.model_name,
+            "choices": [choice],
+            "usage": {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens,
+            },
+            "provider": {
+                "provider_name": "coderai",
+                "provider_id": "coderai",
+            },
+        }
+
+    def format_chunk(self, delta_text: str, is_final: bool = False, usage: dict = None) -> dict:
+        """Format a streaming chunk response.
+
+        Args:
+            delta_text: The incremental text content for this chunk
+            is_final: Whether this is the final chunk
+            usage: Optional usage information; attached only when
+                ``is_final`` is true.
+
+        Returns:
+            Dictionary representation of the chunk
+        """
+        delta = {
+            "content": delta_text,
+            "role": "assistant",
+        }
+
+        choice = {
+            "index": 0,
+            "delta": delta,
+            "finish_reason": "stop" if is_final else None,
+        }
+
+        chunk = {
+            "id": self.id,
+            "object": "chat.completion.chunk",
+            "created": int(time.time()),
+            "model": self.model_name,
+            "choices": [choice],
+        }
+
+        # Usage is reported on the final chunk only.
+        if usage and is_final:
+            chunk["usage"] = usage
+
+        return chunk
+
+    def format_final_chunk(self, usage: dict = None) -> dict:
+        """Format the final streaming chunk with usage information.
+
+        Args:
+            usage: Usage statistics dictionary with prompt_tokens, completion_tokens, total_tokens
+
+        Returns:
+            Dictionary representation of the final chunk: ``content`` is
+            ``None``, ``finish_reason`` is ``"stop"``, and ``usage`` is
+            present only when a truthy ``usage`` was given.
+        """
+        # A final chunk is a regular chunk with no content and is_final set;
+        # reuse format_chunk rather than duplicating the dict layout.
+        return self.format_chunk(None, is_final=True, usage=usage)
+
+    def format_litellm_full(self, text: str, prompt_tokens: int, completion_tokens: int, tool_calls=None) -> dict:
+        """Format using litellm's ModelResponse if available.
+
+        Falls back to ``format_full`` when litellm (or its ``ModelResponse``
+        export) is unavailable, or when building/dumping the litellm objects
+        raises.
+
+        Args:
+            text: The generated text content; replaced by ``None`` when
+                ``tool_calls`` is truthy.
+            prompt_tokens: Number of tokens in the prompt
+            completion_tokens: Number of tokens in the completion
+            tool_calls: Optional list of tool calls to include
+
+        Returns:
+            Dictionary representation of ModelResponse, or the
+            ``format_full`` dict on fallback.
+        """
+        if not LITELLM_AVAILABLE or ModelResponse is None:
+            return self.format_full(text, prompt_tokens, completion_tokens, tool_calls)
+
+        # NOTE(review): unlike format_full, no "provider" block is passed on
+        # this path - confirm whether clients rely on that key.
+        try:
+            from litellm import Choices, Message, Usage
+
+            return ModelResponse(
+                id=self.id,
+                model=self.model_name,
+                object="chat.completion",
+                created=int(time.time()),
+                choices=[Choices(
+                    finish_reason="tool_calls" if tool_calls else "stop",
+                    index=0,
+                    message=Message(content=text if not tool_calls else None, role="assistant", tool_calls=tool_calls)
+                )],
+                usage=Usage(
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=prompt_tokens + completion_tokens
+                )
+            ).model_dump()
+        except Exception:
+            # Fall back to plain dict if litellm fails
+            return self.format_full(text, prompt_tokens, completion_tokens, tool_calls)
+
+    def format_litellm_chunk(self, delta_text: str, is_final: bool = False, usage: dict = None) -> dict:
+        """Format streaming chunk using litellm's ChatCompletionChunk if available.
+
+        Falls back to ``format_chunk`` when litellm (or its
+        ``ChatCompletionChunk`` export) is unavailable, or when
+        building/dumping the litellm objects raises.
+
+        Args:
+            delta_text: The incremental text content for this chunk
+            is_final: Whether this is the final chunk
+            usage: Optional usage information; attached only when
+                ``is_final`` is true, matching ``format_chunk``.
+
+        Returns:
+            Dictionary representation of ChatCompletionChunk, or the
+            ``format_chunk`` dict on fallback.
+        """
+        if not LITELLM_AVAILABLE or ChatCompletionChunk is None:
+            return self.format_chunk(delta_text, is_final, usage)
+
+        # Apply the same `usage and is_final` gate as format_chunk so both
+        # paths agree on which chunks carry usage.
+        final_usage = usage if (usage and is_final) else None
+
+        try:
+            from litellm import StreamingChoices, Delta, Usage
+
+            return ChatCompletionChunk(
+                id=self.id,
+                model=self.model_name,
+                object="chat.completion.chunk",
+                created=int(time.time()),
+                choices=[StreamingChoices(
+                    finish_reason="stop" if is_final else None,
+                    index=0,
+                    delta=Delta(content=delta_text, role="assistant")
+                )],
+                usage=Usage(**final_usage) if final_usage else None
+            ).model_dump()
+        except Exception:
+            # Fall back to plain dict if litellm fails
+            return self.format_chunk(delta_text, is_final, usage)
--- a/codai/openai/__init__.py
+++ b/codai/openai/__init__.py
-# codai.openai - OpenAI-compatible API implementations
-
-__all__ = []
--- a/coderai
+++ b/coderai