fix: Revert Google streaming to yield raw chunk objects

- Google provider now yields raw chunk objects instead of pre-formatted SSE bytes
- The handlers.py handles the conversion to OpenAI-compatible format
- This fixes the issue where clients weren't receiving streaming responses

Note: Server must be restarted to pick up this change
parent 9d95c435
...@@ -309,118 +309,11 @@ class GoogleProviderHandler(BaseProviderHandler): ...@@ -309,118 +309,11 @@ class GoogleProviderHandler(BaseProviderHandler):
logging.info(f"GoogleProviderHandler: Streaming response received (total chunks: {len(chunks)})") logging.info(f"GoogleProviderHandler: Streaming response received (total chunks: {len(chunks)})")
self.record_success() self.record_success()
# Now yield chunks asynchronously with proper OpenAI-compatible parsing # Now yield chunks asynchronously - yield raw chunk objects
# The handlers.py will handle the conversion to OpenAI format
async def async_generator(): async def async_generator():
import json
chunk_id = 0
accumulated_text = ""
created_time = int(time.time())
response_id = f"google-{model}-{created_time}"
# Track completion tokens for Google responses
completion_tokens = 0
accumulated_response_text = ""
total_chunks = len(chunks)
chunk_idx = 0
for chunk in chunks: for chunk in chunks:
try: yield chunk
# Extract text from Google chunk
chunk_text = ""
finish_reason = None
try:
if hasattr(chunk, 'candidates') and chunk.candidates:
candidate = chunk.candidates[0] if chunk.candidates else None
if candidate and hasattr(candidate, 'content') and candidate.content:
if hasattr(candidate.content, 'parts') and candidate.content.parts:
for part in candidate.content.parts:
if hasattr(part, 'text') and part.text:
chunk_text += part.text
# Check for finish reason in candidate
if hasattr(candidate, 'finish_reason'):
google_finish = str(candidate.finish_reason)
if google_finish in ('STOP', 'END_TURN', 'FINISH_REASON_UNSPECIFIED'):
finish_reason = "stop"
elif google_finish == 'MAX_TOKENS':
finish_reason = "length"
except Exception as e:
logging.error(f"Error extracting text from Google chunk: {e}")
# Calculate delta (only new text since last chunk)
delta_text = chunk_text[len(accumulated_text):] if chunk_text.startswith(accumulated_text) else chunk_text
accumulated_text = chunk_text
# Check if this is the last chunk
is_last_chunk = (chunk_idx == total_chunks - 1)
chunk_finish_reason = finish_reason if is_last_chunk else None
# Only send if there's new content or it's the last chunk with finish_reason
if delta_text or is_last_chunk:
# Create OpenAI-compatible chunk
openai_chunk = {
"id": response_id,
"object": "chat.completion.chunk",
"created": created_time,
"model": model,
"choices": [{
"index": 0,
"delta": {
"content": delta_text if delta_text else "",
"refusal": None,
"role": "assistant",
"tool_calls": None
},
"finish_reason": chunk_finish_reason,
"logprobs": None,
"native_finish_reason": chunk_finish_reason
}]
}
chunk_id += 1
# Track completion tokens for Google responses
if delta_text:
accumulated_response_text += delta_text
# Yield as JSON string
yield f"data: {json.dumps(openai_chunk)}\n\n".encode('utf-8')
chunk_idx += 1
except Exception as chunk_error:
logging.error(f"Error processing Google chunk: {str(chunk_error)}")
chunk_idx += 1
continue
# Send final chunk with usage statistics
if accumulated_response_text:
completion_tokens = count_messages_tokens([{"role": "assistant", "content": accumulated_response_text}], model)
total_tokens = completion_tokens # Google doesn't provide prompt tokens in streaming
final_chunk = {
"id": response_id,
"object": "chat.completion.chunk",
"created": created_time,
"model": model,
"choices": [{
"index": 0,
"delta": {
"content": "",
"refusal": None,
"role": "assistant",
"tool_calls": None
},
"finish_reason": None,
"logprobs": None,
"native_finish_reason": None
}],
"usage": {
"prompt_tokens": None,
"completion_tokens": completion_tokens,
"total_tokens": total_tokens
}
}
yield f"data: {json.dumps(final_chunk)}\n\n".encode('utf-8')
return async_generator() return async_generator()
else: else:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment