fix: Revert Google streaming to yield raw chunk objects

- Google provider now yields raw chunk objects instead of pre-formatted SSE bytes
- The handlers.py handles the conversion to OpenAI-compatible format
- This fixes the issue where clients weren't receiving streaming responses

Note: Server must be restarted to pick up this change
parent 9d95c435
...@@ -309,118 +309,11 @@ class GoogleProviderHandler(BaseProviderHandler): ...@@ -309,118 +309,11 @@ class GoogleProviderHandler(BaseProviderHandler):
logging.info(f"GoogleProviderHandler: Streaming response received (total chunks: {len(chunks)})") logging.info(f"GoogleProviderHandler: Streaming response received (total chunks: {len(chunks)})")
self.record_success() self.record_success()
# Now yield chunks asynchronously with proper OpenAI-compatible parsing # Now yield chunks asynchronously - yield raw chunk objects
# The handlers.py will handle the conversion to OpenAI format
async def async_generator(): async def async_generator():
import json
chunk_id = 0
accumulated_text = ""
created_time = int(time.time())
response_id = f"google-{model}-{created_time}"
# Track completion tokens for Google responses
completion_tokens = 0
accumulated_response_text = ""
total_chunks = len(chunks)
chunk_idx = 0
for chunk in chunks: for chunk in chunks:
try: yield chunk
# Extract text from Google chunk
chunk_text = ""
finish_reason = None
try:
if hasattr(chunk, 'candidates') and chunk.candidates:
candidate = chunk.candidates[0] if chunk.candidates else None
if candidate and hasattr(candidate, 'content') and candidate.content:
if hasattr(candidate.content, 'parts') and candidate.content.parts:
for part in candidate.content.parts:
if hasattr(part, 'text') and part.text:
chunk_text += part.text
# Check for finish reason in candidate
if hasattr(candidate, 'finish_reason'):
google_finish = str(candidate.finish_reason)
if google_finish in ('STOP', 'END_TURN', 'FINISH_REASON_UNSPECIFIED'):
finish_reason = "stop"
elif google_finish == 'MAX_TOKENS':
finish_reason = "length"
except Exception as e:
logging.error(f"Error extracting text from Google chunk: {e}")
# Calculate delta (only new text since last chunk)
delta_text = chunk_text[len(accumulated_text):] if chunk_text.startswith(accumulated_text) else chunk_text
accumulated_text = chunk_text
# Check if this is the last chunk
is_last_chunk = (chunk_idx == total_chunks - 1)
chunk_finish_reason = finish_reason if is_last_chunk else None
# Only send if there's new content or it's the last chunk with finish_reason
if delta_text or is_last_chunk:
# Create OpenAI-compatible chunk
openai_chunk = {
"id": response_id,
"object": "chat.completion.chunk",
"created": created_time,
"model": model,
"choices": [{
"index": 0,
"delta": {
"content": delta_text if delta_text else "",
"refusal": None,
"role": "assistant",
"tool_calls": None
},
"finish_reason": chunk_finish_reason,
"logprobs": None,
"native_finish_reason": chunk_finish_reason
}]
}
chunk_id += 1
# Track completion tokens for Google responses
if delta_text:
accumulated_response_text += delta_text
# Yield as JSON string
yield f"data: {json.dumps(openai_chunk)}\n\n".encode('utf-8')
chunk_idx += 1
except Exception as chunk_error:
logging.error(f"Error processing Google chunk: {str(chunk_error)}")
chunk_idx += 1
continue
# Send final chunk with usage statistics
if accumulated_response_text:
completion_tokens = count_messages_tokens([{"role": "assistant", "content": accumulated_response_text}], model)
total_tokens = completion_tokens # Google doesn't provide prompt tokens in streaming
final_chunk = {
"id": response_id,
"object": "chat.completion.chunk",
"created": created_time,
"model": model,
"choices": [{
"index": 0,
"delta": {
"content": "",
"refusal": None,
"role": "assistant",
"tool_calls": None
},
"finish_reason": None,
"logprobs": None,
"native_finish_reason": None
}],
"usage": {
"prompt_tokens": None,
"completion_tokens": completion_tokens,
"total_tokens": total_tokens
}
}
yield f"data: {json.dumps(final_chunk)}\n\n".encode('utf-8')
return async_generator() return async_generator()
else: else:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment