nexlab / aisbf · Commits

Commit dc79f93a
Authored Feb 06, 2026 by Stefy Lanza (nextime / spora)

Fix Google GenAI streaming response handling
parent 77c08ee2
Showing 2 changed files with 95 additions and 37 deletions:

  aisbf/handlers.py  +35 -29
  main.py            +60 -8

aisbf/handlers.py

@@ -761,6 +761,8 @@ class AutoselectHandler:
                 {**request_data, "stream": True}
             )
             logger.info(f"Autoselect stream response type: {type(response)}")
+            # Check if this is a Google streaming response (synchronous iterator)
+            # Google's generate_content_stream() returns a sync iterator, not async
             is_google_stream = hasattr(response, '__iter__') and not hasattr(response, '__aiter__')
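
The check added here relies on a protocol difference between the SDKs: Google GenAI's generate_content_stream() hands back a plain synchronous iterator (it defines __iter__ but not __aiter__), while OpenAI- and Anthropic-style streams are async iterators. A minimal, self-contained sketch of the same detection; the two stream classes are stand-ins, not the real SDK types:

class FakeGoogleStream:
    """Synchronous iterator, like google-genai's generate_content_stream() result."""
    def __iter__(self):
        return iter(["hello", " world"])

class FakeOpenAIStream:
    """Async iterator, like OpenAI/Anthropic SDK streaming objects."""
    def __aiter__(self):
        return self
    async def __anext__(self):
        raise StopAsyncIteration

def is_google_stream(response):
    # Sync iterator only: __iter__ present, __aiter__ absent.
    return hasattr(response, '__iter__') and not hasattr(response, '__aiter__')

assert is_google_stream(FakeGoogleStream())
assert not is_google_stream(FakeOpenAIStream())
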
@@ -790,10 +792,10 @@ class AutoselectHandler:
                         # Create OpenAI-compatible chunk
                         openai_chunk = {
-                            "id": f"google-{selected_model_id}-{int(time.time())}-chunk-{chunk_id}",
+                            "id": f"google-{request_data['model']}-{int(time.time())}-chunk-{chunk_id}",
                             "object": "chat.completion.chunk",
                             "created": int(time.time()),
-                            "model": selected_model_id,
+                            "model": request_data['model'],
                             "choices": [{
                                 "index": 0,
                                 "delta": {
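
Both sides of this change emit the same wire format: an OpenAI-compatible chat.completion.chunk object framed as a Server-Sent Events "data:" line; the fix only swaps the hardcoded selected_model_id for the model name the client actually requested. A hedged sketch of that framing, stdlib only, with an illustrative model name:

import json
import time

def to_sse_event(model, chunk_id, text):
    """Build an OpenAI-compatible chat.completion.chunk and SSE-frame it."""
    openai_chunk = {
        "id": f"google-{model}-{int(time.time())}-chunk-{chunk_id}",
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [{"index": 0, "delta": {"content": text}, "finish_reason": None}],
    }
    # SSE framing: a "data: <json>" line terminated by a blank line.
    return f"data: {json.dumps(openai_chunk)}\n\n".encode('utf-8')

print(to_sse_event("gemini-2.0-flash", 0, "Hello"))  # model name is illustrative
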
@@ -818,34 +820,38 @@ class AutoselectHandler:
                         continue
             else:
-                # Handle OpenAI/Anthropic streaming responses (async iterators)
-                for chunk in response:
-                    try:
-                        # Debug: Log chunk type and content before serialization
-                        logger.debug(f"Chunk type: {type(chunk)}")
-                        logger.debug(f"Chunk: {chunk}")
-                        # Convert chunk to dict and serialize as JSON
-                        chunk_dict = chunk.model_dump() if hasattr(chunk, 'model_dump') else chunk
-                        import json
-                        yield f"data: {json.dumps(chunk_dict)}\n\n".encode('utf-8')
-                    except Exception as chunk_error:
-                        # Handle errors during chunk serialization (e.g., tool calls without tool_choice)
-                        # This is a critical error - the model is trying to call tools without proper configuration
-                        error_msg = str(chunk_error)
-                        if "tool" in error_msg.lower():
-                            logger.error(f"Tool call error during streaming: {error_msg}")
-                            logger.error(f"Chunk type: {type(chunk)}")
-                            logger.error(f"Chunk content: {chunk}")
-                            # Re-raise to trigger retry in rotation handler
-                            raise
-                        else:
-                            logger.warning(f"Error serializing chunk: {error_msg}")
-                            logger.warning(f"Chunk type: {type(chunk)}")
-                            logger.warning(f"Chunk content: {chunk}")
-                            # Skip this chunk and continue with the next one
-                            continue
+                if hasattr(response, '__aiter__'):
+                    # It's an async iterator
+                    async for chunk in response:
+                        try:
+                            # Debug: Log chunk type and content before serialization
+                            logger.debug(f"Chunk type: {type(chunk)}")
+                            logger.debug(f"Chunk: {chunk}")
+                            # Convert chunk to dict and serialize as JSON
+                            chunk_dict = chunk.model_dump() if hasattr(chunk, 'model_dump') else chunk
+                            import json
+                            yield f"data: {json.dumps(chunk_dict)}\n\n".encode('utf-8')
+                        except Exception as chunk_error:
+                            # Handle errors during chunk serialization (e.g., tool calls without tool_choice)
+                            # This is a critical error - the model is trying to call tools without proper configuration
+                            error_msg = str(chunk_error)
+                            if "tool" in error_msg.lower():
+                                logger.error(f"Tool call error during streaming: {error_msg}")
+                                logger.error(f"Chunk type: {type(chunk)}")
+                                logger.error(f"Chunk content: {chunk}")
+                                # Re-raise to trigger retry in rotation handler
+                                raise
+                            else:
+                                logger.warning(f"Error serializing chunk: {error_msg}")
+                                logger.warning(f"Chunk type: {type(chunk)}")
+                                logger.warning(f"Chunk content: {chunk}")
+                                # Skip this chunk and continue with the next one
+                                continue
+                else:
+                    logger.warning(f"Unknown stream type: {type(response)}")
         except Exception as e:
-            logger.error(f"Error in streaming response: {str(e)}")
+            logger.error(f"Error in streaming response: {str(e)}", exc_info=True)
             import json
             error_dict = {"error": str(e)}
             yield f"data: {json.dumps(error_dict)}\n\n".encode('utf-8')
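
The restructured branch above (plain iteration for Google's sync stream, async for when __aiter__ is present, a warning otherwise) can be exercised without any SDK by feeding it one iterator of each kind. A self-contained sketch under that assumption, using plain dicts in place of SDK chunk objects with model_dump():

import asyncio
import json

async def serialize_stream(response):
    """Mirror of the branch logic: sync iterators vs async iterators."""
    if hasattr(response, '__iter__') and not hasattr(response, '__aiter__'):
        for chunk in response:                      # Google-style sync stream
            yield f"data: {json.dumps(chunk)}\n\n".encode('utf-8')
    elif hasattr(response, '__aiter__'):
        async for chunk in response:                # OpenAI/Anthropic-style async stream
            yield f"data: {json.dumps(chunk)}\n\n".encode('utf-8')
    else:
        raise TypeError(f"Unknown stream type: {type(response)}")

async def main():
    async def async_stream():
        yield {"delta": "a"}
        yield {"delta": "b"}
    sync_stream = iter([{"delta": "x"}])
    print([b async for b in serialize_stream(sync_stream)])
    print([b async for b in serialize_stream(async_stream())])

asyncio.run(main())
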

main.py

@@ -216,17 +216,69 @@ async def rotation_chat_completions(request: Request, body: ChatCompletionReques
     if not rotation_config:
         raise HTTPException(status_code=400, detail=f"Rotation {body.model} not found")
 
+    # Check if this is a Google streaming response
     async def stream_generator():
         try:
             response = await rotation_handler.handle_rotation_request(body.model, body_dict)
-            for chunk in response:
-                try:
-                    chunk_dict = chunk.model_dump() if hasattr(chunk, 'model_dump') else chunk
-                    import json
-                    yield f"data: {json.dumps(chunk_dict)}\n\n".encode('utf-8')
-                except Exception as chunk_error:
-                    logger.warning(f"Error serializing chunk: {str(chunk_error)}")
-                    continue
+            # Check if response is a Google-style streaming response (sync iterator)
+            is_google_stream = hasattr(response, '__iter__') and not hasattr(response, '__aiter__')
+            logger.debug(f"Rotation stream type: {'Google' if is_google_stream else 'OpenAI/Anthropic'}")
+            if is_google_stream:
+                # Handle Google's synchronous streaming response
+                chunk_id = 0
+                for chunk in response:
+                    try:
+                        logger.debug(f"Google chunk type: {type(chunk)}")
+                        logger.debug(f"Google chunk: {chunk}")
+                        # Extract text from Google chunk
+                        chunk_text = ""
+                        try:
+                            if hasattr(chunk, 'candidates') and chunk.candidates:
+                                candidate = chunk.candidates[0] if chunk.candidates else None
+                                if candidate and hasattr(candidate, 'content') and candidate.content:
+                                    if hasattr(candidate.content, 'parts') and candidate.content.parts:
+                                        for part in candidate.content.parts:
+                                            if hasattr(part, 'text') and part.text:
+                                                chunk_text += part.text
+                        except Exception as e:
+                            logger.error(f"Error extracting text from Google chunk: {e}")
+                        # Create OpenAI-compatible chunk
+                        openai_chunk = {
+                            "id": f"google-{body.model}-{int(time.time())}-chunk-{chunk_id}",
+                            "object": "chat.completion.chunk",
+                            "created": int(time.time()),
+                            "model": body.model,
+                            "choices": [{
+                                "index": 0,
+                                "delta": {"content": chunk_text},
+                                "finish_reason": None
+                            }]
+                        }
+                        chunk_id += 1
+                        logger.debug(f"OpenAI chunk: {openai_chunk}")
+                        import json
+                        yield f"data: {json.dumps(openai_chunk)}\n\n".encode('utf-8')
+                    except Exception as chunk_error:
+                        logger.error(f"Error processing Google chunk: {str(chunk_error)}")
+                        continue
+            else:
+                # Handle OpenAI/Anthropic streaming responses (async iterators)
+                for chunk in response:
+                    try:
+                        chunk_dict = chunk.model_dump() if hasattr(chunk, 'model_dump') else chunk
+                        import json
+                        yield f"data: {json.dumps(chunk_dict)}\n\n".encode('utf-8')
+                    except Exception as chunk_error:
+                        logger.warning(f"Error serializing chunk: {str(chunk_error)}")
+                        continue
         except Exception as e:
             logger.error(f"Error in streaming response: {str(e)}")
             import json
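
The hunk ends before showing how stream_generator() is returned; given the Request and HTTPException signatures, the surrounding route is presumably FastAPI, where an async generator like this would typically be wrapped in a StreamingResponse with an SSE media type. Note also that the OpenAI/Anthropic branch here still iterates with a plain for over what its comment calls async iterators, whereas the handlers.py hunk above switched the equivalent branch to async for. A sketch of the wiring, with the route path and body purely illustrative:

# Assumption-level sketch: returning an async generator as Server-Sent Events
# in FastAPI. Everything except StreamingResponse itself is illustrative,
# not taken from this diff.
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

@app.post("/rotation/v1/chat/completions")  # illustrative path
async def rotation_chat_completions_example():
    async def stream_generator():
        # Stand-in for the real generator above.
        yield b'data: {"choices": [{"delta": {"content": "hi"}}]}\n\n'
        yield b"data: [DONE]\n\n"
    return StreamingResponse(stream_generator(), media_type="text/event-stream")
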