Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in
Toggle navigation
A
aisbf
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexlab
aisbf
Commits
9333666f
Commit
9333666f
authored Feb 06, 2026 by Stefy Lanza (nextime / spora)
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix streaming response handling for OpenAI async iterators
parent
0e5fab02
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing 2 changed files with 79 additions and 107 deletions
+79
-107
handlers.py
aisbf/handlers.py
+70
-98
main.py
main.py
+9
-9
No files found.
aisbf/handlers.py
View file @
9333666f
...
...
@@ -764,45 +764,45 @@ class AutoselectHandler:
logger
.
info
(
f
"Autoselect stream response type: {type(response)}"
)
# Check if this is a Google streaming response
# Google's generate_content_stream() returns a sync iterator with chunks that have 'candidates' attribute
is_google_stream
=
hasattr
(
response
,
'__iter__'
)
and
not
hasattr
(
response
,
'__aiter__'
)
# Test the first chunk to verify if it's a Google response
if
is_google_stream
:
# Handle streaming responses based on chunk type rather than iterator type
if
hasattr
(
response
,
'__aiter__'
):
# Async iterator (OpenAI/Anthropic style)
logger
.
info
(
f
"Handling async stream response"
)
async
for
chunk
in
response
:
try
:
# Get the first chunk to test
import
itertools
first_chunk
=
next
(
iter
(
response
))
# Check if it's a Google chunk by looking for 'candidates' attribute
if
hasattr
(
first_chunk
,
'candidates'
):
logger
.
info
(
f
"Confirmed Google streaming response"
)
else
:
logger
.
warning
(
f
"Response is sync iterator but not Google format - treating as OpenAI/Anthropic stream"
)
is_google_stream
=
False
# Recreate the iterator with the first chunk
response
=
itertools
.
chain
([
first_chunk
],
response
)
except
Exception
as
e
:
logger
.
error
(
f
"Error testing stream type: {e}"
)
is_google_stream
=
False
else
:
logger
.
info
(
f
"Not a sync iterator - treating as OpenAI/Anthropic async stream"
)
logger
.
info
(
f
"Is Google streaming response: {is_google_stream}"
)
logger
.
debug
(
f
"Chunk type: {type(chunk)}"
)
logger
.
debug
(
f
"Chunk: {chunk}"
)
if
is_google_stream
:
# Handle Google's synchronous streaming response
# Convert Google chunks to OpenAI format
chunk_dict
=
chunk
.
model_dump
()
if
hasattr
(
chunk
,
'model_dump'
)
else
chunk
import
json
yield
f
"data: {json.dumps(chunk_dict)}
\n\n
"
.
encode
(
'utf-8'
)
except
Exception
as
chunk_error
:
error_msg
=
str
(
chunk_error
)
if
"tool"
in
error_msg
.
lower
():
logger
.
error
(
f
"Tool call error during streaming: {error_msg}"
)
logger
.
error
(
f
"Chunk type: {type(chunk)}"
)
logger
.
error
(
f
"Chunk content: {chunk}"
)
raise
else
:
logger
.
warning
(
f
"Error serializing chunk: {error_msg}"
)
logger
.
warning
(
f
"Chunk type: {type(chunk)}"
)
logger
.
warning
(
f
"Chunk content: {chunk}"
)
continue
elif
hasattr
(
response
,
'__iter__'
):
# Sync iterator - determine if it's Google or other format
logger
.
info
(
f
"Handling sync stream response"
)
chunk_id
=
0
for
chunk
in
response
:
try
:
logger
.
debug
(
f
"
Google c
hunk type: {type(chunk)}"
)
logger
.
debug
(
f
"
Google c
hunk: {chunk}"
)
logger
.
debug
(
f
"
C
hunk type: {type(chunk)}"
)
logger
.
debug
(
f
"
C
hunk: {chunk}"
)
# Extract text from Google chunk
# Check if it's a Google-style chunk with candidates
if
hasattr
(
chunk
,
'candidates'
):
logger
.
debug
(
"Processing Google-style chunk"
)
chunk_text
=
""
try
:
if
hasattr
(
chunk
,
'candidates'
)
and
chunk
.
candidates
:
if
chunk
.
candidates
:
candidate
=
chunk
.
candidates
[
0
]
if
chunk
.
candidates
else
None
if
candidate
and
hasattr
(
candidate
,
'content'
)
and
candidate
.
content
:
if
hasattr
(
candidate
.
content
,
'parts'
)
and
candidate
.
content
.
parts
:
...
...
@@ -812,7 +812,6 @@ class AutoselectHandler:
except
Exception
as
e
:
logger
.
error
(
f
"Error extracting text from Google chunk: {e}"
)
# Create OpenAI-compatible chunk
openai_chunk
=
{
"id"
:
f
"google-{request_data['model']}-{int(time.time())}-chunk-{chunk_id}"
,
"object"
:
"chat.completion.chunk"
,
...
...
@@ -827,48 +826,21 @@ class AutoselectHandler:
}]
}
chunk_id
+=
1
logger
.
debug
(
f
"OpenAI chunk: {openai_chunk}"
)
# Serialize as JSON
import
json
yield
f
"data: {json.dumps(openai_chunk)}
\n\n
"
.
encode
(
'utf-8'
)
except
Exception
as
chunk_error
:
error_msg
=
str
(
chunk_error
)
logger
.
error
(
f
"Error processing Google chunk: {error_msg}"
)
logger
.
error
(
f
"Chunk type: {type(chunk)}"
)
logger
.
error
(
f
"Chunk content: {chunk}"
)
# Skip this chunk and continue
continue
chunk_id
+=
1
else
:
# Handle OpenAI/Anthropic streaming responses (async iterators)
if
hasattr
(
response
,
'__aiter__'
):
# It's an async iterator
async
for
chunk
in
response
:
try
:
# Debug: Log chunk type and content before serialization
logger
.
debug
(
f
"Chunk type: {type(chunk)}"
)
logger
.
debug
(
f
"Chunk: {chunk}"
)
# Convert chunk to dict and serialize as JSON
logger
.
debug
(
"Processing other sync stream chunk"
)
# For other sync stream formats (like OpenAI's sync stream)
chunk_dict
=
chunk
.
model_dump
()
if
hasattr
(
chunk
,
'model_dump'
)
else
chunk
import
json
yield
f
"data: {json.dumps(chunk_dict)}
\n\n
"
.
encode
(
'utf-8'
)
except
Exception
as
chunk_error
:
# Handle errors during chunk serialization (e.g., tool calls without tool_choice)
# This is a critical error - the model is trying to call tools without proper configuration
error_msg
=
str
(
chunk_error
)
if
"tool"
in
error_msg
.
lower
():
logger
.
error
(
f
"Tool call error during streaming: {error_msg}"
)
logger
.
error
(
f
"Error processing chunk: {error_msg}"
)
logger
.
error
(
f
"Chunk type: {type(chunk)}"
)
logger
.
error
(
f
"Chunk content: {chunk}"
)
# Re-raise to trigger retry in rotation handler
raise
else
:
logger
.
warning
(
f
"Error serializing chunk: {error_msg}"
)
logger
.
warning
(
f
"Chunk type: {type(chunk)}"
)
logger
.
warning
(
f
"Chunk content: {chunk}"
)
# Skip this chunk and continue with the next one
continue
else
:
logger
.
warning
(
f
"Unknown stream type: {type(response)}"
)
...
...
main.py
View file @
9333666f
...
...
@@ -294,7 +294,7 @@ async def rotation_chat_completions(request: Request, body: ChatCompletionReques
continue
else
:
# Handle OpenAI/Anthropic streaming responses (async iterators)
for
chunk
in
response
:
async
for
chunk
in
response
:
try
:
chunk_dict
=
chunk
.
model_dump
()
if
hasattr
(
chunk
,
'model_dump'
)
else
chunk
import
json
...
...
Write
Preview
Markdown is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.