fix: Return correct response type based on original request mode

- Add stream parameter to handle_rotation_request()
- When notifyerrors is enabled, return the error as a StreamingResponse if the original request was streaming
- Otherwise (original request was non-streaming), return the error as a plain dict
- Fixes an issue where the autoselect handler expected a StreamingResponse but received a dict
parent b3a547e1
......@@ -924,7 +924,7 @@ class RotationHandler:
logger.info(f"notifyerrors is enabled for rotation '{rotation_id}', returning error as normal message")
# Return a normal response with error message instead of HTTP 503
error_message = f"All providers in rotation '{rotation_id}' failed. Details: {'; '.join(error_details)}"
return {
error_response = {
"id": f"error-{rotation_id}-{int(time.time())}",
"object": "chat.completion",
"created": int(time.time()),
......@@ -946,6 +946,11 @@ class RotationHandler:
"rotation_id": rotation_id,
"error_details": error_details
}
# Return as StreamingResponse if original request was streaming, otherwise return dict
if stream:
return self._create_streaming_response(error_response, rotation_id)
else:
return error_response
else:
raise HTTPException(
status_code=503,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment