Fix CLI streaming to use iter_content with smaller chunks for real-time output

parent 47738566
......@@ -416,11 +416,15 @@ class CoderClient:
tool_calls = []
current_tool_call = None
for line in response.iter_lines():
if not line:
continue
# Use iter_content with smaller chunk size for better real-time handling
buffer = ""
for chunk in response.iter_content(chunk_size=256, decode_unicode=True):
if chunk:
buffer += chunk
line = line.decode('utf-8')
# Process complete lines from buffer
while '\n' in buffer:
line, buffer = buffer.split('\n', 1)
# Handle SSE format
if line.startswith('data: '):
......@@ -429,6 +433,9 @@ class CoderClient:
if line == '[DONE]':
break
if not line:
continue
try:
data = json.loads(line)
delta = data.get('choices', [{}])[0].get('delta', {})
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment