Fix CLI streaming to use iter_content with smaller chunks for real-time output

parent 47738566
...@@ -416,11 +416,15 @@ class CoderClient: ...@@ -416,11 +416,15 @@ class CoderClient:
tool_calls = [] tool_calls = []
current_tool_call = None current_tool_call = None
for line in response.iter_lines(): # Use iter_content with smaller chunk size for better real-time handling
if not line: buffer = ""
continue for chunk in response.iter_content(chunk_size=256, decode_unicode=True):
if chunk:
buffer += chunk
line = line.decode('utf-8') # Process complete lines from buffer
while '\n' in buffer:
line, buffer = buffer.split('\n', 1)
# Handle SSE format # Handle SSE format
if line.startswith('data: '): if line.startswith('data: '):
...@@ -429,6 +433,9 @@ class CoderClient: ...@@ -429,6 +433,9 @@ class CoderClient:
if line == '[DONE]': if line == '[DONE]':
break break
if not line:
continue
try: try:
data = json.loads(line) data = json.loads(line)
delta = data.get('choices', [{}])[0].get('delta', {}) delta = data.get('choices', [{}])[0].get('delta', {})
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment