Add more debugging to track llama.cpp streaming response

1b47f3ff · Stefy Lanza (nextime / spora) · b341f96a · 1b47f3ff
Commit 1b47f3ff authored Feb 28, 2026 by Stefy Lanza (nextime / spora)
Show whitespace changes
Inline Side-by-side

Showing with 6 additions and 0 deletions

coderai coderai +6 -0

No files found.
--- a/coderai
+++ b/coderai
@@ -1048,6 +1048,7 @@ class VulkanBackend(ModelBackend):
        total_content = ""
        chunk_count = 0
        try:
+            print(f"DEBUG: generate_chat_stream: Calling create_chat_completion with tools={tools}")
            stream = self.model.create_chat_completion(
                messages=messages,
                max_tokens=max_tokens,
@@ -1057,8 +1058,10 @@ class VulkanBackend(ModelBackend):
                tools=tools,
                stream=True,
            )
+            print(f"DEBUG: generate_chat_stream: Got stream object: {type(stream)}")
            for chunk in stream:
                chunk_count += 1
+                print(f"DEBUG: generate_chat_stream: Raw chunk {chunk_count}: {repr(chunk)}")
                delta = chunk["choices"][0].get("delta", {})
                content = delta.get("content", "")
                if content:
@@ -1069,6 +1072,9 @@ class VulkanBackend(ModelBackend):
                print(f"DEBUG: Empty stream from create_chat_completion, using fallback")
                raise Exception("Empty stream response")
        except Exception as e:
+            print(f"DEBUG: generate_chat_stream exception: {type(e).__name__}: {e}")
+            import traceback
+            traceback.print_exc()
            if chunk_count == 0:
                print(f"Warning: create_chat_completion stream failed ({e}), falling back to text generation")
                # Fallback: format messages manually and use text generation