Add more debugging to track llama.cpp streaming response

parent b341f96a
...@@ -1048,6 +1048,7 @@ class VulkanBackend(ModelBackend): ...@@ -1048,6 +1048,7 @@ class VulkanBackend(ModelBackend):
total_content = "" total_content = ""
chunk_count = 0 chunk_count = 0
try: try:
print(f"DEBUG: generate_chat_stream: Calling create_chat_completion with tools={tools}")
stream = self.model.create_chat_completion( stream = self.model.create_chat_completion(
messages=messages, messages=messages,
max_tokens=max_tokens, max_tokens=max_tokens,
...@@ -1057,8 +1058,10 @@ class VulkanBackend(ModelBackend): ...@@ -1057,8 +1058,10 @@ class VulkanBackend(ModelBackend):
tools=tools, tools=tools,
stream=True, stream=True,
) )
print(f"DEBUG: generate_chat_stream: Got stream object: {type(stream)}")
for chunk in stream: for chunk in stream:
chunk_count += 1 chunk_count += 1
print(f"DEBUG: generate_chat_stream: Raw chunk {chunk_count}: {repr(chunk)}")
delta = chunk["choices"][0].get("delta", {}) delta = chunk["choices"][0].get("delta", {})
content = delta.get("content", "") content = delta.get("content", "")
if content: if content:
...@@ -1069,6 +1072,9 @@ class VulkanBackend(ModelBackend): ...@@ -1069,6 +1072,9 @@ class VulkanBackend(ModelBackend):
print(f"DEBUG: Empty stream from create_chat_completion, using fallback") print(f"DEBUG: Empty stream from create_chat_completion, using fallback")
raise Exception("Empty stream response") raise Exception("Empty stream response")
except Exception as e: except Exception as e:
print(f"DEBUG: generate_chat_stream exception: {type(e).__name__}: {e}")
import traceback
traceback.print_exc()
if chunk_count == 0: if chunk_count == 0:
print(f"Warning: create_chat_completion stream failed ({e}), falling back to text generation") print(f"Warning: create_chat_completion stream failed ({e}), falling back to text generation")
# Fallback: format messages manually and use text generation # Fallback: format messages manually and use text generation
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment