Add debug output to diagnose empty responses

parent 7947fb75
...@@ -1025,7 +1025,12 @@ class VulkanBackend(ModelBackend): ...@@ -1025,7 +1025,12 @@ class VulkanBackend(ModelBackend):
stop=stop or [], stop=stop or [],
tools=tools, tools=tools,
) )
return response["choices"][0]["message"].get("content", "") content = response["choices"][0]["message"].get("content", "")
print(f"DEBUG: generate_chat returned content length: {len(content) if content else 0}")
if not content or not content.strip():
print(f"DEBUG: Empty content from create_chat_completion, using fallback")
raise Exception("Empty response from create_chat_completion")
return content
except Exception as e: except Exception as e:
print(f"Warning: create_chat_completion failed ({e}), falling back to text generation") print(f"Warning: create_chat_completion failed ({e}), falling back to text generation")
# Fallback: format messages manually and use text generation # Fallback: format messages manually and use text generation
...@@ -1039,6 +1044,8 @@ class VulkanBackend(ModelBackend): ...@@ -1039,6 +1044,8 @@ class VulkanBackend(ModelBackend):
if max_tokens is None: if max_tokens is None:
max_tokens = 512 max_tokens = 512
total_content = ""
chunk_count = 0
try: try:
stream = self.model.create_chat_completion( stream = self.model.create_chat_completion(
messages=messages, messages=messages,
...@@ -1050,16 +1057,25 @@ class VulkanBackend(ModelBackend): ...@@ -1050,16 +1057,25 @@ class VulkanBackend(ModelBackend):
stream=True, stream=True,
) )
for chunk in stream: for chunk in stream:
chunk_count += 1
delta = chunk["choices"][0].get("delta", {}) delta = chunk["choices"][0].get("delta", {})
content = delta.get("content", "") content = delta.get("content", "")
if content: if content:
total_content += content
yield content yield content
print(f"DEBUG: generate_chat_stream yielded {chunk_count} chunks, total content length: {len(total_content)}")
if chunk_count == 0 or not total_content.strip():
print(f"DEBUG: Empty stream from create_chat_completion, using fallback")
raise Exception("Empty stream response")
except Exception as e: except Exception as e:
print(f"Warning: create_chat_completion stream failed ({e}), falling back to text generation") if chunk_count == 0:
# Fallback: format messages manually and use text generation print(f"Warning: create_chat_completion stream failed ({e}), falling back to text generation")
prompt = self._manual_format_messages(messages) # Fallback: format messages manually and use text generation
async for chunk in self.generate_stream(prompt, max_tokens, temperature, top_p, stop): prompt = self._manual_format_messages(messages)
yield chunk async for chunk in self.generate_stream(prompt, max_tokens, temperature, top_p, stop):
yield chunk
else:
print(f"DEBUG: Stream completed with {chunk_count} chunks")
def _manual_format_messages(self, messages: List[Dict]) -> str: def _manual_format_messages(self, messages: List[Dict]) -> str:
"""Manual fallback for formatting messages when create_chat_completion fails.""" """Manual fallback for formatting messages when create_chat_completion fails."""
...@@ -1481,7 +1497,6 @@ async def chat_completions(request: ChatCompletionRequest): ...@@ -1481,7 +1497,6 @@ async def chat_completions(request: ChatCompletionRequest):
tools_dict, tools_dict,
) )
async def stream_chat_response( async def stream_chat_response(
messages: List[Dict], messages: List[Dict],
model_name: str, model_name: str,
...@@ -1496,8 +1511,11 @@ async def stream_chat_response( ...@@ -1496,8 +1511,11 @@ async def stream_chat_response(
created = int(time.time()) created = int(time.time())
generated_text = "" generated_text = ""
print(f"DEBUG: stream_chat_response started, stream=True, tools={tools is not None}")
try: try:
chunk_count = 0
# Use generate_chat_stream for proper chat template handling # Use generate_chat_stream for proper chat template handling
async for chunk in model_manager.generate_chat_stream( async for chunk in model_manager.generate_chat_stream(
messages=messages, messages=messages,
...@@ -1507,9 +1525,11 @@ async def stream_chat_response( ...@@ -1507,9 +1525,11 @@ async def stream_chat_response(
stop=stop, stop=stop,
tools=tools, tools=tools,
): ):
chunk_count += 1
# Filter malformed content from each chunk # Filter malformed content from each chunk
filtered_chunk = filter_malformed_content(chunk) filtered_chunk = filter_malformed_content(chunk)
if not filtered_chunk: if not filtered_chunk:
print(f"DEBUG: filtered_chunk was empty (original chunk: {repr(chunk[:50])})")
continue continue
generated_text += filtered_chunk generated_text += filtered_chunk
...@@ -1527,6 +1547,10 @@ async def stream_chat_response( ...@@ -1527,6 +1547,10 @@ async def stream_chat_response(
} }
yield f"data: {json.dumps(data)}\n\n" yield f"data: {json.dumps(data)}\n\n"
print(f"DEBUG: stream_chat_response completed, {chunk_count} chunks, generated_text length: {len(generated_text)}")
if not generated_text.strip():
print(f"DEBUG: Warning - no content generated!")
# Check for tool calls in complete output # Check for tool calls in complete output
if tools: if tools:
# Convert tools back to Tool objects for parsing # Convert tools back to Tool objects for parsing
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment