Commit 0653c58a authored by Your Name

Add debug output for model input in both streaming and non-streaming modes

parent 3890a849
...@@ -5791,6 +5791,28 @@ async def stream_chat_response( ...@@ -5791,6 +5791,28 @@ async def stream_chat_response(
try: try:
chunk_count = 0 chunk_count = 0
# Debug: Print what is being passed to the model
if global_debug:
print(f"\n{'='*80}")
print(f"=== MODEL INPUT (DEBUG) ===")
print(f"{'='*80}")
print(f"Model: {model_name}")
print(f"Max tokens: {max_tokens}")
print(f"Temperature: {temperature}")
print(f"Top P: {top_p}")
print(f"Stop sequences: {stop}")
print(f"Tools: {tools is not None}")
print(f"Response format: {response_format}")
print(f"\n--- Messages ---")
for i, msg in enumerate(messages):
role = msg.get('role', 'unknown')
content = msg.get('content', '')
if content and len(content) > 500:
content = content[:500] + "... [truncated]"
print(f"[{i}] {role}: {repr(content)}")
print(f"{'='*80}\n")
# Use generate_chat_stream for proper chat template handling # Use generate_chat_stream for proper chat template handling
async for chunk in current_manager.generate_chat_stream( async for chunk in current_manager.generate_chat_stream(
messages=messages, messages=messages,
...@@ -5991,6 +6013,27 @@ async def generate_chat_response( ...@@ -5991,6 +6013,27 @@ async def generate_chat_response(
completion_id = f"chatcmpl-{uuid.uuid4().hex}" completion_id = f"chatcmpl-{uuid.uuid4().hex}"
created = int(time.time()) created = int(time.time())
# Debug: Print what is being passed to the model
if global_debug:
print(f"\n{'='*80}")
print(f"=== MODEL INPUT (DEBUG) ===")
print(f"{'='*80}")
print(f"Model: {model_name}")
print(f"Max tokens: {max_tokens}")
print(f"Temperature: {temperature}")
print(f"Top P: {top_p}")
print(f"Stop sequences: {stop}")
print(f"Tools: {tools is not None}")
print(f"Response format: {response_format}")
print(f"\n--- Messages ---")
for i, msg in enumerate(messages):
role = msg.get('role', 'unknown')
content = msg.get('content', '')
if content and len(content) > 500:
content = content[:500] + "... [truncated]"
print(f"[{i}] {role}: {repr(content)}")
print(f"{'='*80}\n")
try: try:
# Use generate_chat for proper chat template handling # Use generate_chat for proper chat template handling
generated_text = current_manager.generate_chat( generated_text = current_manager.generate_chat(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment