Commit bdecb8c9 authored by Your Name

feat(cli): Add --dump option to show model output

Shows:
- Raw model output
- Parsed output (after formatter)
- LiteLLM debug info (--dump also enables --debug)
parent 905b1814
...@@ -200,6 +200,7 @@ global_system_prompt = None ...@@ -200,6 +200,7 @@ global_system_prompt = None
# Global debug flag # Global debug flag
global_debug = False global_debug = False
global_dump = False
global_file_path = None global_file_path = None
# ============================================================================= # =============================================================================
...@@ -2480,6 +2481,14 @@ async def generate_chat_response( ...@@ -2480,6 +2481,14 @@ async def generate_chat_response(
# Always filter out malformed content # Always filter out malformed content
generated_text = filter_malformed_content(generated_text) generated_text = filter_malformed_content(generated_text)
# Dump raw output if enabled
if global_dump:
print(f"\n{'='*80}")
print(f"=== RAW MODEL OUTPUT (DUMP) ===")
print(f"{'='*80}")
print(generated_text)
print(f"{'='*80}\n")
response_message = { response_message = {
"role": "assistant", "role": "assistant",
"content": generated_text, "content": generated_text,
...@@ -2531,12 +2540,23 @@ async def generate_chat_response( ...@@ -2531,12 +2540,23 @@ async def generate_chat_response(
# Use OpenAIFormatter for final sanitization # Use OpenAIFormatter for final sanitization
formatter = OpenAIFormatter(model_name) formatter = OpenAIFormatter(model_name)
return formatter.format_litellm_full( formatted_response = formatter.format_litellm_full(
text=response_message.get("content", ""), text=response_message.get("content", ""),
prompt_tokens=prompt_tokens, prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens, completion_tokens=completion_tokens,
tool_calls=response_message.get("tool_calls") tool_calls=response_message.get("tool_calls")
) )
# Dump parsed output if enabled
if global_dump:
import json
print(f"\n{'='*80}")
print(f"=== PARSED OUTPUT (DUMP) ===")
print(f"{'='*80}")
print(json.dumps(formatted_response, indent=2))
print(f"{'='*80}\n")
return formatted_response
except Exception as e: except Exception as e:
print(f"Error during generation: {e}") print(f"Error during generation: {e}")
raise HTTPException(status_code=500, detail=f"Generation error: {str(e)}") raise HTTPException(status_code=500, detail=f"Generation error: {str(e)}")
...@@ -3050,6 +3070,11 @@ def parse_args(): ...@@ -3050,6 +3070,11 @@ def parse_args():
action="store_true", action="store_true",
help="Enable debug mode - dumps full request/response to stdout for troubleshooting", help="Enable debug mode - dumps full request/response to stdout for troubleshooting",
) )
parser.add_argument(
"--dump",
action="store_true",
help="Dump model output: raw output, parsed output, and litellm debug info",
)
parser.add_argument( parser.add_argument(
"--file-path", "--file-path",
type=str, type=str,
...@@ -3086,7 +3111,7 @@ def parse_args(): ...@@ -3086,7 +3111,7 @@ def parse_args():
return parser.parse_args() return parser.parse_args()
def main(): def main():
"""Main entry point.""" """Main entry point."""
global global_system_prompt, model_manager, multi_model_manager, global_debug, global_args, global_file_path global global_system_prompt, model_manager, multi_model_manager, global_debug, global_dump, global_args, global_file_path
# Suppress unraisable exceptions from LlamaModel.__del__ # Suppress unraisable exceptions from LlamaModel.__del__
import sys import sys
...@@ -3113,6 +3138,10 @@ def main(): ...@@ -3113,6 +3138,10 @@ def main():
# Set global debug flag # Set global debug flag
global_debug = args.debug global_debug = args.debug
# Set global dump flag (enables debug as well for litellm output)
global_dump = args.dump
if global_dump:
global_debug = True
# Set global file path for storing generated files # Set global file path for storing generated files
global_file_path = args.file_path global_file_path = args.file_path
if global_debug: if global_debug:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment