Fix imports in coder CLI and add tokenizer dependencies + GGUF error detection

parent 905dc92d
@@ -4,7 +4,14 @@ coder - A CLI tool for interacting with coderai API
 Connects to OpenAI-compatible API and executes tools automatically.
 """
 
+# Debug: Verify script execution - imports must come first
+import sys
 import os
+if os.environ.get('CODER_DEBUG'):
+    print("DEBUG: Script started", file=sys.stderr)
+    print(f"DEBUG: Arguments: {sys.argv}", file=sys.stderr)
+    print(f"DEBUG: Python executable: {sys.executable}", file=sys.stderr)
+
 import sys
 import json
 import argparse
@@ -151,6 +158,7 @@ class Config:
     debug: bool = False  # Show debug output including raw tool calls
     max_context: int = 32768  # Maximum context size in tokens
     no_prompt: bool = False  # Don't send system prompt
+    no_tools: bool = False  # Don't send tool definitions
 
     def __post_init__(self):
         if self.confirm_commands is None:
@@ -179,6 +187,7 @@ class Config:
             config.debug = data.get('debug', config.debug)
             config.max_context = data.get('max_context', config.max_context)
             config.no_prompt = data.get('no_prompt', config.no_prompt)
+            config.no_tools = data.get('no_tools', config.no_tools)
         except (json.JSONDecodeError, IOError) as e:
             print(f"Warning: Could not load config from {config_path}: {e}", file=sys.stderr)
@@ -203,7 +212,8 @@ class Config:
             'timeout': self.timeout,
             'debug': self.debug,
             'max_context': self.max_context,
-            'no_prompt': self.no_prompt
+            'no_prompt': self.no_prompt,
+            'no_tools': self.no_tools
         }
         with open(config_path, 'w') as f:
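
Together, the three Config hunks give 'no_tools' the same lifecycle as 'no_prompt': a dataclass default, a 'data.get(...)' fallback on load, and a key in the saved JSON. Because the load path falls back to the field default, config files written before this commit (which lack the key) still load cleanly. A minimal round-trip sketch, with all unrelated Config fields trimmed away for illustration:

# Sketch only: trimmed-down Config showing why old config files
# (with no 'no_tools' key) still load after this commit.
import json
from dataclasses import dataclass

@dataclass
class Config:
    no_prompt: bool = False
    no_tools: bool = False  # new in this commit

    @classmethod
    def load(cls, path: str) -> "Config":
        config = cls()
        try:
            with open(path) as f:
                data = json.load(f)
            config.no_prompt = data.get('no_prompt', config.no_prompt)
            # Missing key -> keeps the dataclass default (False)
            config.no_tools = data.get('no_tools', config.no_tools)
        except (json.JSONDecodeError, IOError) as e:
            print(f"Warning: Could not load config from {path}: {e}")
        return config

    def save(self, path: str) -> None:
        with open(path, 'w') as f:
            json.dump({'no_prompt': self.no_prompt,
                       'no_tools': self.no_tools}, f, indent=2)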
@@ -544,14 +554,18 @@ class CoderClient:
         if self.config.token:
             headers["Authorization"] = f"Bearer {self.config.token}"
 
+        # Build payload, conditionally including tools
         payload = {
             "model": self.config.model,
             "messages": messages,
-            "tools": self.tool_executor.tools,
-            "tool_choice": "auto",
             "stream": stream
         }
+
+        # Only include tools if not disabled
+        if not self.config.no_tools:
+            payload["tools"] = self.tool_executor.tools
+            payload["tool_choice"] = "auto"
 
         try:
             response = requests.post(
                 f"{self.config.api_url}/chat/completions",
@@ -1024,14 +1038,18 @@ class CoderClient:
         if self.config.token:
             headers["Authorization"] = f"Bearer {self.config.token}"
 
+        # Build payload, conditionally including tools
         payload = {
             "model": self.config.model,
             "messages": messages,
-            "tools": self.tool_executor.tools,
-            "tool_choice": "auto",
             "stream": True
         }
+
+        # Only include tools if not disabled
+        if not self.config.no_tools:
+            payload["tools"] = self.tool_executor.tools
+            payload["tool_choice"] = "auto"
 
         response = requests.post(
             f"{self.config.api_url}/chat/completions",
             headers=headers,
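
Both request paths now build the payload the same way, so the net effect is easiest to see from the request bodies themselves. A sketch of the two shapes an OpenAI-compatible /chat/completions endpoint receives; the model name, messages, and the tool entry here are placeholders, not the CLI's real definitions:

# Illustrative request bodies; 'messages' and the tool entry are stand-ins.
messages = [{"role": "user", "content": "hello"}]

# Default: tools and tool_choice are sent, so the model may emit tool calls.
payload_with_tools = {
    "model": "some-model",
    "messages": messages,
    "stream": True,
    "tools": [{"type": "function",
               "function": {"name": "run_command", "parameters": {}}}],
    "tool_choice": "auto",
}

# With --no-tools: the keys are absent entirely (not null or an empty
# list), so the request never depends on how a given server treats
# "tools": [].
payload_plain_chat = {
    "model": "some-model",
    "messages": messages,
    "stream": True,
}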
@@ -1409,6 +1427,13 @@ Examples:
         help='Show debug output including raw tool calls'
     )
 
+    parser.add_argument(
+        '--no-tools',
+        action='store_true',
+        dest='no_tools',
+        help='Do not send tool definitions to the API (plain chat mode)'
+    )
+
     parser.add_argument(
         '--ctx',
         type=int,
@@ -1436,7 +1461,8 @@ Examples:
             'model': config.model,
             'small': config.small,
             'tiny': config.tiny,
-            'max_context': config.max_context
+            'max_context': config.max_context,
+            'no_tools': config.no_tools
         }, indent=2))
         return
@@ -1466,6 +1492,8 @@ Examples:
         config.max_context = args.max_context
     if args.no_prompt:
         config.no_prompt = True
+    if args.no_tools:
+        config.no_tools = True
 
     # Apply small/tiny model system prompt if enabled
     if config.micro:
@@ -1498,15 +1526,25 @@ Examples:
     # Get message
     message = args.message or args.msg_flag
 
+    if os.environ.get('CODER_DEBUG'):
+        print(f"DEBUG: message = {message}", file=sys.stderr)
+        print(f"DEBUG: args.no_stream = {args.no_stream}", file=sys.stderr)
+
     if message:
         # Single message mode - disable confirmations for non-interactive use
+        if os.environ.get('CODER_DEBUG'):
+            print("DEBUG: Entering single message mode", file=sys.stderr)
         client.config.confirm_all = False
         result = client.chat(message, stream=not args.no_stream)
         # Print result if non-streaming (streaming prints internally)
         if args.no_stream and result:
             print(result)
+        if os.environ.get('CODER_DEBUG'):
+            print("DEBUG: chat() returned", file=sys.stderr)
     else:
         # Interactive shell mode
+        if os.environ.get('CODER_DEBUG'):
+            print("DEBUG: Entering interactive shell mode", file=sys.stderr)
         run_interactive_shell(client, session_manager)
...
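
Every debug hunk above repeats the same "if os.environ.get('CODER_DEBUG'):" guard. One way to condense the pattern (a hypothetical helper, not part of this commit) is a single gated print function; running the CLI with CODER_DEBUG=1 set then enables all of it at once:

# Hypothetical helper, not from the commit: same behavior as the
# repeated guards. The env var is checked on every call, so output
# stays off unless CODER_DEBUG is set in the environment.
import os
import sys

def debug(msg: str) -> None:
    if os.environ.get('CODER_DEBUG'):
        print(f"DEBUG: {msg}", file=sys.stderr)

debug(f"Arguments: {sys.argv}")
debug("Entering single message mode")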
@@ -2012,6 +2012,7 @@ def main():
         )
     except Exception as e:
         print(f"\nError loading model: {e}")
+        error_str = str(e).lower()
         print("\nTroubleshooting:")
         if args.backend == "vulkan":
             print("  - For Vulkan, ensure you have Vulkan drivers installed")
@@ -2020,6 +2021,16 @@ def main():
         else:
             print("  - For NVIDIA, ensure PyTorch with CUDA is installed")
             print("  - Run build.sh with 'nvidia' argument first")
+        if "tokenizer" in error_str or "sentencepiece" in error_str or "tiktoken" in error_str:
+            print("  - Tokenizer error: ensure sentencepiece and tiktoken are installed")
+            print("      pip install sentencepiece tiktoken tokenizers")
+        # Check if trying to load GGUF model with NVIDIA backend
+        if "gguf" in model_name.lower():
+            print(f"\n  *** IMPORTANT: '{model_name}' appears to be a GGUF model ***")
+            print("  GGUF models are NOT compatible with the NVIDIA backend.")
+            print("  Use --backend vulkan instead, or choose a HuggingFace Transformers model.")
+            print("\n  Example Vulkan command:")
+            print(f"    coderai --backend vulkan --model {model_name}")
         sys.exit(1)
 
     # Start the server
...
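
The new troubleshooting logic is heuristic by design: it substring-matches the lowered exception text for tokenizer-related package names, and substring-matches the model name for "gguf", so a name like "my-gguf-export" also triggers the backend warning whatever actually failed. A condensed sketch of the same checks; the function name and list-of-hints return are illustrative, not from the commit:

# Illustrative refactor of the diff's inline checks into one function.
def diagnose_load_error(exc: Exception, model_name: str) -> list[str]:
    hints = []
    error_str = str(exc).lower()
    # Tokenizer hint: fires if the error text mentions any of these packages.
    if any(s in error_str for s in ("tokenizer", "sentencepiece", "tiktoken")):
        hints.append("Install tokenizer deps: pip install sentencepiece tiktoken tokenizers")
    # GGUF hint: keyed off the model *name*, not the error text.
    if "gguf" in model_name.lower():
        hints.append(f"'{model_name}' appears to be a GGUF model: "
                     "use --backend vulkan, not the NVIDIA backend.")
    return hints

print(diagnose_load_error(ValueError("unknown tokenizer class"), "llama-3-8b"))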
@@ -14,6 +14,8 @@ psutil>=5.9.0
 # Optional: for better performance with NVIDIA GPUs
 bitsandbytes>=0.41.0
 sentencepiece>=0.1.99
+tiktoken>=0.5.0
+tokenizers>=0.15.0
 protobuf>=3.20.0
 
 # Optional: Flash Attention 2 for faster inference on supported NVIDIA GPUs
...
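
Since tokenizer failures only surface at model-load time, a quick preflight check that the three new requirements actually import can save a failed launch. A small sketch using only the packages the requirements file names:

# Preflight: report which tokenizer packages are importable.
import importlib

for pkg in ("sentencepiece", "tiktoken", "tokenizers"):
    try:
        importlib.import_module(pkg)
        print(f"ok      {pkg}")
    except ImportError as e:
        print(f"MISSING {pkg}: {e}")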
# FastAPI and server dependencies
# CLI dependencies

# PyTorch - Uncomment the appropriate version for your system.
# IMPORTANT: Use quotes around version specifiers to prevent shell interpretation!
# The >= operator will be interpreted as output redirection without quotes!
#
# Option 1: Use exact versions (recommended for requirements.txt)
# Option 2: Use quotes: pip install "torch>=2.0.0"

# For NVIDIA (CUDA):
# torch==2.0.0
torchvision
torchaudio

# For AMD (ROCm) - see available versions at https://pytorch.org/get-started/locally/
# rocm6.0 is recommended for newer AMD GPUs, rocm5.6 for older ones
# --index-url https://download.pytorch.org/whl/rocm6.0
# torch==2.0.0
# torchvision==0.15.0
# torchaudio==2.0.0

# For CPU only:
torch

# ML dependencies
transformers
accelerate

# System resource detection
psutil

# Optional: for better performance
bitsandbytes>=0.41.0  # for 4-bit/8-bit quantization
sentencepiece>=0.1.99  # for some tokenizers
protobuf>=3.20.0  # for some models

# Optional: Flash Attention 2 for faster inference on supported GPUs
# Requires specific CUDA/ROCm versions and may need manual installation
# Install with: pip install flash-attn --no-build-isolation
#flash-attn>=2.5.0

# Installation instructions:
# IMPORTANT: Always use quotes or exact versions to avoid shell redirection issues!
#
# 1. For NVIDIA GPUs (CUDA 12.1):
#    pip install torch torchvision torchaudio
#
# 2. For AMD GPUs (ROCm 6.0 recommended):
#    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0
#
# 3. For CPU only:
#    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
#
# If you see "No such file or directory: '0.0'" errors, you forgot to use quotes!
# The shell interprets >= as redirection. Fix: pip install "torch>=2.0.0" (with quotes)