Fix imports in coder CLI and add tokenizer dependencies + GGUF error detection

parent 905dc92d
......@@ -4,7 +4,14 @@ coder - A CLI tool for interacting with coderai API
Connects to OpenAI-compatible API and executes tools automatically.
"""
# Debug: Verify script execution - imports must come first
import sys
import os
# Opt-in start-up trace: when the CODER_DEBUG environment variable is set to a
# non-empty value, report to stderr that the script started, which arguments
# it received, and which interpreter is running it. Writing to stderr keeps
# the trace out of the tool's normal stdout output.
if os.environ.get('CODER_DEBUG'):
    print("DEBUG: Script started", file=sys.stderr)
    print(f"DEBUG: Arguments: {sys.argv}", file=sys.stderr)
    print(f"DEBUG: Python executable: {sys.executable}", file=sys.stderr)
import sys
import json
import argparse
......@@ -151,6 +158,7 @@ class Config:
debug: bool = False # Show debug output including raw tool calls
max_context: int = 32768 # Maximum context size in tokens
no_prompt: bool = False # Don't send system prompt
no_tools: bool = False # Don't send tool definitions
def __post_init__(self):
if self.confirm_commands is None:
......@@ -179,6 +187,7 @@ class Config:
config.debug = data.get('debug', config.debug)
config.max_context = data.get('max_context', config.max_context)
config.no_prompt = data.get('no_prompt', config.no_prompt)
config.no_tools = data.get('no_tools', config.no_tools)
except (json.JSONDecodeError, IOError) as e:
print(f"Warning: Could not load config from {config_path}: {e}", file=sys.stderr)
......@@ -203,7 +212,8 @@ class Config:
'timeout': self.timeout,
'debug': self.debug,
'max_context': self.max_context,
'no_prompt': self.no_prompt
'no_prompt': self.no_prompt,
'no_tools': self.no_tools
}
with open(config_path, 'w') as f:
......@@ -544,14 +554,18 @@ class CoderClient:
if self.config.token:
headers["Authorization"] = f"Bearer {self.config.token}"
# Build payload, conditionally including tools
payload = {
"model": self.config.model,
"messages": messages,
"tools": self.tool_executor.tools,
"tool_choice": "auto",
"stream": stream
}
# Only include tools if not disabled
if not self.config.no_tools:
payload["tools"] = self.tool_executor.tools
payload["tool_choice"] = "auto"
try:
response = requests.post(
f"{self.config.api_url}/chat/completions",
......@@ -1024,14 +1038,18 @@ class CoderClient:
if self.config.token:
headers["Authorization"] = f"Bearer {self.config.token}"
# Build payload, conditionally including tools
payload = {
"model": self.config.model,
"messages": messages,
"tools": self.tool_executor.tools,
"tool_choice": "auto",
"stream": True
}
# Only include tools if not disabled
if not self.config.no_tools:
payload["tools"] = self.tool_executor.tools
payload["tool_choice"] = "auto"
response = requests.post(
f"{self.config.api_url}/chat/completions",
headers=headers,
......@@ -1409,6 +1427,13 @@ Examples:
help='Show debug output including raw tool calls'
)
parser.add_argument(
'--no-tools',
action='store_true',
dest='no_tools',
help='Do not send tool definitions to the API (plain chat mode)'
)
parser.add_argument(
'--ctx',
type=int,
......@@ -1436,7 +1461,8 @@ Examples:
'model': config.model,
'small': config.small,
'tiny': config.tiny,
'max_context': config.max_context
'max_context': config.max_context,
'no_tools': config.no_tools
}, indent=2))
return
......@@ -1466,6 +1492,8 @@ Examples:
config.max_context = args.max_context
if args.no_prompt:
config.no_prompt = True
if args.no_tools:
config.no_tools = True
# Apply small/tiny model system prompt if enabled
if config.micro:
......@@ -1498,15 +1526,25 @@ Examples:
# Get message
message = args.message or args.msg_flag
if os.environ.get('CODER_DEBUG'):
print(f"DEBUG: message = {message}", file=sys.stderr)
print(f"DEBUG: args.no_stream = {args.no_stream}", file=sys.stderr)
if message:
# Single message mode - disable confirmations for non-interactive use
if os.environ.get('CODER_DEBUG'):
print(f"DEBUG: Entering single message mode", file=sys.stderr)
client.config.confirm_all = False
result = client.chat(message, stream=not args.no_stream)
# Print result if non-streaming (streaming prints internally)
if args.no_stream and result:
print(result)
if os.environ.get('CODER_DEBUG'):
print(f"DEBUG: chat() returned", file=sys.stderr)
else:
# Interactive shell mode
if os.environ.get('CODER_DEBUG'):
print(f"DEBUG: Entering interactive shell mode", file=sys.stderr)
run_interactive_shell(client, session_manager)
......
......@@ -2012,6 +2012,7 @@ def main():
)
except Exception as e:
print(f"\nError loading model: {e}")
error_str = str(e).lower()
print("\nTroubleshooting:")
if args.backend == "vulkan":
print(" - For Vulkan, ensure you have Vulkan drivers installed")
......@@ -2020,6 +2021,16 @@ def main():
else:
print(" - For NVIDIA, ensure PyTorch with CUDA is installed")
print(" - Run build.sh with 'nvidia' argument first")
if "tokenizer" in error_str or "sentencepiece" in error_str or "tiktoken" in error_str:
print(" - Tokenizer error: ensure sentencepiece and tiktoken are installed")
print(" pip install sentencepiece tiktoken tokenizers")
# Check if trying to load GGUF model with NVIDIA backend
if "gguf" in model_name.lower():
print(f"\n *** IMPORTANT: '{model_name}' appears to be a GGUF model ***")
print(" GGUF models are NOT compatible with the NVIDIA backend.")
print(" Use --backend vulkan instead, or choose a HuggingFace Transformers model.")
print("\n Example Vulkan command:")
print(f" coderai --backend vulkan --model {model_name}")
sys.exit(1)
# Start the server
......
......@@ -14,6 +14,8 @@ psutil>=5.9.0
# Optional: for better performance with NVIDIA GPUs
bitsandbytes>=0.41.0
sentencepiece>=0.1.99
tiktoken>=0.5.0
tokenizers>=0.15.0
protobuf>=3.20.0
# Optional: Flash Attention 2 for faster inference on supported NVIDIA GPUs
......
# FastAPI and server dependencies
# CLI dependencies
# PyTorch - Uncomment the appropriate version for your system.
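# IMPORTANT: When installing from the command line, put quotes around version specifiers so the shell does not interpret them!
# Without quotes, the shell treats the ">" in ">=" as output redirection: `pip install torch>=2.0.0` installs an unpinned torch and redirects pip's output to a file named "=2.0.0".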
#
# Option 1: Use exact versions (recommended for requirements.txt)
# Option 2: Use quotes: pip install "torch>=2.0.0"
# For NVIDIA (CUDA):
# torch==2.0.0
torchvision
torchaudio
# For AMD (ROCm) - see available versions at https://pytorch.org/get-started/locally/
# rocm6.0 is recommended for newer AMD GPUs, rocm5.6 for older ones
# --index-url https://download.pytorch.org/whl/rocm6.0
# torch==2.0.0
# torchvision==0.15.0
# torchaudio==2.0.0
# For CPU only:
torch
# ML dependencies
transformers
accelerate
# System resource detection
psutil
# Optional: for better performance
bitsandbytes>=0.41.0 # for 4-bit/8-bit quantization
sentencepiece>=0.1.99 # for some tokenizers
protobuf>=3.20.0 # for some models
# Optional: Flash Attention 2 for faster inference on supported GPUs
# Requires specific CUDA/ROCm versions and may need manual installation
# Install with: pip install flash-attn --no-build-isolation
#flash-attn>=2.5.0
# Installation instructions:
# IMPORTANT: Always use quotes or exact versions to avoid shell redirection issues!
#
# 1. For NVIDIA GPUs (CUDA 12.1):
# pip install torch torchvision torchaudio
#
# 2. For AMD GPUs (ROCm 6.0 recommended):
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0
#
# 3. For CPU only:
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
#
# If you see "No such file or directory: '0.0'" errors, you forgot to use quotes!
# The shell interprets the ">" in ">=" as output redirection. Fix: pip install "torch>=2.0.0" (with quotes)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment