Fix imports in coder CLI and add tokenizer dependencies + GGUF error detection

parent 905dc92d
@@ -4,7 +4,14 @@ coder - A CLI tool for interacting with coderai API
 Connects to OpenAI-compatible API and executes tools automatically.
 """
 
+# Debug: Verify script execution - imports must come first
+import sys
 import os
+if os.environ.get('CODER_DEBUG'):
+    print("DEBUG: Script started", file=sys.stderr)
+    print(f"DEBUG: Arguments: {sys.argv}", file=sys.stderr)
+    print(f"DEBUG: Python executable: {sys.executable}", file=sys.stderr)
+
 import sys
 import json
 import argparse
@@ -151,6 +158,7 @@ class Config:
     debug: bool = False  # Show debug output including raw tool calls
     max_context: int = 32768  # Maximum context size in tokens
     no_prompt: bool = False  # Don't send system prompt
+    no_tools: bool = False  # Don't send tool definitions
 
     def __post_init__(self):
         if self.confirm_commands is None:
@@ -179,6 +187,7 @@ class Config:
             config.debug = data.get('debug', config.debug)
             config.max_context = data.get('max_context', config.max_context)
             config.no_prompt = data.get('no_prompt', config.no_prompt)
+            config.no_tools = data.get('no_tools', config.no_tools)
         except (json.JSONDecodeError, IOError) as e:
             print(f"Warning: Could not load config from {config_path}: {e}", file=sys.stderr)
@@ -203,7 +212,8 @@ class Config:
             'timeout': self.timeout,
             'debug': self.debug,
             'max_context': self.max_context,
-            'no_prompt': self.no_prompt
+            'no_prompt': self.no_prompt,
+            'no_tools': self.no_tools
         }
         with open(config_path, 'w') as f:
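
Together, the three Config hunks give 'no_tools' the same lifecycle as 'no_prompt': a dataclass default, a 'data.get(...)' fallback on load, and a key in the saved JSON. Because the load path falls back to the field default, config files written before this commit (which lack the key) still load cleanly. A minimal round-trip sketch, with all unrelated Config fields trimmed away for illustration:

# Sketch only: trimmed-down Config showing why old config files
# (with no 'no_tools' key) still load after this commit.
import json
from dataclasses import dataclass

@dataclass
class Config:
    no_prompt: bool = False
    no_tools: bool = False  # new in this commit

    @classmethod
    def load(cls, path: str) -> "Config":
        config = cls()
        try:
            with open(path) as f:
                data = json.load(f)
            config.no_prompt = data.get('no_prompt', config.no_prompt)
            # Missing key -> keeps the dataclass default (False)
            config.no_tools = data.get('no_tools', config.no_tools)
        except (json.JSONDecodeError, IOError) as e:
            print(f"Warning: Could not load config from {path}: {e}")
        return config

    def save(self, path: str) -> None:
        with open(path, 'w') as f:
            json.dump({'no_prompt': self.no_prompt,
                       'no_tools': self.no_tools}, f, indent=2)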
@@ -544,14 +554,18 @@ class CoderClient:
         if self.config.token:
             headers["Authorization"] = f"Bearer {self.config.token}"
 
+        # Build payload, conditionally including tools
         payload = {
             "model": self.config.model,
             "messages": messages,
-            "tools": self.tool_executor.tools,
-            "tool_choice": "auto",
             "stream": stream
         }
+
+        # Only include tools if not disabled
+        if not self.config.no_tools:
+            payload["tools"] = self.tool_executor.tools
+            payload["tool_choice"] = "auto"
 
         try:
             response = requests.post(
                 f"{self.config.api_url}/chat/completions",
@@ -1024,14 +1038,18 @@ class CoderClient:
         if self.config.token:
             headers["Authorization"] = f"Bearer {self.config.token}"
 
+        # Build payload, conditionally including tools
         payload = {
             "model": self.config.model,
             "messages": messages,
-            "tools": self.tool_executor.tools,
-            "tool_choice": "auto",
             "stream": True
         }
+
+        # Only include tools if not disabled
+        if not self.config.no_tools:
+            payload["tools"] = self.tool_executor.tools
+            payload["tool_choice"] = "auto"
 
         response = requests.post(
             f"{self.config.api_url}/chat/completions",
             headers=headers,
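
Both request paths now build the payload the same way, so the net effect is easiest to see from the request bodies themselves. A sketch of the two shapes an OpenAI-compatible /chat/completions endpoint receives; the model name, messages, and the tool entry here are placeholders, not the CLI's real definitions:

# Illustrative request bodies; 'messages' and the tool entry are stand-ins.
messages = [{"role": "user", "content": "hello"}]

# Default: tools and tool_choice are sent, so the model may emit tool calls.
payload_with_tools = {
    "model": "some-model",
    "messages": messages,
    "stream": True,
    "tools": [{"type": "function",
               "function": {"name": "run_command", "parameters": {}}}],
    "tool_choice": "auto",
}

# With --no-tools: the keys are absent entirely (not null or an empty
# list), so the request never depends on how a given server treats
# "tools": [].
payload_plain_chat = {
    "model": "some-model",
    "messages": messages,
    "stream": True,
}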
@@ -1409,6 +1427,13 @@ Examples:
         help='Show debug output including raw tool calls'
     )
 
+    parser.add_argument(
+        '--no-tools',
+        action='store_true',
+        dest='no_tools',
+        help='Do not send tool definitions to the API (plain chat mode)'
+    )
+
     parser.add_argument(
         '--ctx',
         type=int,
@@ -1436,7 +1461,8 @@ Examples:
             'model': config.model,
             'small': config.small,
             'tiny': config.tiny,
-            'max_context': config.max_context
+            'max_context': config.max_context,
+            'no_tools': config.no_tools
         }, indent=2))
         return
@@ -1466,6 +1492,8 @@ Examples:
         config.max_context = args.max_context
     if args.no_prompt:
         config.no_prompt = True
+    if args.no_tools:
+        config.no_tools = True
 
     # Apply small/tiny model system prompt if enabled
     if config.micro:
@@ -1498,15 +1526,25 @@ Examples:
     # Get message
     message = args.message or args.msg_flag
 
+    if os.environ.get('CODER_DEBUG'):
+        print(f"DEBUG: message = {message}", file=sys.stderr)
+        print(f"DEBUG: args.no_stream = {args.no_stream}", file=sys.stderr)
+
     if message:
         # Single message mode - disable confirmations for non-interactive use
+        if os.environ.get('CODER_DEBUG'):
+            print("DEBUG: Entering single message mode", file=sys.stderr)
         client.config.confirm_all = False
         result = client.chat(message, stream=not args.no_stream)
         # Print result if non-streaming (streaming prints internally)
         if args.no_stream and result:
             print(result)
+        if os.environ.get('CODER_DEBUG'):
+            print("DEBUG: chat() returned", file=sys.stderr)
     else:
         # Interactive shell mode
+        if os.environ.get('CODER_DEBUG'):
+            print("DEBUG: Entering interactive shell mode", file=sys.stderr)
         run_interactive_shell(client, session_manager)
...
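
Every debug hunk above repeats the same "if os.environ.get('CODER_DEBUG'):" guard. One way to condense the pattern (a hypothetical helper, not part of this commit) is a single gated print function; running the CLI with CODER_DEBUG=1 set then enables all of it at once:

# Hypothetical helper, not from the commit: same behavior as the
# repeated guards. The env var is checked on every call, so output
# stays off unless CODER_DEBUG is set in the environment.
import os
import sys

def debug(msg: str) -> None:
    if os.environ.get('CODER_DEBUG'):
        print(f"DEBUG: {msg}", file=sys.stderr)

debug(f"Arguments: {sys.argv}")
debug("Entering single message mode")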
@@ -2012,6 +2012,7 @@ def main():
         )
     except Exception as e:
         print(f"\nError loading model: {e}")
+        error_str = str(e).lower()
         print("\nTroubleshooting:")
         if args.backend == "vulkan":
             print("  - For Vulkan, ensure you have Vulkan drivers installed")
@@ -2020,6 +2021,16 @@ def main():
         else:
             print("  - For NVIDIA, ensure PyTorch with CUDA is installed")
             print("  - Run build.sh with 'nvidia' argument first")
+        if "tokenizer" in error_str or "sentencepiece" in error_str or "tiktoken" in error_str:
+            print("  - Tokenizer error: ensure sentencepiece and tiktoken are installed")
+            print("      pip install sentencepiece tiktoken tokenizers")
+        # Check if trying to load GGUF model with NVIDIA backend
+        if "gguf" in model_name.lower():
+            print(f"\n  *** IMPORTANT: '{model_name}' appears to be a GGUF model ***")
+            print("  GGUF models are NOT compatible with the NVIDIA backend.")
+            print("  Use --backend vulkan instead, or choose a HuggingFace Transformers model.")
+            print("\n  Example Vulkan command:")
+            print(f"    coderai --backend vulkan --model {model_name}")
         sys.exit(1)
 
     # Start the server
...
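
The new troubleshooting logic is heuristic by design: it substring-matches the lowered exception text for tokenizer-related package names, and substring-matches the model name for "gguf", so a name like "my-gguf-export" also triggers the backend warning whatever actually failed. A condensed sketch of the same checks; the function name and list-of-hints return are illustrative, not from the commit:

# Illustrative refactor of the diff's inline checks into one function.
def diagnose_load_error(exc: Exception, model_name: str) -> list[str]:
    hints = []
    error_str = str(exc).lower()
    # Tokenizer hint: fires if the error text mentions any of these packages.
    if any(s in error_str for s in ("tokenizer", "sentencepiece", "tiktoken")):
        hints.append("Install tokenizer deps: pip install sentencepiece tiktoken tokenizers")
    # GGUF hint: keyed off the model *name*, not the error text.
    if "gguf" in model_name.lower():
        hints.append(f"'{model_name}' appears to be a GGUF model: "
                     "use --backend vulkan, not the NVIDIA backend.")
    return hints

print(diagnose_load_error(ValueError("unknown tokenizer class"), "llama-3-8b"))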
@@ -14,6 +14,8 @@ psutil>=5.9.0
 # Optional: for better performance with NVIDIA GPUs
 bitsandbytes>=0.41.0
 sentencepiece>=0.1.99
+tiktoken>=0.5.0
+tokenizers>=0.15.0
 protobuf>=3.20.0
 
 # Optional: Flash Attention 2 for faster inference on supported NVIDIA GPUs
...
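
Since tokenizer failures only surface at model-load time, a quick preflight check that the three new requirements actually import can save a failed launch. A small sketch using only the packages the requirements file names:

# Preflight: report which tokenizer packages are importable.
import importlib

for pkg in ("sentencepiece", "tiktoken", "tokenizers"):
    try:
        importlib.import_module(pkg)
        print(f"ok      {pkg}")
    except ImportError as e:
        print(f"MISSING {pkg}: {e}")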
# FastAPI and server dependencies
# CLI dependencies

# PyTorch - Uncomment the appropriate version for your system.
# IMPORTANT: Use quotes around version specifiers to prevent shell interpretation!
# The >= operator will be interpreted as output redirection without quotes!
#
# Option 1: Use exact versions (recommended for requirements.txt)
# Option 2: Use quotes: pip install "torch>=2.0.0"

# For NVIDIA (CUDA):
# torch==2.0.0
torchvision
torchaudio

# For AMD (ROCm) - see available versions at https://pytorch.org/get-started/locally/
# rocm6.0 is recommended for newer AMD GPUs, rocm5.6 for older ones
# --index-url https://download.pytorch.org/whl/rocm6.0
# torch==2.0.0
# torchvision==0.15.0
# torchaudio==2.0.0

# For CPU only:
torch

# ML dependencies
transformers
accelerate

# System resource detection
psutil

# Optional: for better performance
bitsandbytes>=0.41.0  # for 4-bit/8-bit quantization
sentencepiece>=0.1.99  # for some tokenizers
protobuf>=3.20.0  # for some models

# Optional: Flash Attention 2 for faster inference on supported GPUs
# Requires specific CUDA/ROCm versions and may need manual installation
# Install with: pip install flash-attn --no-build-isolation
#flash-attn>=2.5.0

# Installation instructions:
# IMPORTANT: Always use quotes or exact versions to avoid shell redirection issues!
#
# 1. For NVIDIA GPUs (CUDA 12.1):
#    pip install torch torchvision torchaudio
#
# 2. For AMD GPUs (ROCm 6.0 recommended):
#    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0
#
# 3. For CPU only:
#    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
#
# If you see "No such file or directory: '0.0'" errors, you forgot to use quotes!
# The shell interprets >= as redirection. Fix: pip install "torch>=2.0.0" (with quotes)