Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in
Toggle navigation
C
coderai
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexlab
coderai
Commits
2ca7368f
Commit
2ca7368f
authored
Mar 01, 2026
by
Stefy Lanza (nextime / spora )
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix imports in coder CLI and add tokenizer dependencies + GGUF error detection
parent
905dc92d
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
114 additions
and
6 deletions
+114
-6
coder
coder
+44
-6
coderai
coderai
+11
-0
requirements-nvidia.txt
requirements-nvidia.txt
+2
-0
requirements.txt~
requirements.txt~
+57
-0
No files found.
coder
View file @
2ca7368f
...
@@ -4,7 +4,14 @@ coder - A CLI tool for interacting with coderai API
...
@@ -4,7 +4,14 @@ coder - A CLI tool for interacting with coderai API
Connects to OpenAI-compatible API and executes tools automatically.
Connects to OpenAI-compatible API and executes tools automatically.
"""
"""
# Debug: Verify script execution - imports must come first
import
sys
import
os
import
os
if
os
.
environ
.
get
(
'CODER_DEBUG'
):
print
(
f
"DEBUG: Script started"
,
file
=
sys
.
stderr
)
print
(
f
"DEBUG: Arguments: {sys.argv}"
,
file
=
sys
.
stderr
)
print
(
f
"DEBUG: Python executable: {sys.executable}"
,
file
=
sys
.
stderr
)
import
sys
import
sys
import
json
import
json
import
argparse
import
argparse
...
@@ -151,6 +158,7 @@ class Config:
...
@@ -151,6 +158,7 @@ class Config:
debug
:
bool
=
False
# Show debug output including raw tool calls
debug
:
bool
=
False
# Show debug output including raw tool calls
max_context
:
int
=
32768
# Maximum context size in tokens
max_context
:
int
=
32768
# Maximum context size in tokens
no_prompt
:
bool
=
False
# Don't send system prompt
no_prompt
:
bool
=
False
# Don't send system prompt
no_tools
:
bool
=
False
# Don't send tool definitions
def
__post_init__
(
self
):
def
__post_init__
(
self
):
if
self
.
confirm_commands
is
None
:
if
self
.
confirm_commands
is
None
:
...
@@ -179,6 +187,7 @@ class Config:
...
@@ -179,6 +187,7 @@ class Config:
config
.
debug
=
data
.
get
(
'debug'
,
config
.
debug
)
config
.
debug
=
data
.
get
(
'debug'
,
config
.
debug
)
config
.
max_context
=
data
.
get
(
'max_context'
,
config
.
max_context
)
config
.
max_context
=
data
.
get
(
'max_context'
,
config
.
max_context
)
config
.
no_prompt
=
data
.
get
(
'no_prompt'
,
config
.
no_prompt
)
config
.
no_prompt
=
data
.
get
(
'no_prompt'
,
config
.
no_prompt
)
config
.
no_tools
=
data
.
get
(
'no_tools'
,
config
.
no_tools
)
except
(
json
.
JSONDecodeError
,
IOError
)
as
e
:
except
(
json
.
JSONDecodeError
,
IOError
)
as
e
:
print
(
f
"Warning: Could not load config from {config_path}: {e}"
,
file
=
sys
.
stderr
)
print
(
f
"Warning: Could not load config from {config_path}: {e}"
,
file
=
sys
.
stderr
)
...
@@ -203,7 +212,8 @@ class Config:
...
@@ -203,7 +212,8 @@ class Config:
'timeout'
:
self
.
timeout
,
'timeout'
:
self
.
timeout
,
'debug'
:
self
.
debug
,
'debug'
:
self
.
debug
,
'max_context'
:
self
.
max_context
,
'max_context'
:
self
.
max_context
,
'no_prompt'
:
self
.
no_prompt
'no_prompt'
:
self
.
no_prompt
,
'no_tools'
:
self
.
no_tools
}
}
with
open
(
config_path
,
'w'
)
as
f
:
with
open
(
config_path
,
'w'
)
as
f
:
...
@@ -544,14 +554,18 @@ class CoderClient:
...
@@ -544,14 +554,18 @@ class CoderClient:
if
self
.
config
.
token
:
if
self
.
config
.
token
:
headers
[
"Authorization"
]
=
f
"Bearer {self.config.token}"
headers
[
"Authorization"
]
=
f
"Bearer {self.config.token}"
# Build payload, conditionally including tools
payload
=
{
payload
=
{
"model"
:
self
.
config
.
model
,
"model"
:
self
.
config
.
model
,
"messages"
:
messages
,
"messages"
:
messages
,
"tools"
:
self
.
tool_executor
.
tools
,
"tool_choice"
:
"auto"
,
"stream"
:
stream
"stream"
:
stream
}
}
# Only include tools if not disabled
if
not
self
.
config
.
no_tools
:
payload
[
"tools"
]
=
self
.
tool_executor
.
tools
payload
[
"tool_choice"
]
=
"auto"
try
:
try
:
response
=
requests
.
post
(
response
=
requests
.
post
(
f
"{self.config.api_url}/chat/completions"
,
f
"{self.config.api_url}/chat/completions"
,
...
@@ -1024,14 +1038,18 @@ class CoderClient:
...
@@ -1024,14 +1038,18 @@ class CoderClient:
if
self
.
config
.
token
:
if
self
.
config
.
token
:
headers
[
"Authorization"
]
=
f
"Bearer {self.config.token}"
headers
[
"Authorization"
]
=
f
"Bearer {self.config.token}"
# Build payload, conditionally including tools
payload
=
{
payload
=
{
"model"
:
self
.
config
.
model
,
"model"
:
self
.
config
.
model
,
"messages"
:
messages
,
"messages"
:
messages
,
"tools"
:
self
.
tool_executor
.
tools
,
"tool_choice"
:
"auto"
,
"stream"
:
True
"stream"
:
True
}
}
# Only include tools if not disabled
if
not
self
.
config
.
no_tools
:
payload
[
"tools"
]
=
self
.
tool_executor
.
tools
payload
[
"tool_choice"
]
=
"auto"
response
=
requests
.
post
(
response
=
requests
.
post
(
f
"{self.config.api_url}/chat/completions"
,
f
"{self.config.api_url}/chat/completions"
,
headers
=
headers
,
headers
=
headers
,
...
@@ -1409,6 +1427,13 @@ Examples:
...
@@ -1409,6 +1427,13 @@ Examples:
help
=
'Show debug output including raw tool calls'
help
=
'Show debug output including raw tool calls'
)
)
parser
.
add_argument
(
'--no-tools'
,
action
=
'store_true'
,
dest
=
'no_tools'
,
help
=
'Do not send tool definitions to the API (plain chat mode)'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--ctx'
,
'--ctx'
,
type
=
int
,
type
=
int
,
...
@@ -1436,7 +1461,8 @@ Examples:
...
@@ -1436,7 +1461,8 @@ Examples:
'model'
:
config
.
model
,
'model'
:
config
.
model
,
'small'
:
config
.
small
,
'small'
:
config
.
small
,
'tiny'
:
config
.
tiny
,
'tiny'
:
config
.
tiny
,
'max_context'
:
config
.
max_context
'max_context'
:
config
.
max_context
,
'no_tools'
:
config
.
no_tools
},
indent
=
2
))
},
indent
=
2
))
return
return
...
@@ -1466,6 +1492,8 @@ Examples:
...
@@ -1466,6 +1492,8 @@ Examples:
config
.
max_context
=
args
.
max_context
config
.
max_context
=
args
.
max_context
if
args
.
no_prompt
:
if
args
.
no_prompt
:
config
.
no_prompt
=
True
config
.
no_prompt
=
True
if
args
.
no_tools
:
config
.
no_tools
=
True
# Apply small/tiny model system prompt if enabled
# Apply small/tiny model system prompt if enabled
if
config
.
micro
:
if
config
.
micro
:
...
@@ -1498,15 +1526,25 @@ Examples:
...
@@ -1498,15 +1526,25 @@ Examples:
# Get message
# Get message
message
=
args
.
message
or
args
.
msg_flag
message
=
args
.
message
or
args
.
msg_flag
if
os
.
environ
.
get
(
'CODER_DEBUG'
):
print
(
f
"DEBUG: message = {message}"
,
file
=
sys
.
stderr
)
print
(
f
"DEBUG: args.no_stream = {args.no_stream}"
,
file
=
sys
.
stderr
)
if
message
:
if
message
:
# Single message mode - disable confirmations for non-interactive use
# Single message mode - disable confirmations for non-interactive use
if
os
.
environ
.
get
(
'CODER_DEBUG'
):
print
(
f
"DEBUG: Entering single message mode"
,
file
=
sys
.
stderr
)
client
.
config
.
confirm_all
=
False
client
.
config
.
confirm_all
=
False
result
=
client
.
chat
(
message
,
stream
=
not
args
.
no_stream
)
result
=
client
.
chat
(
message
,
stream
=
not
args
.
no_stream
)
# Print result if non-streaming (streaming prints internally)
# Print result if non-streaming (streaming prints internally)
if
args
.
no_stream
and
result
:
if
args
.
no_stream
and
result
:
print
(
result
)
print
(
result
)
if
os
.
environ
.
get
(
'CODER_DEBUG'
):
print
(
f
"DEBUG: chat() returned"
,
file
=
sys
.
stderr
)
else
:
else
:
# Interactive shell mode
# Interactive shell mode
if
os
.
environ
.
get
(
'CODER_DEBUG'
):
print
(
f
"DEBUG: Entering interactive shell mode"
,
file
=
sys
.
stderr
)
run_interactive_shell
(
client
,
session_manager
)
run_interactive_shell
(
client
,
session_manager
)
...
...
coderai
View file @
2ca7368f
...
@@ -2012,6 +2012,7 @@ def main():
...
@@ -2012,6 +2012,7 @@ def main():
)
)
except
Exception
as
e
:
except
Exception
as
e
:
print
(
f
"
\n
Error loading model: {e}"
)
print
(
f
"
\n
Error loading model: {e}"
)
error_str
=
str
(
e
)
.
lower
()
print
(
"
\n
Troubleshooting:"
)
print
(
"
\n
Troubleshooting:"
)
if
args
.
backend
==
"vulkan"
:
if
args
.
backend
==
"vulkan"
:
print
(
" - For Vulkan, ensure you have Vulkan drivers installed"
)
print
(
" - For Vulkan, ensure you have Vulkan drivers installed"
)
...
@@ -2020,6 +2021,16 @@ def main():
...
@@ -2020,6 +2021,16 @@ def main():
else
:
else
:
print
(
" - For NVIDIA, ensure PyTorch with CUDA is installed"
)
print
(
" - For NVIDIA, ensure PyTorch with CUDA is installed"
)
print
(
" - Run build.sh with 'nvidia' argument first"
)
print
(
" - Run build.sh with 'nvidia' argument first"
)
if
"tokenizer"
in
error_str
or
"sentencepiece"
in
error_str
or
"tiktoken"
in
error_str
:
print
(
" - Tokenizer error: ensure sentencepiece and tiktoken are installed"
)
print
(
" pip install sentencepiece tiktoken tokenizers"
)
# Check if trying to load GGUF model with NVIDIA backend
if
"gguf"
in
model_name
.
lower
():
print
(
f
"
\n
*** IMPORTANT: '{model_name}' appears to be a GGUF model ***"
)
print
(
" GGUF models are NOT compatible with the NVIDIA backend."
)
print
(
" Use --backend vulkan instead, or choose a HuggingFace Transformers model."
)
print
(
"
\n
Example Vulkan command:"
)
print
(
f
" coderai --backend vulkan --model {model_name}"
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
# Start the server
# Start the server
...
...
requirements-nvidia.txt
View file @
2ca7368f
...
@@ -14,6 +14,8 @@ psutil>=5.9.0
...
@@ -14,6 +14,8 @@ psutil>=5.9.0
# Optional: for better performance with NVIDIA GPUs
# Optional: for better performance with NVIDIA GPUs
bitsandbytes>=0.41.0
bitsandbytes>=0.41.0
sentencepiece>=0.1.99
sentencepiece>=0.1.99
tiktoken>=0.5.0
tokenizers>=0.15.0
protobuf>=3.20.0
protobuf>=3.20.0
# Optional: Flash Attention 2 for faster inference on supported NVIDIA GPUs
# Optional: Flash Attention 2 for faster inference on supported NVIDIA GPUs
...
...
requirements.txt~
0 → 100644
View file @
2ca7368f
# FastAPI and server dependencies
# CLI dependencies
# PyTorch - Uncomment the appropriate version for your system.
# IMPORTANT: Use quotes around version specifiers to prevent shell interpretation!
# Without quotes, the shell parses the ">" in ">=" as output redirection, so the version constraint is silently dropped!
#
# Option 1: Use exact versions (recommended for requirements.txt)
# Option 2: Use quotes: pip install "torch>=2.0.0"
# For NVIDIA (CUDA):
# torch==2.0.0
torchvision
torchaudio
# For AMD (ROCm) - see available versions at https://pytorch.org/get-started/locally/
# rocm6.0 is recommended for newer AMD GPUs, rocm5.6 for older ones
# --index-url https://download.pytorch.org/whl/rocm6.0
# torch==2.0.0
# torchvision==0.15.0
# torchaudio==2.0.0
# For CPU only:
torch
# ML dependencies
transformers
accelerate
# System resource detection
psutil
# Optional: for better performance
bitsandbytes>=0.41.0 # for 4-bit/8-bit quantization
sentencepiece>=0.1.99 # for some tokenizers
protobuf>=3.20.0 # for some models
# Optional: Flash Attention 2 for faster inference on supported GPUs
# Requires specific CUDA/ROCm versions and may need manual installation
# Install with: pip install flash-attn --no-build-isolation
#flash-attn>=2.5.0
# Installation instructions:
# IMPORTANT: Always use quotes or exact versions to avoid shell redirection issues!
#
# 1. For NVIDIA GPUs (CUDA 12.1):
# pip install torch torchvision torchaudio
#
# 2. For AMD GPUs (ROCm 6.0 recommended):
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0
#
# 3. For CPU only:
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
#
# If you see "No such file or directory: '0.0'" errors, you forgot to use quotes!
# The shell interprets the ">" in ">=" as output redirection. Fix: pip install "torch>=2.0.0" (with quotes)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment