Rewrite done

parent 80b1c060
......@@ -209,17 +209,20 @@ elif [ "$BACKEND" = "vulkan" ]; then
echo -e "${GREEN}✓ Found Vulkan shader compiler: $GLSLC_CMD${NC}"
fi
# Build with Vulkan support
# Build with Vulkan support (add CUDA too if available)
echo -e "${YELLOW}Building llama-cpp-python with Vulkan support...${NC}"
CMAKE_ARGS="-DGGML_VULKAN=ON" pip install --upgrade llama-cpp-python --no-cache-dir || {
_LLAMA_CMAKE="-DGGML_VULKAN=ON"
if command -v nvcc &> /dev/null || [ -d "/usr/local/cuda" ]; then
_LLAMA_CMAKE="$_LLAMA_CMAKE -DGGML_CUDA=ON"
echo -e "${GREEN} ✓ Also enabling CUDA support (NVIDIA detected)${NC}"
fi
CMAKE_ARGS="$_LLAMA_CMAKE" pip install --upgrade llama-cpp-python --no-cache-dir || {
echo -e "${RED}Build failed!${NC}"
exit 1
}
echo -e "${YELLOW}Installing Vulkan-specific requirements...${NC}"
pip install -r requirements-vulkan.txt
# Build whispercpp Python package with Vulkan support for GPU-accelerated audio transcription
echo -e "${YELLOW}Building whispercpp with Vulkan support for GPU-accelerated transcription...${NC}"
# First, uninstall any existing whispercpp (pip version doesn't have Vulkan)
......@@ -318,11 +321,16 @@ elif [ "$BACKEND" = "vulkan-nvidia" ]; then
echo -e "${GREEN}✓ Found Vulkan shader compiler: $GLSLC_CMD${NC}"
fi
# Build with Vulkan support
# Build with Vulkan support (add CUDA too if available)
# Note: llama.cpp doesn't have a compile-time option to disable specific GPUs
# The device selection happens at runtime via environment variables
echo -e "${YELLOW}Building llama-cpp-python with Vulkan support...${NC}"
CMAKE_ARGS="-DGGML_VULKAN=ON" pip install --upgrade llama-cpp-python --no-cache-dir || {
_LLAMA_CMAKE="-DGGML_VULKAN=ON"
if command -v nvcc &> /dev/null || [ -d "/usr/local/cuda" ]; then
_LLAMA_CMAKE="$_LLAMA_CMAKE -DGGML_CUDA=ON"
echo -e "${GREEN} ✓ Also enabling CUDA support (NVIDIA detected)${NC}"
fi
CMAKE_ARGS="$_LLAMA_CMAKE" pip install --upgrade llama-cpp-python --no-cache-dir || {
echo -e "${RED}Build failed!${NC}"
exit 1
}
......@@ -378,10 +386,15 @@ elif [ "$BACKEND" = "cuda" ]; then
echo -e "${GREEN}✓ Found CUDA at /usr/local/cuda${NC}"
fi
# Build llama-cpp-python with CUDA support
# Build llama-cpp-python with CUDA support (add Vulkan too if available)
echo -e "${YELLOW}Building llama-cpp-python with CUDA support...${NC}"
echo -e "${YELLOW}This may take several minutes...${NC}"
CMAKE_ARGS="-DGGML_CUDA=ON" pip install --upgrade llama-cpp-python --no-cache-dir || {
_LLAMA_CMAKE="-DGGML_CUDA=ON"
if pkg-config --exists vulkan 2>/dev/null; then
_LLAMA_CMAKE="$_LLAMA_CMAKE -DGGML_VULKAN=ON"
echo -e "${GREEN} ✓ Also enabling Vulkan support (Vulkan detected)${NC}"
fi
CMAKE_ARGS="$_LLAMA_CMAKE" pip install --upgrade llama-cpp-python --no-cache-dir || {
echo ""
echo -e "${RED}Build failed!${NC}"
echo -e "${YELLOW}Make sure CUDA toolkit is installed:${NC}"
......
#!/bin/bash
# Build script for CoderAI - Supports NVIDIA (CUDA), Vulkan, OpenCL, and CPU backends
# Usage: ./build.sh [nvidia|vulkan|vulkan-nvidia|cuda|opencl|all] [--flash] [--venv <venv>]
# Default: all (installs all backends)
# --flash: Enable and install Flash Attention 2 (for NVIDIA GPUs)
# --venv <venv>: Specify custom virtual environment name
# Stop the build as soon as an unchecked command fails.
set -e

# ANSI colour sequences, kept as literal backslash escapes and rendered
# later by `echo -e`.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
NC="\033[0m"  # reset (no colour)
# Determine backend and flags.
# These defaults are overwritten by parse_build_args below.
BACKEND="all"
FLASH=false
CUSTOM_VENV=""

#######################################
# Parse the script's command-line arguments.
#
# The first argument that is not an option selects the backend (an empty
# or missing backend means "all"); it is lower-cased before being stored.
# "--flash" sets FLASH=true and "--venv <name>" stores <name> in
# CUSTOM_VENV (the last occurrence wins). Options may appear in any
# position, so "./build.sh --flash" selects the default backend.
# Unrecognised options and extra positional arguments are ignored.
#
# Globals:   BACKEND, FLASH, CUSTOM_VENV (written); RED, NC (read)
# Arguments: the arguments to parse, normally "$@"
# Returns:   0 on success, 1 when --venv is given without a value
#######################################
parse_build_args() {
    local backend_seen=false

    BACKEND="all"
    FLASH=false
    CUSTOM_VENV=""

    while [ "$#" -gt 0 ]; do
        case "$1" in
            --flash)
                FLASH=true
                ;;
            --venv)
                if [ "$#" -lt 2 ]; then
                    echo -e "${RED}Error: --venv requires a virtual environment name${NC}" >&2
                    return 1
                fi
                CUSTOM_VENV="$2"
                # Consume the value here; the option itself is shifted below.
                shift
                ;;
            -*)
                # Unknown option: ignored.
                ;;
            *)
                if [ "$backend_seen" = false ]; then
                    BACKEND="${1:-all}"
                    backend_seen=true
                fi
                ;;
        esac
        shift
    done

    BACKEND=$(printf '%s' "$BACKEND" | tr '[:upper:]' '[:lower:]')
}

parse_build_args "$@" || exit 1
# Reject unknown backend names and print the usage summary.
# The summary lists every option the script accepts, including --venv.
case "$BACKEND" in
    nvidia|vulkan|vulkan-nvidia|cuda|opencl|all)
        ;;
    *)
        echo -e "${RED}Error: Invalid backend '$BACKEND'${NC}"
        echo "Usage: ./build.sh [nvidia|vulkan|vulkan-nvidia|cuda|opencl|all] [--flash] [--venv <venv>]"
        echo " nvidia - Use PyTorch with CUDA for NVIDIA GPUs"
        echo " vulkan - Use llama-cpp-python with Vulkan for AMD GPUs"
        echo " vulkan-nvidia - Use llama-cpp-python with Vulkan for NVIDIA GPU only"
        echo " cuda - Use llama-cpp-python with CUDA for NVIDIA GPUs"
        echo " opencl - Use stable-diffusion-cpp-python with OpenCL"
        echo " all - Install all backends (nvidia, cuda, vulkan, opencl, cpu) - DEFAULT"
        echo ""
        echo "Options:"
        echo " --flash - Install Flash Attention 2 for faster inference (NVIDIA only)"
        echo " --venv <venv> - Specify custom virtual environment name"
        exit 1
        ;;
esac
# Start-up banner: selected backend, plus a Flash Attention line when the
# --flash option was given.
banner_rule="${BLUE}========================================${NC}"
echo -e "$banner_rule"
echo -e "${BLUE} CoderAI Build Script${NC}"
echo -e "${BLUE} Backend: ${GREEN}$BACKEND${NC}"
if [[ "$FLASH" == true ]]; then
    echo -e "${BLUE} Flash Attention 2: ${GREEN}ENABLED${NC}"
fi
echo -e "$banner_rule"
echo ""
# Check Python version
REQUIRED_VERSION="3.8"

#######################################
# Compare two "major.minor" version strings numerically.
# Arguments: $1 - minimum required version, e.g. "3.8"
#            $2 - version to test, e.g. "3.10"
# Returns:   0 when $2 >= $1, 1 otherwise (including when either
#            argument does not start with "<digits>.<digits>")
#######################################
version_at_least() {
    local version_re='^([0-9]+)\.([0-9]+)'
    local req_major req_minor act_major act_minor

    [[ "$1" =~ $version_re ]] || return 1
    req_major=${BASH_REMATCH[1]}
    req_minor=${BASH_REMATCH[2]}

    [[ "$2" =~ $version_re ]] || return 1
    act_major=${BASH_REMATCH[1]}
    act_minor=${BASH_REMATCH[2]}

    # The 10# prefix forces base 10 so a leading zero is not read as octal.
    if (( 10#$act_major != 10#$req_major )); then
        (( 10#$act_major > 10#$req_major ))
        return
    fi
    (( 10#$act_minor >= 10#$req_minor ))
}

# Fail with a clear message when the interpreter is missing, instead of
# reporting an empty version.
if ! command -v python3 > /dev/null 2>&1; then
    echo -e "${RED}Error: python3 not found in PATH (Python ${REQUIRED_VERSION}+ required)${NC}"
    exit 1
fi

# Take the first "<digits>.<digits>" from the `python3 --version` output.
PYTHON_VERSION=""
python_version_output=$(python3 --version 2>&1) || python_version_output=""
if [[ "$python_version_output" =~ ([0-9]+)\.([0-9]+) ]]; then
    PYTHON_VERSION="${BASH_REMATCH[1]}.${BASH_REMATCH[2]}"
fi

if ! version_at_least "$REQUIRED_VERSION" "$PYTHON_VERSION"; then
    echo -e "${RED}Error: Python 3.8+ required, found ${PYTHON_VERSION:-unknown}${NC}"
    exit 1
fi
echo -e "${GREEN}✓ Python version: $PYTHON_VERSION${NC}"
# Choose the CMake flags passed to every stable-diffusion-cpp-python build.
# The pip release lacks the libwebm/build/ cmake submodule files, so WebM is
# only enabled when a system-wide libwebm can be found (via the ldconfig
# cache or pkg-config); otherwise WebM output is switched off.
SD_CMAKE_ARGS="-DSD_WEBM=OFF"
if ldconfig -p 2>/dev/null | grep -q "libwebm"; then
    SD_CMAKE_ARGS="-DSD_USE_SYSTEM_WEBM=ON"
elif pkg-config --exists libwebm 2>/dev/null; then
    SD_CMAKE_ARGS="-DSD_USE_SYSTEM_WEBM=ON"
fi

case "$SD_CMAKE_ARGS" in
    -DSD_USE_SYSTEM_WEBM=ON)
        echo -e "${GREEN}✓ Found system libwebm — stable-diffusion-cpp-python will use it${NC}"
        ;;
    *)
        echo -e "${YELLOW}Note: libwebm-dev not found — WebM video output disabled for stable-diffusion-cpp-python${NC}"
        echo -e "${YELLOW} Install libwebm-dev to enable WebM support${NC}"
        ;;
esac
# Pick the virtual environment directory: an explicit --venv name wins,
# otherwise each backend maps to its own venv_<backend> directory.
if [ -n "$CUSTOM_VENV" ]; then
    VENV_DIR="$CUSTOM_VENV"
    echo -e "${BLUE}Using custom virtual environment: $VENV_DIR${NC}"
else
    case "$BACKEND" in
        vulkan-nvidia)
            # The dash is replaced by an underscore in the directory name.
            VENV_DIR="venv_vulkan_nvidia"
            ;;
        nvidia|vulkan|cuda|opencl|all)
            VENV_DIR="venv_$BACKEND"
            ;;
    esac
fi
# Create the virtual environment on first use, otherwise reuse it.
venv_label="virtual environment"
if [ -n "$CUSTOM_VENV" ]; then
    venv_label="custom virtual environment"
fi
echo -e "${YELLOW}Creating ${venv_label}: $VENV_DIR${NC}"

if [ -d "$VENV_DIR" ]; then
    echo -e "${YELLOW}Using existing virtual environment: $VENV_DIR${NC}"
else
    python3 -m venv "$VENV_DIR"
    echo -e "${GREEN}✓ Created virtual environment: $VENV_DIR${NC}"
fi

# Activate it so every later `pip` call targets this environment.
echo -e "${YELLOW}Activating virtual environment...${NC}"
. "$VENV_DIR/bin/activate"

# pip settings: never prompt, and refuse to run outside a virtualenv.
export PIP_NO_INPUT=1 PIP_REQUIRE_VIRTUALENV=1

# Bring pip itself up to date before installing anything else.
echo -e "${YELLOW}Upgrading pip...${NC}"
pip install --upgrade pip

echo ""
echo -e "${BLUE}Installing dependencies for $BACKEND backend...${NC}"
echo ""
if [ "$BACKEND" = "nvidia" ]; then
# NVIDIA/CUDA backend: PyTorch plus the packages in requirements-nvidia.txt.
echo -e "${YELLOW}Installing PyTorch with CUDA support...${NC}"
pip install "torch>=2.0.0" "torchvision>=0.15.0" "torchaudio>=2.0.0"

# A failure here is reported but does not abort the build.
echo -e "${YELLOW}Installing NVIDIA-specific requirements...${NC}"
pip install -r requirements-nvidia.txt || {
    echo -e "${YELLOW}Warning: Some NVIDIA packages failed to install${NC}"
}

# Install Flash Attention 2 if requested; the build continues without it
# when the installation fails.
if [ "$FLASH" = true ]; then
    echo ""
    echo -e "${YELLOW}Installing Flash Attention 2...${NC}"
    echo -e "${YELLOW}This may take several minutes and requires CUDA 11.6+${NC}"
    MAX_JOBS=5 NVCC_THREADS=2 pip install flash-attn --no-build-isolation || {
        echo -e "${RED}Warning: Flash Attention 2 installation failed${NC}"
        echo -e "${YELLOW}Requirements: CUDA 11.6+, Linux, Ampere/Ada/Hopper GPU${NC}"
        echo -e "${YELLOW}Continuing without Flash Attention...${NC}"
    }
fi

# Completion banner and usage, printed exactly once.
echo ""
echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN} NVIDIA/CUDA build complete!${NC}"
echo -e "${GREEN}========================================${NC}"
echo ""
echo "Usage:"
echo " source $VENV_DIR/bin/activate"
echo " python coderai --model <huggingface-model-name>"
if [ "$FLASH" = true ]; then
    echo ""
    echo "Flash Attention 2 enabled - use --flash-attn flag when running"
fi
echo ""
echo "Example:"
echo " python coderai --model microsoft/DialoGPT-medium"
echo ""
elif [ "$BACKEND" = "vulkan" ]; then
# Vulkan backend (all GPUs)
echo -e "${YELLOW}Installing llama-cpp-python with Vulkan support (all GPUs)...${NC}"

# Warn (but carry on) when pkg-config cannot see the Vulkan development files.
pkg-config --exists vulkan 2>/dev/null || {
    echo -e "${YELLOW}Warning: Vulkan development libraries not found via pkg-config${NC}"
    echo -e "${YELLOW}You may need to install Vulkan drivers and SDK:${NC}"
    echo " Debian/Ubuntu: sudo apt install libvulkan-dev vulkan-tools"
    echo " Fedora: sudo dnf install vulkan-loader-devel vulkan-tools"
    echo " Arch: sudo pacman -S vulkan-headers vulkan-icd-loader"
    echo ""
    echo -e "${YELLOW}Attempting installation anyway...${NC}"
}

# Look for a Vulkan shader compiler; glslc is preferred over glslangValidator.
GLSLC_CMD=""
for shader_tool in glslc glslangValidator; do
    if command -v "$shader_tool" &> /dev/null; then
        GLSLC_CMD="$shader_tool"
        break
    fi
done
if [ -n "$GLSLC_CMD" ]; then
    echo -e "${GREEN}✓ Found Vulkan shader compiler: $GLSLC_CMD${NC}"
else
    echo -e "${YELLOW}Warning: glslc/glslangValidator not found in PATH${NC}"
fi

# Compile llama-cpp-python with the Vulkan backend; a failed build is fatal.
echo -e "${YELLOW}Building llama-cpp-python with Vulkan support...${NC}"
if ! CMAKE_ARGS="-DGGML_VULKAN=ON" pip install --upgrade llama-cpp-python --no-cache-dir; then
    echo -e "${RED}Build failed!${NC}"
    exit 1
fi

echo -e "${YELLOW}Installing Vulkan-specific requirements...${NC}"
pip install -r requirements-vulkan.txt
# Build whispercpp Python package with Vulkan support for GPU-accelerated audio transcription
echo -e "${YELLOW}Building whispercpp with Vulkan support for GPU-accelerated transcription...${NC}"

# First, uninstall any existing whispercpp (pip version doesn't have Vulkan)
pip uninstall -y whispercpp 2>/dev/null || true

# Clone whisper.cpp (shallow) unless a checkout already exists.
WHISPERCPP_DIR="$HOME/whisper.cpp"
if [ ! -d "$WHISPERCPP_DIR" ]; then
    echo "Cloning whisper.cpp..."
    git clone --depth 1 https://github.com/ggerganov/whisper.cpp "$WHISPERCPP_DIR" 2>/dev/null || {
        echo -e "${YELLOW}Warning: Could not clone whisper.cpp${NC}"
    }
fi

if [ -d "$WHISPERCPP_DIR/bindings/python" ]; then
    # Each install runs in a subshell so the script's working directory never
    # changes, and the outcome that is reported is the one that happened.
    # CMAKE_ARGS enables Vulkan for whisper and for the ggml library it uses.
    if (
        cd "$WHISPERCPP_DIR/bindings/python" &&
            CMAKE_ARGS="-DWHISPER_VULKAN=ON -DGGML_VULKAN=ON" \
                pip install . --no-cache-dir --force-reinstall 2>/dev/null
    ); then
        echo -e "${GREEN}✓ whispercpp with Vulkan support installed!${NC}"
    else
        # If the Vulkan build fails, try a plain build (CPU fallback).
        echo -e "${YELLOW}Warning: whispercpp Vulkan build failed, will use CPU${NC}"
        if (
            cd "$WHISPERCPP_DIR/bindings/python" &&
                pip install . --no-cache-dir --force-reinstall 2>/dev/null
        ); then
            echo -e "${GREEN}✓ whispercpp installed (CPU only)${NC}"
        else
            echo -e "${YELLOW}Warning: Could not install whispercpp at all${NC}"
        fi
    fi
else
    echo -e "${YELLOW}Warning: whisper.cpp Python bindings not found${NC}"
fi
# Also build the main whisper.cpp C++ with Vulkan for standalone usage.
# This step is optional: every failure is reported as a warning and the
# script carries on.
echo -e "${YELLOW}Building whisper.cpp C++ with Vulkan support (optional)...${NC}"
WHISPER_DIR="$HOME/whisper.cpp"
if [ -d "$WHISPER_DIR" ]; then
    echo "Using existing whisper.cpp installation"
else
    echo "Cloning whisper.cpp..."
    git clone https://github.com/ggerganov/whisper.cpp "$WHISPER_DIR" 2>/dev/null || {
        echo -e "${YELLOW}Warning: Could not clone whisper.cpp. Audio transcription will use CPU.${NC}"
    }
fi

if [ -d "$WHISPER_DIR" ]; then
    # Configure and compile inside a subshell so the script stays in the
    # directory it was started from; later steps write files relative to it.
    if (
        mkdir -p "$WHISPER_DIR/build" 2>/dev/null || exit 1
        cd "$WHISPER_DIR/build" || exit 1
        if ! cmake -DGGML_VULKAN=ON .. >/dev/null 2>&1; then
            # NOTE(review): the fallback only passes -DBUILD_SHARED_LIBS=ON;
            # confirm that this really yields an OpenBLAS build as the
            # message says.
            echo -e "${YELLOW}Warning: Vulkan build failed, building with OpenBLAS${NC}"
            cmake -DBUILD_SHARED_LIBS=ON .. >/dev/null 2>&1 || exit 1
        fi
        make -j"$(nproc)" >/dev/null 2>&1
    ); then
        :
    else
        echo -e "${YELLOW}Warning: Build failed. Audio transcription will use CPU.${NC}"
    fi

    # Fetch the base model unless it is already present.
    if [ ! -f "$WHISPER_DIR/models/ggml-base.bin" ]; then
        echo "Downloading Whisper base model..."
        bash "$WHISPER_DIR/models/download-ggml-model.sh" base 2>/dev/null || {
            echo -e "${YELLOW}Warning: Could not download Whisper model.${NC}"
        }
    fi
    echo -e "${GREEN}✓ whisper.cpp ready for audio transcription!${NC}"
fi

echo ""
echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN} Vulkan build complete!${NC}"
echo -e "${GREEN}========================================${NC}"
echo ""
echo "Usage:"
echo " python coderai --model <gguf-model> --backend vulkan"
echo ""
elif [ "$BACKEND" = "vulkan-nvidia" ]; then
# Vulkan backend (NVIDIA only)
echo -e "${YELLOW}Installing llama-cpp-python with Vulkan support (NVIDIA-only)...${NC}"

# Check for required Vulkan development libraries
if ! pkg-config --exists vulkan 2>/dev/null; then
    echo -e "${YELLOW}Warning: Vulkan development libraries not found via pkg-config${NC}"
fi

# Check for glslc (Vulkan shader compiler); glslangValidator is the fallback.
GLSLC_CMD=""
if command -v glslc &> /dev/null; then
    GLSLC_CMD="glslc"
elif command -v glslangValidator &> /dev/null; then
    GLSLC_CMD="glslangValidator"
fi
if [ -z "$GLSLC_CMD" ]; then
    echo -e "${YELLOW}Warning: glslc/glslangValidator not found in PATH${NC}"
else
    echo -e "${GREEN}✓ Found Vulkan shader compiler: $GLSLC_CMD${NC}"
fi

# Build with Vulkan support
# Note: llama.cpp doesn't have a compile-time option to disable specific GPUs
# The device selection happens at runtime via environment variables
echo -e "${YELLOW}Building llama-cpp-python with Vulkan support...${NC}"
CMAKE_ARGS="-DGGML_VULKAN=ON" pip install --upgrade llama-cpp-python --no-cache-dir || {
    echo -e "${RED}Build failed!${NC}"
    exit 1
}

echo -e "${YELLOW}Installing Vulkan-specific requirements...${NC}"
pip install -r requirements-vulkan.txt

# Build whispercpp Python package with Vulkan support for GPU-accelerated audio transcription
echo -e "${YELLOW}Building whispercpp with Vulkan support for GPU-accelerated transcription...${NC}"
pip uninstall -y whispercpp 2>/dev/null || true
WHISPERCPP_DIR="$HOME/whisper.cpp"
if [ ! -d "$WHISPERCPP_DIR" ]; then
    git clone --depth 1 https://github.com/ggerganov/whisper.cpp "$WHISPERCPP_DIR" 2>/dev/null || true
fi
if [ -d "$WHISPERCPP_DIR/bindings/python" ]; then
    # Install from a subshell so the working directory of the script is left
    # untouched, and report the result that actually occurred: Vulkan build,
    # plain fallback build, or nothing installed.
    if (
        cd "$WHISPERCPP_DIR/bindings/python" &&
            CMAKE_ARGS="-DWHISPER_VULKAN=ON -DGGML_VULKAN=ON" \
                pip install . --no-cache-dir --force-reinstall 2>/dev/null
    ); then
        echo -e "${GREEN}✓ whispercpp with Vulkan support installed!${NC}"
    elif (
        cd "$WHISPERCPP_DIR/bindings/python" &&
            pip install . --no-cache-dir --force-reinstall 2>/dev/null
    ); then
        echo -e "${YELLOW}Warning: whispercpp Vulkan build failed, installed CPU-only build${NC}"
    else
        echo -e "${YELLOW}Warning: Could not install whispercpp at all${NC}"
    fi
fi

echo ""
echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN} Vulkan (NVIDIA-only) build complete!${NC}"
echo -e "${GREEN}========================================${NC}"
echo ""
echo "Usage:"
echo " VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/nvidia_icd.json \\"
echo " python coderai --model <gguf-model> --backend vulkan"
echo ""
echo "Note: This build includes both AMD and NVIDIA Vulkan support."
echo " At runtime, use VK_ICD_FILENAMES to select only NVIDIA."
echo ""
elif [ "$BACKEND" = "cuda" ]; then
# CUDA build of llama-cpp-python (NVIDIA GPUs).
echo -e "${YELLOW}Installing llama-cpp-python with CUDA support...${NC}"

# Report the CUDA release when nvcc is on PATH; otherwise only warn.
if command -v nvcc &> /dev/null; then
    # sed prints just the "release X.Y" number from the nvcc banner.
    CUDA_VERSION=$(nvcc --version | sed -n 's/.*release \([0-9.]*\),.*/\1/p')
    echo -e "${GREEN}✓ Found CUDA $CUDA_VERSION${NC}"
else
    echo -e "${YELLOW}Warning: CUDA toolkit (nvcc) not found in PATH${NC}"
    echo -e "${YELLOW}You may need to install CUDA toolkit:${NC}"
    echo " Download from: https://developer.nvidia.com/cuda-downloads"
fi

# Mention the conventional install prefix when it exists.
if [ -d "/usr/local/cuda" ]; then
    echo -e "${GREEN}✓ Found CUDA at /usr/local/cuda${NC}"
fi

# Compile llama-cpp-python against CUDA; a failed build stops the script.
echo -e "${YELLOW}Building llama-cpp-python with CUDA support...${NC}"
echo -e "${YELLOW}This may take several minutes...${NC}"
if ! CMAKE_ARGS="-DGGML_CUDA=ON" pip install --upgrade llama-cpp-python --no-cache-dir; then
    echo ""
    echo -e "${RED}Build failed!${NC}"
    echo -e "${YELLOW}Make sure CUDA toolkit is installed:${NC}"
    echo " sudo apt install cuda-toolkit-12"
    echo " or"
    echo " Download from: https://developer.nvidia.com/cuda-downloads"
    exit 1
fi

# This backend installs the package list in requirements-vulkan.txt.
echo -e "${YELLOW}Installing Vulkan-specific requirements...${NC}"
pip install -r requirements-vulkan.txt

cuda_rule="${GREEN}========================================${NC}"
echo ""
echo -e "$cuda_rule"
echo -e "${GREEN} llama-cpp-python CUDA build complete!${NC}"
echo -e "$cuda_rule"
echo ""
echo "Usage:"
echo " source $VENV_DIR/bin/activate"
echo " python coderai --model <gguf-model> --backend vulkan --vulkan-device 0"
echo ""
echo "Note: With CUDA backend, llama-cpp-python will only use NVIDIA GPUs."
echo ""
elif [ "$BACKEND" = "opencl" ]; then
# OpenCL backend: stable-diffusion-cpp-python.
echo -e "${YELLOW}Installing stable-diffusion-cpp-python with OpenCL support...${NC}"

# OpenCL counts as present when clinfo is on PATH or an OpenCL library
# sits in a /usr/lib/<subdir>/ directory.
if command -v clinfo &> /dev/null || ls /usr/lib/*/libOpenCL* &> /dev/null; then
    echo -e "${GREEN}✓ Found OpenCL${NC}"
else
    echo -e "${YELLOW}Warning: OpenCL not found in system${NC}"
    echo -e "${YELLOW}You may need to install OpenCL runtime:${NC}"
    echo " Debian/Ubuntu: sudo apt install ocl-icd-opencl-dev"
    echo " Fedora: sudo dnf install ocl-icd-devel"
fi

# Project-wide requirements come first.
echo -e "${YELLOW}Installing base requirements...${NC}"
pip install -r requirements.txt

# Build stable-diffusion-cpp-python with the WebM-related flags chosen
# earlier in SD_CMAKE_ARGS; a failure only prints a hint.
echo -e "${YELLOW}Installing stable-diffusion-cpp-python with OpenCL...${NC}"
if ! CMAKE_ARGS="$SD_CMAKE_ARGS" pip install stable-diffusion-cpp-python; then
    echo ""
    echo -e "${YELLOW}Note: If stable-diffusion-cpp-python is not available with pip,${NC}"
    echo -e "${YELLOW}you may need to build from source.${NC}"
fi

# Extra packages installed for the OpenCL backend.
echo -e "${YELLOW}Installing additional requirements for OpenCL...${NC}"
pip install numpy pillow

opencl_rule="${GREEN}========================================${NC}"
echo ""
echo -e "$opencl_rule"
echo -e "${GREEN} OpenCL build complete!${NC}"
echo -e "$opencl_rule"
echo ""
echo "Usage:"
echo " source $VENV_DIR/bin/activate"
echo " python coderai --model <model> --image-backend opencl"
echo ""
echo "Note: With OpenCL backend, stable-diffusion-cpp-python can use various GPUs."
echo ""
elif [ "$BACKEND" = "all" ]; then
# "all" backend: NVIDIA (CUDA), Vulkan, OpenCL and CPU in one environment.
all_rule="${BLUE}========================================${NC}"
echo -e "$all_rule"
echo -e "${BLUE} Installing ALL backends${NC}"
echo -e "${BLUE} (NVIDIA/CUDA, Vulkan, OpenCL, CPU)${NC}"
echo -e "$all_rule"
echo ""

# Base requirements
echo -e "${YELLOW}Installing base requirements...${NC}"
pip install --upgrade pip

# Try the full requirements file first; when that fails, fall back to
# installing package groups one at a time so a single bad package does not
# block the rest.
echo -e "${YELLOW}Installing core dependencies...${NC}"
if ! pip install -r requirements.txt; then
    echo -e "${YELLOW}Some packages failed to install, trying individually...${NC}"

    # The core packages are mandatory: failing here aborts the build.
    if ! pip install fastapi uvicorn pydantic requests python-multipart psutil; then
        echo -e "${RED}Failed to install core dependencies${NC}"
        exit 1
    fi

    # Everything below is optional and only produces a warning on failure.
    echo -e "${YELLOW}Installing optional packages...${NC}"
    pip install transformers accelerate diffusers safetensors || echo -e "${YELLOW}Warning: Some ML packages failed${NC}"
    for optional_pkg in faster-whisper whispercpp litellm; do
        pip install "$optional_pkg" || echo -e "${YELLOW}Warning: $optional_pkg failed${NC}"
    done
    pip install setproctitle || echo -e "${YELLOW}Warning: setproctitle failed (optional)${NC}"

    # stable-diffusion-cpp-python is built with the WebM flags from SD_CMAKE_ARGS.
    CMAKE_ARGS="$SD_CMAKE_ARGS" pip install stable-diffusion-cpp-python || echo -e "${YELLOW}Warning: stable-diffusion-cpp-python failed (optional)${NC}"
fi

# PyTorch for the nvidia backend: default wheels first, then the CPU-only
# index as a fallback; if both fail the build stops.
echo -e "${YELLOW}Installing PyTorch with CUDA support (NVIDIA backend)...${NC}"
if ! pip install torch torchvision torchaudio; then
    echo -e "${YELLOW}Warning: PyTorch installation failed, will try CPU version${NC}"
    if ! pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu; then
        echo -e "${RED}Failed to install PyTorch${NC}"
        exit 1
    fi
fi

echo -e "${YELLOW}Installing NVIDIA-specific requirements...${NC}"
if ! pip install -r requirements-nvidia.txt; then
    echo -e "${YELLOW}Warning: Some NVIDIA packages failed to install${NC}"
fi
# Probe for the optional GPU toolchains. Each probe stores "true" or "false"
# in its *_AVAILABLE flag and prints the result straight away.

# Vulkan: development files visible to pkg-config.
if pkg-config --exists vulkan 2>/dev/null; then
    VULKAN_AVAILABLE=true
else
    VULKAN_AVAILABLE=false
fi
if [[ "$VULKAN_AVAILABLE" == true ]]; then
    echo -e "${GREEN}✓ Found Vulkan development libraries${NC}"
else
    echo -e "${YELLOW}Warning: Vulkan development libraries not found${NC}"
    echo -e "${YELLOW} Vulkan support will be limited${NC}"
fi

# CUDA: nvcc on PATH, or the conventional /usr/local/cuda directory.
if command -v nvcc &> /dev/null || [ -d "/usr/local/cuda" ]; then
    CUDA_AVAILABLE=true
else
    CUDA_AVAILABLE=false
fi
if [[ "$CUDA_AVAILABLE" == true ]]; then
    echo -e "${GREEN}✓ Found CUDA toolkit${NC}"
else
    echo -e "${YELLOW}Warning: CUDA toolkit not found${NC}"
    echo -e "${YELLOW} CUDA support will be limited${NC}"
fi

# OpenCL: clinfo on PATH, or an OpenCL library under /usr/lib/<subdir>/.
if command -v clinfo &> /dev/null || ls /usr/lib/*/libOpenCL* &> /dev/null; then
    OPENCL_AVAILABLE=true
else
    OPENCL_AVAILABLE=false
fi
if [[ "$OPENCL_AVAILABLE" == true ]]; then
    echo -e "${GREEN}✓ Found OpenCL${NC}"
else
    echo -e "${YELLOW}Warning: OpenCL not found${NC}"
    echo -e "${YELLOW} OpenCL support will be limited${NC}"
fi
# Build llama-cpp-python with both CUDA and Vulkan support
echo -e "${YELLOW}Building llama-cpp-python with CUDA and Vulkan support...${NC}"
echo -e "${YELLOW}This may take several minutes...${NC}"

# Collect the CMake flags for the detected toolchains in a private variable.
# CMAKE_ARGS itself is only set for the single pip command that needs it, so
# a CMAKE_ARGS exported by the caller is not overwritten and the GPU flags
# cannot leak into the fallback install below.
_LLAMA_CMAKE=""
if [ "$CUDA_AVAILABLE" = true ]; then
    _LLAMA_CMAKE="-DGGML_CUDA=ON"
    echo -e "${GREEN} ✓ Enabling CUDA support${NC}"
fi
if [ "$VULKAN_AVAILABLE" = true ]; then
    # ${var:+...} adds the separating space only when a flag is already set.
    _LLAMA_CMAKE="${_LLAMA_CMAKE:+$_LLAMA_CMAKE }-DGGML_VULKAN=ON"
    echo -e "${GREEN} ✓ Enabling Vulkan support${NC}"
fi

if [ -n "$_LLAMA_CMAKE" ]; then
    echo -e "${YELLOW} Building with: $_LLAMA_CMAKE${NC}"
    CMAKE_ARGS="$_LLAMA_CMAKE" pip install --upgrade llama-cpp-python --no-cache-dir || {
        # GPU build failed: fall back to a plain install.
        echo -e "${YELLOW}Warning: llama-cpp-python build failed, installing from pip${NC}"
        pip install llama-cpp-python
    }
else
    echo -e "${YELLOW}Warning: No GPU backends available, installing CPU version${NC}"
    pip install llama-cpp-python
fi
# Remaining package groups for the "all" build. Every step here is
# best-effort: a failure prints a warning and the build continues.

# Packages listed in requirements-vulkan.txt
echo -e "${YELLOW}Installing Vulkan-specific requirements...${NC}"
if ! pip install -r requirements-vulkan.txt; then
    echo -e "${YELLOW}Warning: Some Vulkan packages failed to install${NC}"
fi

# stable-diffusion-cpp-python is only attempted when OpenCL was detected.
if [ "$OPENCL_AVAILABLE" != true ]; then
    echo -e "${YELLOW}Skipping OpenCL (stable-diffusion-cpp-python) - OpenCL not available${NC}"
else
    echo -e "${YELLOW}Installing stable-diffusion-cpp-python with OpenCL support...${NC}"
    if ! CMAKE_ARGS="$SD_CMAKE_ARGS" pip install stable-diffusion-cpp-python; then
        echo -e "${YELLOW}Warning: stable-diffusion-cpp-python not available (requires CMake and build tools)${NC}"
    fi
fi

# Additional requirements
echo -e "${YELLOW}Installing additional requirements...${NC}"
if ! pip install numpy pillow; then
    echo -e "${YELLOW}Warning: Some additional packages failed${NC}"
fi

# setproctitle (process naming) is optional.
echo -e "${YELLOW}Installing setproctitle...${NC}"
if ! pip install setproctitle; then
    echo -e "${YELLOW}Note: setproctitle failed to install (optional package, not critical)${NC}"
fi

# Flash Attention 2 is installed only when --flash was given AND CUDA was
# detected; with --flash but no CUDA the request is skipped with a warning.
if [ "$FLASH" = true ]; then
    if [ "$CUDA_AVAILABLE" = true ]; then
        echo ""
        echo -e "${YELLOW}Installing Flash Attention 2...${NC}"
        echo -e "${YELLOW}This may take several minutes and requires CUDA 11.6+${NC}"
        if ! MAX_JOBS=6 pip install flash-attn --no-build-isolation; then
            echo -e "${RED}Warning: Flash Attention 2 installation failed${NC}"
            echo -e "${YELLOW}Requirements: CUDA 11.6+, Linux, Ampere/Ada/Hopper GPU${NC}"
            echo -e "${YELLOW}Continuing without Flash Attention...${NC}"
        fi
    else
        echo -e "${YELLOW}Warning: Flash Attention 2 requires CUDA backend${NC}"
        echo -e "${YELLOW}Skipping Flash Attention installation${NC}"
    fi
fi
# Summary for the "all" build: which backends were detected and how to run
# each of them.
echo ""
echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN} ALL backends build complete!${NC}"
echo -e "${GREEN}========================================${NC}"
echo ""
echo "Available backends:"
if [ "$CUDA_AVAILABLE" = true ]; then
    echo " ✓ NVIDIA/CUDA (PyTorch)"
    echo " ✓ CUDA (llama-cpp-python)"
fi
if [ "$VULKAN_AVAILABLE" = true ]; then
    echo " ✓ Vulkan (llama-cpp-python)"
fi
if [ "$OPENCL_AVAILABLE" = true ]; then
    echo " ✓ OpenCL (stable-diffusion-cpp-python)"
fi
echo " ✓ CPU (fallback for all)"
if [ "$FLASH" = true ] && [ "$CUDA_AVAILABLE" = true ]; then
    echo ""
    echo " ✓ Flash Attention 2 (NVIDIA)"
fi
echo ""
echo "Usage:"
echo " source $VENV_DIR/bin/activate"
echo ""
echo " # For text models with NVIDIA:"
echo " python coderai --model <model> --backend nvidia"
# Only advertise --flash-attn when Flash Attention was actually attempted,
# i.e. --flash was given and CUDA was detected.
if [ "$FLASH" = true ] && [ "$CUDA_AVAILABLE" = true ]; then
    echo " python coderai --model <model> --backend nvidia --flash-attn"
fi
echo ""
echo " # For GGUF models with CUDA:"
echo " python coderai --model <gguf-model> --backend vulkan"
echo ""
echo " # For GGUF models with Vulkan:"
echo " python coderai --model <gguf-model> --backend vulkan"
echo ""
echo " # For image generation with OpenCL:"
echo " python coderai --model <model> --image-backend opencl"
echo ""
fi
# Remember the chosen backend in a .backend file in the current directory.
echo "$BACKEND" > .backend

# Closing message with the command that re-activates the environment.
echo -e "${GREEN}Build completed successfully!${NC}"
echo ""
echo "To activate the environment in the future, run:"
echo " source $VENV_DIR/bin/activate"
......@@ -65,23 +65,37 @@ class SessionManager:
self.config_dir = config_dir
self.secret = get_or_create_secret(config_dir)
self.session_timeout = timedelta(minutes=session_timeout_minutes)
self._lock = __import__('threading').Lock()
def _load_auth_data(self) -> Dict[str, Any]:
"""Load auth.json data."""
auth_path = self.config_dir / "auth.json"
if auth_path.exists():
with open(auth_path, 'r') as f:
return json.load(f)
try:
with open(auth_path, 'r') as f:
content = f.read()
if content.strip():
return json.loads(content)
except (json.JSONDecodeError, OSError):
pass
return {"users": [], "tokens": [], "sessions": {}}
def _save_auth_data(self, auth_data: Dict[str, Any]):
"""Save auth.json data."""
"""Save auth.json data atomically."""
auth_path = self.config_dir / "auth.json"
# Atomic write
temp_path = auth_path.with_suffix('.tmp')
with open(temp_path, 'w') as f:
json.dump(auth_data, f, indent=2)
temp_path.replace(auth_path)
with self._lock:
import os, tempfile
fd, tmp = tempfile.mkstemp(dir=str(self.config_dir), suffix='.tmp')
try:
with os.fdopen(fd, 'w') as f:
json.dump(auth_data, f, indent=2)
os.replace(tmp, str(auth_path))
except Exception:
try:
os.unlink(tmp)
except OSError:
pass
raise
def create_session(self, username: str) -> str:
"""Create a new session for a user.
......
......@@ -3,10 +3,14 @@ from pathlib import Path
from typing import Optional
from fastapi import APIRouter, Request, Response, Form, HTTPException, Depends
from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse, StreamingResponse
from fastapi.templating import Jinja2Templates
from codai.admin.auth import SessionManager
import queue as _q
import threading as _t
import uuid as _uuid
import json as _j
router = APIRouter()
......@@ -17,6 +21,9 @@ templates = Jinja2Templates(directory=str(templates_dir))
# Session manager (will be initialized in main.py)
session_manager: Optional[SessionManager] = None
config_manager = None # set via set_config_manager()
_download_sessions: dict = {}
_download_status: dict = {} # session_id → latest progress state (survives SSE disconnect)
def init_session_manager(config_dir: Path):
......@@ -25,6 +32,12 @@ def init_session_manager(config_dir: Path):
session_manager = SessionManager(config_dir)
def set_config_manager(mgr):
"""Set the shared ConfigManager instance."""
global config_manager
config_manager = mgr
def get_current_user(request: Request) -> Optional[str]:
"""Get the current logged-in user from session cookie."""
if session_manager is None:
......@@ -65,10 +78,7 @@ async def login_page(request: Request):
if username:
return RedirectResponse(url="/admin", status_code=302)
return templates.TemplateResponse("login.html", {
"request": request,
"error": None
})
return templates.TemplateResponse(request, "login.html", {"error": None})
@router.post("/login")
......@@ -80,14 +90,11 @@ async def login(
"""Handle login form submission."""
if session_manager is None:
raise HTTPException(status_code=500, detail="Session manager not initialized")
session_cookie = session_manager.authenticate(username, password)
if not session_cookie:
return templates.TemplateResponse("login.html", {
"request": request,
"error": "Invalid username or password"
})
return templates.TemplateResponse(request, "login.html", {"error": "Invalid username or password"})
# Check if must change password
must_change = session_cookie.endswith(".MUST_CHANGE")
......@@ -123,110 +130,82 @@ async def logout(request: Request):
@router.get("/admin/change-password", response_class=HTMLResponse)
async def change_password_page(request: Request, username: str = Depends(require_auth)):
"""Display password change page."""
user = session_manager.get_user(username)
must_change = user.get("must_change_password", False) if user else False
return templates.TemplateResponse("change_password.html", {
"request": request,
return templates.TemplateResponse(request, "change_password.html", {
"username": username,
"must_change": must_change,
"error": None
"is_admin": session_manager.is_admin(username),
"error": None,
})
@router.post("/admin/change-password")
async def change_password(
request: Request,
old_password: str = Form(...),
old_password: Optional[str] = Form(None),
new_password: str = Form(...),
confirm_password: str = Form(...),
username: str = Depends(require_auth)
):
"""Handle password change."""
if new_password != confirm_password:
return templates.TemplateResponse("change_password.html", {
"request": request,
"username": username,
"must_change": False,
"error": "Passwords do not match"
user = session_manager.get_user(username)
is_admin = session_manager.is_admin(username)
must_change = user.get("must_change_password", False) if user else False
def render_error(msg: str):
return templates.TemplateResponse(request, "change_password.html", {
"username": username, "must_change": must_change,
"is_admin": is_admin, "error": msg,
})
if new_password != confirm_password:
return render_error("Passwords do not match")
if len(new_password) < 8:
return templates.TemplateResponse("change_password.html", {
"request": request,
"username": username,
"must_change": False,
"error": "Password must be at least 8 characters"
})
# Check if this is a forced change (first login)
user = session_manager.get_user(username)
if user and user.get("must_change_password"):
# Force change without verifying old password
return render_error("Password must be at least 8 characters")
if must_change:
success = session_manager.force_password_change(username, new_password)
else:
if not old_password:
return render_error("Current password is required")
success = session_manager.change_password(username, old_password, new_password)
if not success:
return templates.TemplateResponse("change_password.html", {
"request": request,
"username": username,
"must_change": False,
"error": "Current password is incorrect"
})
return render_error("Current password is incorrect")
return RedirectResponse(url="/admin", status_code=302)
@router.get("/admin", response_class=HTMLResponse)
async def admin_dashboard(request: Request, username: str = Depends(require_auth)):
    """Render the dashboard for any authenticated user.

    ``is_admin`` is passed to the template alongside the username.
    """
    is_admin = session_manager.is_admin(username)
    return templates.TemplateResponse(request, "dashboard.html", {
        "username": username, "is_admin": is_admin,
    })
@router.get("/admin/models", response_class=HTMLResponse)
async def models_page(request: Request, username: str = Depends(require_admin)):
    """Render the models management page (admin only)."""
    # require_admin already gated access, so is_admin is passed as a constant.
    return templates.TemplateResponse(
        request, "models.html", {"username": username, "is_admin": True}
    )
@router.get("/admin/tokens", response_class=HTMLResponse)
async def tokens_page(request: Request, username: str = Depends(require_admin)):
    """Render the API tokens management page (admin only)."""
    # require_admin already gated access, so is_admin is passed as a constant.
    return templates.TemplateResponse(
        request, "tokens.html", {"username": username, "is_admin": True}
    )
@router.get("/admin/users", response_class=HTMLResponse)
async def users_page(request: Request, username: str = Depends(require_admin)):
    """Render the users management page (admin only).

    The template receives whatever ``session_manager.list_users()`` returns.
    """
    users = session_manager.list_users()
    return templates.TemplateResponse(request, "users.html", {
        "username": username, "is_admin": True, "users": users,
    })
@router.get("/chat", response_class=HTMLResponse)
async def chat_page(request: Request, username: str = Depends(require_auth)):
    """Render the chat interface for any authenticated user."""
    return templates.TemplateResponse(request, "chat.html", {
        "username": username, "is_admin": session_manager.is_admin(username),
    })
......@@ -234,12 +213,62 @@ async def chat_page(request: Request, username: str = Depends(require_auth)):
@router.get("/admin/api/status")
async def api_status(username: str = Depends(require_auth)):
    """Return a best-effort snapshot of system status.

    Every probe is optional: if a subsystem cannot be imported or queried,
    its field keeps the default value instead of failing the whole request.

    Returns a dict with ``status``, ``backend``, ``load_mode``,
    ``models_loaded``, ``loaded_models``, ``enabled_models``, ``vram``
    (``None`` when CUDA is unavailable) and ``requests``.
    """
    from codai.models.manager import multi_model_manager
    from codai.api.state import get_load_mode

    loaded_keys = list(multi_model_manager.models.keys())

    # VRAM info, in GB as reported by torch.cuda.mem_get_info().
    vram = None
    try:
        import torch
        if torch.cuda.is_available():
            free, total = torch.cuda.mem_get_info()
            used = total - free
            vram = {"used": round(used / 1e9, 2), "total": round(total / 1e9, 2)}
    except Exception:
        pass  # torch missing or CUDA query failed: report no VRAM info

    # Request stats from the queue manager.
    # NOTE(review): req_total is never populated here and is always reported as 0.
    req_total = 0
    req_active = 0
    try:
        from codai.queue.manager import queue_manager
        req_active = 1 if queue_manager._processing else 0
    except Exception:
        pass

    # Backend info.
    backend = "—"
    try:
        from codai.models.manager import model_manager
        if model_manager.backend_type:
            backend = model_manager.backend_type
    except Exception:
        pass

    # Enabled (configured) models, de-duplicated in first-seen order.
    enabled_models = []
    try:
        if config_manager:
            md = config_manager.models_data
            for cat in ("text_models", "image_models", "audio_models", "vision_models", "tts_models"):
                for m in md.get(cat, []):
                    mid = (m.get("path") or m.get("id") or m) if isinstance(m, dict) else m
                    if mid and mid not in enabled_models:
                        enabled_models.append(mid)
    except Exception:
        pass

    return {
        "status": "ok",
        "backend": backend,
        "load_mode": get_load_mode(),
        "models_loaded": len(loaded_keys),
        "loaded_models": loaded_keys,
        "enabled_models": enabled_models,
        "vram": vram,
        "requests": {"total": req_total, "active": req_active},
    }
......@@ -359,10 +388,194 @@ async def api_delete_token(token_id: int, username: str = Depends(require_admin)
async def api_list_models(username: str = Depends(require_admin)):
    """List all configured models with details.

    Returns whatever ``multi_model_manager.list_models()`` produces, or an
    empty list if that call raises.
    """
    from codai.models.manager import multi_model_manager
    try:
        return multi_model_manager.list_models()
    except Exception:
        # Best effort: the page shows an empty list rather than an error.
        return []
def _make_tqdm_class(pq, status=None):
"""Return a tqdm-compatible class that forwards progress events to pq and optionally updates a status dict."""
import time as _time
class _PQTqdm:
def __init__(self, iterable=None, desc=None, total=None, initial=0, **kwargs):
self.iterable = iterable
self.desc = str(desc or 'downloading')
self.total = int(total) if total else 0
self.n = int(initial) if initial else 0
self._start = _time.time()
if self.total:
pq.put({"type": "start", "filename": self.desc, "total": self.total})
if status is not None:
status.update({"status": "downloading", "filename": self.desc,
"total": self.total, "downloaded": self.n, "percent": 0})
def update(self, n=1):
self.n += n
elapsed = (_time.time() - self._start) or 0.001
rate = self.n / elapsed
eta = (self.total - self.n) / rate if rate and self.total else None
pct = round(self.n / self.total * 100, 1) if self.total else 0
evt = {
"type": "progress",
"filename": self.desc,
"downloaded": self.n,
"total": self.total,
"percent": pct,
"rate": round(rate),
"eta": round(eta) if eta is not None else None,
}
pq.put(evt)
if status is not None:
status.update({"status": "downloading", "filename": self.desc,
"percent": pct, "rate": round(rate), "eta": evt["eta"],
"downloaded": self.n, "total": self.total})
def close(self): pass
def refresh(self, nolock=False, lock_args=None): pass
def clear(self, nolock=False): pass
def display(self, msg=None, pos=None): pass
def unpause(self): pass
def moveto(self, n): pass
def set_postfix(self, *a, **kw): pass
def set_description(self, desc=None, **kw):
if desc: self.desc = str(desc)
def set_postfix_str(self, *a, **kw): pass
def reset(self, total=None):
self.n = 0
self._start = _time.time()
if total is not None: self.total = int(total)
def __enter__(self): return self
def __exit__(self, *a): self.close()
def __iter__(self):
for obj in (self.iterable or []):
yield obj
def write(self, s, **kw):
pq.put({"type": "info", "message": str(s)})
monitor_interval = 0
monitor = None
_lock = None
@classmethod
def get_lock(cls):
import threading
if cls._lock is None:
cls._lock = threading.RLock()
return cls._lock
@classmethod
def set_lock(cls, lock):
cls._lock = lock
return _PQTqdm
def _run_download_thread(session_id: str, model_id: str, file_pattern: str, pq):
    """Background worker: download a model and stream progress events to ``pq``.

    HuggingFace model ids are fetched with ``snapshot_download`` (optionally
    restricted by ``file_pattern``); anything else is treated as a direct URL
    and streamed into the model cache directory.

    Args:
        session_id: Key under which progress is published in
            ``_download_status`` and ``_download_sessions``.
        model_id: HuggingFace repo id or a direct download URL.
        file_pattern: Optional suffix/quant filter; empty means full repository.
        pq: Queue receiving ``info``/``start``/``progress``/``done``/``error``
            event dicts.

    Errors are reported as an ``error`` event rather than raised. The session
    entries are removed 300 seconds after the worker finishes.
    """
    import os
    import time

    status = {"session_id": session_id, "model_id": model_id, "status": "starting",
              "percent": 0, "filename": "", "rate": 0, "eta": None}
    _download_status[session_id] = status

    def push(evt):
        # Publish to the queue and mirror the event into the pollable status dict.
        pq.put(evt)
        t = evt.get("type")
        if t == "start":
            status.update({"status": "downloading", "filename": evt.get("filename", ""),
                           "total": evt.get("total", 0), "downloaded": 0, "percent": 0})
        elif t == "progress":
            status.update({"status": "downloading",
                           "filename": evt.get("filename", status.get("filename", "")),
                           "percent": evt.get("percent", 0), "rate": evt.get("rate", 0),
                           "eta": evt.get("eta"), "downloaded": evt.get("downloaded", 0),
                           "total": evt.get("total", 0)})
        elif t == "done":
            status.update({"status": "done", "percent": 100, "path": evt.get("path", "")})
        elif t == "error":
            status.update({"status": "error", "error": evt.get("message", "")})
        elif t == "info":
            status["last_info"] = evt.get("message", "")

    try:
        from codai.models.cache import is_huggingface_model_id
        from huggingface_hub import snapshot_download

        tqdm_cls = _make_tqdm_class(pq, status=status)

        if is_huggingface_model_id(model_id):
            if file_pattern:
                # A pattern containing "/" (and not starting with ".") is an
                # exact subpath; anything else becomes a suffix glob:
                # ".gguf" -> "*.gguf", "Q4_K_M.gguf" -> "*Q4_K_M.gguf".
                if '/' in file_pattern and not file_pattern.startswith('.'):
                    allow = [file_pattern]
                else:
                    allow = [f"*{file_pattern}"]
                push({"type": "info", "message": f"Downloading {allow[0]} from {model_id}…"})
                path = snapshot_download(model_id, allow_patterns=allow, tqdm_class=tqdm_cls)
            else:
                push({"type": "info", "message": f"Downloading full repository {model_id}…"})
                path = snapshot_download(model_id, tqdm_class=tqdm_cls)
        else:
            # Direct URL download (non-HF source)
            import requests as _req
            import hashlib
            from codai.models.cache import get_model_cache_dir

            cache_dir = get_model_cache_dir()
            url_path = model_id.split('?')[0]
            filename = os.path.basename(url_path) or "model.bin"
            url_hash = hashlib.sha256(model_id.encode()).hexdigest()
            dest = os.path.join(cache_dir, f"{url_hash}_{filename}")
            if os.path.exists(dest):
                push({"type": "done", "path": dest})
                return

            # Stream into a session-specific temp file and rename only on
            # success, so an interrupted download is never mistaken for a
            # complete cached file by the os.path.exists() check above.
            tmp_dest = f"{dest}.{session_id}.part"
            try:
                with _req.get(model_id, stream=True, timeout=60, allow_redirects=True) as resp:
                    resp.raise_for_status()
                    total = int(resp.headers.get('content-length', 0))
                    push({"type": "start", "filename": filename, "total": total})
                    downloaded = 0
                    start_t = time.time()
                    last_evt = 0.0
                    with open(tmp_dest, 'wb') as f:
                        for chunk in resp.iter_content(chunk_size=524288):
                            if not chunk:
                                continue
                            f.write(chunk)
                            downloaded += len(chunk)
                            now = time.time()
                            # Throttle progress events to at most one per 250 ms.
                            if now - last_evt >= 0.25:
                                last_evt = now
                                elapsed = (now - start_t) or 0.001
                                rate = downloaded / elapsed
                                eta = (total - downloaded) / rate if rate and total else None
                                push({
                                    "type": "progress", "filename": filename,
                                    "downloaded": downloaded, "total": total,
                                    "percent": round(downloaded / total * 100, 1) if total else 0,
                                    "rate": round(rate),
                                    "eta": round(eta) if eta is not None else None,
                                })
                os.replace(tmp_dest, dest)
            except BaseException:
                # Drop the partial file, then let the outer handler report the error.
                try:
                    os.remove(tmp_dest)
                except OSError:
                    pass
                raise
            path = dest

        push({"type": "done", "path": str(path)})

    except Exception as exc:
        push({"type": "error", "message": str(exc)})
    finally:
        def _gc():
            # Keep the final status visible for five minutes, then forget the session.
            time.sleep(300)
            _download_sessions.pop(session_id, None)
            _download_status.pop(session_id, None)
        _t.Thread(target=_gc, daemon=True).start()
@router.post("/admin/api/model-download")
async def api_download_model(
    request: Request,
    username: str = Depends(require_admin)
):
    """Start a background download; returns session_id for SSE progress streaming.

    Expects a JSON body with ``model_id`` (required) and an optional
    ``file_pattern``. The download runs in a daemon thread and its progress
    queue is registered in ``_download_sessions`` under the returned id.

    Raises:
        HTTPException: 400 when ``model_id`` is missing or empty.
    """
    data = await request.json()
    model_id = data.get("model_id")
    file_pattern = (data.get("file_pattern") or "").strip()
    if not model_id:
        raise HTTPException(status_code=400, detail="Model ID required")

    session_id = str(_uuid.uuid4())
    pq = _q.Queue()
    _download_sessions[session_id] = pq
    _t.Thread(
        target=_run_download_thread,
        args=(session_id, model_id, file_pattern, pq),
        daemon=True,
    ).start()
    return {"session_id": session_id}
@router.get("/admin/api/download-stream/{session_id}")
async def api_download_stream(
    session_id: str,
    request: Request,
    username: str = Depends(require_admin),
):
    """Server-Sent Events stream for download progress.

    Forwards each event from the session queue as one SSE ``data:`` frame and
    ends after a ``done`` or ``error`` event. While the queue is idle it emits
    keepalive frames, and stops when the client has disconnected, when the
    session has been garbage-collected, or when the recorded status is already
    terminal (for example because another subscriber consumed the final event).

    Raises:
        HTTPException: 404 when ``session_id`` is unknown.
    """
    import asyncio

    pq = _download_sessions.get(session_id)
    if pq is None:
        raise HTTPException(status_code=404, detail="Download session not found")

    async def _generate():
        loop = asyncio.get_running_loop()
        while True:
            try:
                # Queue.get blocks, so run it in the default executor.
                evt = await loop.run_in_executor(None, lambda: pq.get(timeout=2))
            except _q.Empty:
                if await request.is_disconnected():
                    break
                snapshot = _download_status.get(session_id) or {}
                state = snapshot.get("status")
                if state == "done":
                    final = {"type": "done", "path": snapshot.get("path", "")}
                    yield f"data: {_j.dumps(final)}\n\n"
                    break
                if state == "error":
                    final = {"type": "error", "message": snapshot.get("error", "")}
                    yield f"data: {_j.dumps(final)}\n\n"
                    break
                if session_id not in _download_sessions:
                    break  # session expired; nothing more will arrive
                yield 'data: {"type":"keepalive"}\n\n'
                continue
            yield f"data: {_j.dumps(evt)}\n\n"
            if evt.get("type") in ("done", "error"):
                break

    return StreamingResponse(
        _generate(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",
            "Connection": "keep-alive",
        },
    )
@router.delete("/admin/api/models/{model_identifier}")
......@@ -418,6 +656,548 @@ async def api_delete_model(
raise HTTPException(status_code=500, detail=str(e))
# --- Download status / cache management ---
@router.get("/admin/api/downloads")
async def api_list_downloads(username: str = Depends(require_admin)):
    """Report every download session currently tracked in ``_download_status``."""
    sessions = list(_download_status.values())
    return sessions
@router.post("/admin/api/model-upload")
async def api_model_upload(request: Request, username: str = Depends(require_admin)):
    """Upload a GGUF model file in chunks.

    Form fields: ``chunk`` (file part), ``filename``, ``chunk_index``,
    ``total_chunks`` and an optional ``upload_id`` (defaults to the filename).
    Chunks are appended to a temp file; the last chunk moves it into the model
    cache directory.

    Returns:
        ``{"success": True, "complete": False, "chunk_index": i}`` for
        intermediate chunks, or ``{"success": True, "complete": True,
        "path": ...}`` after the final one.

    Raises:
        HTTPException: 400 for a missing chunk, non-integer or out-of-range
            chunk numbers, or an unusable filename.
    """
    import os
    import re
    import shutil
    import tempfile
    from codai.models.cache import get_model_cache_dir

    form = await request.form()
    chunk = form.get("chunk")
    try:
        chunk_index = int(form.get("chunk_index", 0))
        total_chunks = int(form.get("total_chunks", 1))
    except (TypeError, ValueError):
        raise HTTPException(status_code=400, detail="chunk_index and total_chunks must be integers")
    if total_chunks < 1 or not 0 <= chunk_index < total_chunks:
        raise HTTPException(status_code=400, detail="chunk_index out of range")

    if not chunk or not hasattr(chunk, "read"):
        raise HTTPException(status_code=400, detail="No file chunk provided")

    # Client-supplied names are reduced to a bare file name so they cannot
    # escape the cache or temp directory ("../x", absolute paths, "\\").
    raw_name = str(form.get("filename") or "model.gguf")
    filename = os.path.basename(raw_name.replace("\\", "/"))
    if filename in ("", ".", ".."):
        raise HTTPException(status_code=400, detail="Invalid filename")
    upload_id = re.sub(r"[^A-Za-z0-9._-]", "_", str(form.get("upload_id") or filename))

    cache_dir = get_model_cache_dir()
    temp_path = os.path.join(tempfile.gettempdir(), f"upload_{upload_id}.part")

    # The first chunk truncates, so a stale part file from an aborted upload
    # with the same id is not silently prepended to the new one.
    chunk_data = await chunk.read()
    with open(temp_path, "wb" if chunk_index == 0 else "ab") as f:
        f.write(chunk_data)

    # If last chunk, move to final location
    if chunk_index == total_chunks - 1:
        final_path = os.path.join(cache_dir, filename)
        # shutil.move also works when the temp dir is on another filesystem,
        # where os.replace would raise.
        shutil.move(temp_path, final_path)
        return {"success": True, "complete": True, "path": final_path}

    return {"success": True, "complete": False, "chunk_index": chunk_index}
# ── cache scan helpers (run in thread pool) ──────────────────────────────────
def _scan_caches() -> dict:
    """Inventory locally stored models across the HF cache and the GGUF cache.

    Returns:
        A dict with two lists, ``"hf"`` (one entry per HuggingFace repo) and
        ``"gguf"`` (one entry per GGUF file or configured GGUF model), plus
        ``"hf_error"`` when scanning the HuggingFace cache raised.
    """
    import os
    listing: dict = {"hf": [], "gguf": []}
    from codai.models.cache import get_all_cache_dirs, get_model_cache_dir
    cache_dirs = get_all_cache_dirs()

    skip_suffixes = ('.json', '.txt', '.md', '.py', '.gitattributes')

    # Map each configured model key (path or id) to (settings, category).
    configured: dict = {}
    if config_manager:
        models_data = config_manager.models_data
        for category in ("text_models", "image_models", "audio_models",
                         "gguf_models", "tts_models", "vision_models"):
            for item in models_data.get(category, []):
                if isinstance(item, str):
                    configured[item] = ({}, category)
                    continue
                key = item.get("path") or item.get("id") or ""
                if key:
                    configured[key] = (item, category)

    def _display_type(category):
        # "gguf_models" is a legacy category and is reported as text.
        return category if category and category != "gguf_models" else "text_models"

    def _as_settings(value):
        return value if isinstance(value, dict) else {}

    def _gguf_entry(name, location, nbytes):
        # Build one "gguf" row, matching config by full path first, then by name.
        found = configured.get(location) or configured.get(name) or ({}, None)
        return {
            "filename": name,
            "path": location,
            "size_gb": round(nbytes / 1e9, 2),
            "size_bytes": nbytes,
            "in_config": location in configured or name in configured,
            "model_type": _display_type(found[1]),
            "settings": _as_settings(found[0]),
        }

    # HuggingFace cache
    hf_root = cache_dirs.get("huggingface")
    if hf_root:
        try:
            from huggingface_hub import scan_cache_dir
            report = scan_cache_dir(hf_root)
            for repo in sorted(report.repos, key=lambda r: r.repo_id):
                ordered = sorted(repo.revisions, key=lambda r: r.commit_hash)
                total_bytes = sum(r.size_on_disk for r in repo.revisions)
                names = sorted(f.file_name for f in ordered[-1].files) if ordered else []

                # Repos whose model files are all .gguf are listed per file
                # under "gguf" instead of as one "hf" entry.
                payload = [n for n in names if not n.endswith(skip_suffixes)]
                if payload and all(n.endswith('.gguf') for n in payload):
                    for hf_file in ordered[-1].files:
                        if hf_file.file_name.endswith('.gguf'):
                            location = str(hf_file.file_path)
                            listing["gguf"].append(
                                _gguf_entry(hf_file.file_name, location, hf_file.size_on_disk)
                            )
                    continue

                settings, category = configured.get(repo.repo_id, ({}, None))
                listing["hf"].append({
                    "id": repo.repo_id,
                    "size_gb": round(total_bytes / 1e9, 2),
                    "size_bytes": total_bytes,
                    "revision_count": len(list(repo.revisions)),
                    "files": names[:30],
                    "file_count": len(names),
                    "in_config": repo.repo_id in configured,
                    "model_type": _display_type(category),
                    "settings": _as_settings(settings),
                })
        except Exception as e:
            listing["hf_error"] = str(e)

    # GGUF cache (coderai-specific): every regular file in the directory
    gguf_root = cache_dirs.get("coderai") or get_model_cache_dir()
    if gguf_root and os.path.exists(gguf_root):
        for name in sorted(os.listdir(gguf_root)):
            location = os.path.join(gguf_root, name)
            if os.path.isfile(location):
                listing["gguf"].append(_gguf_entry(name, location, os.path.getsize(location)))

    # Configured GGUF-looking models not found above (e.g. HF repo ids or
    # external paths) are still listed, with size 0 unless the file exists.
    known_paths = {row["path"] for row in listing["gguf"]}
    for key, (settings, category) in configured.items():
        if key in known_paths:
            continue
        looks_gguf = key.endswith('.gguf') or 'gguf' in key.lower() or category == "gguf_models"
        if not looks_gguf:
            continue
        nbytes = os.path.getsize(key) if os.path.isfile(key) else 0
        listing["gguf"].append({
            "filename": os.path.basename(key) if '/' in key else key,
            "path": key,
            "size_gb": round(nbytes / 1e9, 2) if nbytes else 0,
            "size_bytes": nbytes,
            "in_config": True,
            "model_type": _display_type(category),
            "settings": _as_settings(settings),
        })

    return listing
def _get_cache_stats() -> dict:
    """Compute disk-usage statistics for the HF cache and the GGUF cache.

    HuggingFace repos whose model files are all ``.gguf`` are counted under
    the ``gguf_*`` totals; every other repo counts under ``hf_*``. The HF
    cache is scanned once and both totals are derived from that single pass.

    Returns:
        Dict with ``hf_bytes``, ``hf_models``, ``gguf_bytes``, ``gguf_files``
        and, per cache, ``*_disk_free_bytes`` / ``*_disk_total_bytes``
        (``None`` when the filesystem could not be queried).
    """
    import os
    import shutil
    from codai.models.cache import get_all_cache_dirs, get_model_cache_dir

    stats = {"hf_bytes": 0, "hf_models": 0, "gguf_bytes": 0, "gguf_files": 0,
             "hf_disk_free_bytes": None, "hf_disk_total_bytes": None,
             "gguf_disk_free_bytes": None, "gguf_disk_total_bytes": None}
    skip_suffixes = ('.json', '.txt', '.md', '.py', '.gitattributes')

    def _record_disk(directory, prefix):
        # shutil.disk_usage is portable, unlike os.statvfs (POSIX only).
        try:
            usage = shutil.disk_usage(directory)
        except Exception:
            return
        stats[f"{prefix}_disk_free_bytes"] = usage.free
        stats[f"{prefix}_disk_total_bytes"] = usage.total

    caches = get_all_cache_dirs()
    hf_dir = caches.get("huggingface")
    if hf_dir:
        try:
            from huggingface_hub import scan_cache_dir
            info = scan_cache_dir(hf_dir)
            for repo in info.repos:
                revs = list(repo.revisions)
                if not revs:
                    continue
                files = [f.file_name for f in revs[-1].files]
                model_files = [f for f in files if not f.endswith(skip_suffixes)]
                if model_files and all(f.endswith('.gguf') for f in model_files):
                    # GGUF-only repo: count its .gguf files toward the GGUF totals.
                    for rev in revs:
                        for hf_file in rev.files:
                            if hf_file.file_name.endswith('.gguf'):
                                stats["gguf_bytes"] += hf_file.size_on_disk
                                stats["gguf_files"] += 1
                else:
                    stats["hf_bytes"] += sum(r.size_on_disk for r in revs)
                    stats["hf_models"] += 1
        except Exception:
            pass  # best effort: leave whatever was counted so far
        _record_disk(hf_dir, "hf")

    gguf_dir = caches.get("coderai") or get_model_cache_dir()
    if gguf_dir and os.path.exists(gguf_dir):
        files = [f for f in os.listdir(gguf_dir)
                 if os.path.isfile(os.path.join(gguf_dir, f))]
        # "+=" because GGUF files found in the HF cache were already added above.
        stats["gguf_files"] += len(files)
        stats["gguf_bytes"] += sum(os.path.getsize(os.path.join(gguf_dir, f)) for f in files)
        _record_disk(gguf_dir, "gguf")

    return stats
def _do_clear_cache(cache_type: str) -> dict:
    """Bulk-delete cached models.

    Args:
        cache_type: ``"hf"``, ``"gguf"`` or ``"all"``.

    Returns:
        ``{"success": True, "freed_bytes": n}``, or ``{"success": False,
        "detail": "Unknown cache_type", "freed_bytes": 0}`` for any other
        ``cache_type``. ``freed_bytes`` only includes items whose removal
        did not raise.
    """
    import os, shutil
    from codai.models.cache import get_all_cache_dirs, get_model_cache_dir

    if cache_type not in ("all", "hf", "gguf"):
        return {"success": False, "detail": "Unknown cache_type", "freed_bytes": 0}

    def _tree_size(root):
        # Sum regular-file sizes under root; symlinks are skipped so linked
        # files are not counted in addition to their targets.
        total = 0
        for dirpath, _dirs, names in os.walk(root):
            for name in names:
                fp = os.path.join(dirpath, name)
                try:
                    if not os.path.islink(fp):
                        total += os.path.getsize(fp)
                except OSError:
                    pass
        return total

    caches = get_all_cache_dirs()
    freed = 0

    if cache_type in ("all", "hf"):
        hf_dir = caches.get("huggingface")
        if hf_dir and os.path.exists(hf_dir):
            try:
                from huggingface_hub import scan_cache_dir
                info = scan_cache_dir(hf_dir)
                hashes = [r.commit_hash for repo in info.repos for r in repo.revisions]
                if hashes:
                    strategy = info.delete_revisions(*hashes)
                    expected = strategy.expected_freed_size
                    strategy.execute()
                    freed += expected  # counted only once the deletion succeeded
            except Exception:
                # Fallback: remove everything in the cache directory by hand.
                for item in os.listdir(hf_dir):
                    p = os.path.join(hf_dir, item)
                    try:
                        if os.path.isdir(p):
                            size = _tree_size(p)
                            shutil.rmtree(p)
                        else:
                            size = os.path.getsize(p)
                            os.remove(p)
                        freed += size
                    except Exception:
                        pass

    if cache_type in ("all", "gguf"):
        gguf_dir = caches.get("coderai") or get_model_cache_dir()
        if gguf_dir and os.path.exists(gguf_dir):
            for f in os.listdir(gguf_dir):
                fp = os.path.join(gguf_dir, f)
                if os.path.isfile(fp):
                    try:
                        size = os.path.getsize(fp)
                        os.remove(fp)
                        freed += size
                    except Exception:
                        pass

    return {"success": True, "freed_bytes": freed}
def _do_delete_model(model_id: str, cache_type: str) -> dict:
    """Delete one cached model.

    Args:
        model_id: HuggingFace repo id (``cache_type="hf"``) or a GGUF file
            name / path (``cache_type="gguf"``).
        cache_type: ``"hf"`` or ``"gguf"``.

    Returns:
        ``{"success": True}`` or ``{"success": False, "detail": reason}``.

    For ``"gguf"`` the file must be located inside one of the known cache
    directories; paths that resolve elsewhere (``..`` segments, unrelated
    absolute paths) are refused.
    """
    import os, shutil
    from codai.models.cache import get_all_cache_dirs, get_model_cache_dir

    caches = get_all_cache_dirs()
    hf_dir = caches.get("huggingface")

    if cache_type == "hf":
        if not hf_dir:
            # No HF cache configured: nothing to scan or fall back to.
            return {"success": False, "detail": "Model not found in HF cache"}
        try:
            from huggingface_hub import scan_cache_dir
            info = scan_cache_dir(hf_dir)
            repo = next((r for r in info.repos if r.repo_id == model_id), None)
            if repo:
                hashes = [r.commit_hash for r in repo.revisions]
                info.delete_revisions(*hashes).execute()
                return {"success": True}
        except Exception:
            pass
        # Fallback: remove directory directly
        safe = model_id.replace("/", "--")
        d = os.path.join(hf_dir, f"models--{safe}")
        if os.path.exists(d):
            shutil.rmtree(d, ignore_errors=True)
            return {"success": True}
        return {"success": False, "detail": "Model not found in HF cache"}

    if cache_type == "gguf":
        gguf_roots = [r for r in (caches.get("coderai"), get_model_cache_dir()) if r]
        allowed = [os.path.realpath(r) for r in gguf_roots + ([hf_dir] if hf_dir else [])]

        def _inside_cache(file_path):
            # Check where the directory entry lives (not where a symlink
            # points): os.remove only removes the entry itself.
            parent = os.path.realpath(os.path.dirname(file_path))
            for root in allowed:
                try:
                    if os.path.commonpath([parent, root]) == root:
                        return True
                except ValueError:
                    continue  # e.g. different drives
            return False

        for root in gguf_roots:
            fp = os.path.join(root, model_id)
            if not os.path.isfile(fp):
                continue
            if not _inside_cache(fp):
                return {"success": False, "detail": "Path is outside the model caches"}
            os.remove(fp)
            return {"success": True}
        return {"success": False, "detail": "File not found"}

    return {"success": False, "detail": "Unknown cache_type"}
@router.get("/admin/api/cached-models")
async def api_cached_models(username: str = Depends(require_admin)):
    """Return the inventory built by ``_scan_caches``, computed in a worker thread."""
    from asyncio import to_thread
    inventory = await to_thread(_scan_caches)
    return inventory
@router.get("/admin/api/cache-stats")
async def api_cache_stats(username: str = Depends(require_admin)):
    """Return the statistics from ``_get_cache_stats``, computed in a worker thread."""
    from asyncio import to_thread
    usage = await to_thread(_get_cache_stats)
    return usage
@router.delete("/admin/api/cache")
async def api_clear_cache(cache_type: str = "all", username: str = Depends(require_admin)):
    """Clear a whole cache in a worker thread. ``cache_type``: all | hf | gguf."""
    from asyncio import to_thread
    outcome = await to_thread(_do_clear_cache, cache_type)
    return outcome
@router.delete("/admin/api/cached-models/{model_id:path}")
async def api_delete_cached_model(
    model_id: str,
    cache_type: str = "hf",
    username: str = Depends(require_admin),
):
    """Remove one cached model, identified by HF repo ID or GGUF filename.

    The deletion itself is delegated to ``_do_delete_model`` in a worker thread.
    """
    from asyncio import to_thread
    outcome = await to_thread(_do_delete_model, model_id, cache_type)
    return outcome
@router.post("/admin/api/model-enable")
async def api_model_enable(request: Request, username: str = Depends(require_admin)):
    """Register a cached model in models.json so CoderAI can use it.

    Expects JSON with ``path`` (or ``model_id``) and an optional
    ``model_type`` (default ``"text_models"``). The path is appended to that
    category unless an entry with the same path/id is already present, either
    as a plain string or as a settings dict.

    Raises:
        HTTPException: 503 without a config manager; 400 for a missing path
            or an unknown ``model_type``.
    """
    if config_manager is None:
        raise HTTPException(status_code=503, detail="Config manager not initialized")
    data = await request.json()
    path = data.get("path") or data.get("model_id", "")
    model_type = data.get("model_type", "text_models")
    valid = {"text_models", "image_models", "audio_models", "gguf_models", "tts_models", "vision_models"}
    if not path:
        raise HTTPException(status_code=400, detail="path is required")
    if model_type not in valid:
        raise HTTPException(status_code=400, detail=f"model_type must be one of {sorted(valid)}")

    def _entry_key(entry):
        # Entries are either bare path strings or dicts keyed by path/id.
        if isinstance(entry, str):
            return entry
        if isinstance(entry, dict):
            return entry.get("path") or entry.get("id") or ""
        return ""

    lst = config_manager.models_data.setdefault(model_type, [])
    if not any(_entry_key(m) == path for m in lst):
        lst.append(path)
        config_manager.save_models()
    return {"success": True}
@router.post("/admin/api/model-disable")
async def api_model_disable(request: Request, username: str = Depends(require_admin)):
    """Remove a model from models.json (keeps it cached locally).

    Expects JSON with ``path`` (or ``model_id``). Every entry with that
    path/id is removed from all categories; the config is saved only when
    something changed.

    Raises:
        HTTPException: 503 without a config manager; 400 for a missing path.
    """
    if config_manager is None:
        raise HTTPException(status_code=503, detail="Config manager not initialized")
    data = await request.json()
    path = data.get("path") or data.get("model_id", "")
    if not path:
        raise HTTPException(status_code=400, detail="path is required")

    def _entry_key(entry):
        # Same lookup as the cache scan: fall back to "id" when "path" is
        # missing or empty, not only when the key is absent.
        if isinstance(entry, str):
            return entry
        if isinstance(entry, dict):
            return entry.get("path") or entry.get("id") or ""
        return ""

    changed = False
    for cat in ("text_models", "image_models", "audio_models",
                "gguf_models", "tts_models", "vision_models"):
        lst = config_manager.models_data.get(cat, [])
        new_lst = [m for m in lst if _entry_key(m) != path]
        if len(new_lst) != len(lst):
            config_manager.models_data[cat] = new_lst
            changed = True
    if changed:
        config_manager.save_models()
    return {"success": True}
@router.get("/admin/api/model-loaded-status")
async def api_model_loaded_status(username: str = Depends(require_admin)):
    """Report the keys of all models currently held by ``multi_model_manager``."""
    from codai.models.manager import multi_model_manager
    loaded_keys = list(multi_model_manager.models.keys())
    return {"loaded": loaded_keys}
@router.post("/admin/api/model-load")
async def api_model_load(request: Request, username: str = Depends(require_admin)):
    """Load a configured model into VRAM (same VRAM checks as a real request).

    Expects JSON with ``path``. The model type is taken from the first
    non-text category in models.json that lists the path; otherwise the model
    is treated as a text model.

    Returns:
        ``{"success": True, "already_loaded": bool}``.

    Raises:
        HTTPException: 400 when ``path`` is missing; 500 when loading fails,
            including an image model whose loader produced nothing.
    """
    from codai.models.manager import multi_model_manager

    data = await request.json()
    path = data.get("path", "")
    if not path:
        raise HTTPException(status_code=400, detail="path required")

    # Find the model config entry to determine its type (first match wins).
    model_type = "text"
    if config_manager:
        md = config_manager.models_data
        categories = (("image_models", "image"), ("audio_models", "audio"),
                      ("vision_models", "vision"), ("tts_models", "tts"))
        model_type = next(
            (mtype
             for cat, mtype in categories
             for m in md.get(cat, [])
             if (m if isinstance(m, str) else (m.get("path") or m.get("id") or "")) == path),
            "text",
        )

    result = multi_model_manager.request_model(path, model_type if model_type != "text" else None)
    if result.get("already_loaded"):
        return {"success": True, "already_loaded": True}

    # Not loaded yet — trigger actual load
    try:
        if model_type == "text":
            mm = multi_model_manager._load_model_by_name(result["model_name"] or path)
            if mm is None:
                raise RuntimeError("Model failed to load")
            multi_model_manager.models[result["model_key"] or path] = mm
            multi_model_manager.active_in_vram = result["model_key"] or path
        elif model_type == "image":
            from codai.api.images import _load_diffusers_pipeline, _is_gguf_model, _load_sdcpp_model
            from codai.api.state import get_global_args
            global_args = get_global_args()
            model_key = f"image:{path}"
            loaded = None
            if _is_gguf_model(path):
                resolved = multi_model_manager.load_model(path)
                import os as _os
                if resolved and _os.path.isfile(resolved):
                    loaded = _load_sdcpp_model(resolved, global_args)
            else:
                loaded = _load_diffusers_pipeline(path, global_args)
            if not loaded:
                # Mirror the text branch: an empty loader result is a failure,
                # not a silent success.
                raise RuntimeError("Model failed to load")
            multi_model_manager.add_model(model_key, loaded)
        # NOTE(review): audio/vision/tts models are not loaded here; only
        # request_model() above is called for them — confirm that is intended.
        return {"success": True, "already_loaded": False}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/admin/api/model-unload")
async def api_model_unload(request: Request, username: str = Depends(require_admin)):
    """Unload a model from VRAM (keeps it available for on-request reload).

    Expects JSON with ``path``. The loaded-model key is matched by exact
    equality first, then by a ``":<path>"`` suffix, then by the path's last
    component, so a looser match can never shadow a stricter one.

    Returns:
        ``{"success": True, "was_loaded": bool}``.

    Raises:
        HTTPException: 400 when ``path`` is missing.
    """
    import gc
    from codai.models.manager import multi_model_manager

    data = await request.json()
    path = data.get("path", "")
    if not path:
        raise HTTPException(status_code=400, detail="path required")

    keys = list(multi_model_manager.models.keys())
    matchers = [
        lambda k: k == path,
        lambda k: k.endswith(f":{path}"),
    ]
    basename = path.split("/")[-1]
    if basename:
        # A path ending in "/" has an empty last component, and
        # str.endswith("") is true for every key, so skip the fallback.
        matchers.append(lambda k: k.endswith(basename))
    key = next((k for matches in matchers for k in keys if matches(k)), None)

    if key is None:
        return {"success": True, "was_loaded": False}

    model_obj = multi_model_manager.models.pop(key, None)
    if model_obj is not None:
        try:
            if hasattr(model_obj, "cleanup"):
                model_obj.cleanup()
            elif hasattr(model_obj, "to"):
                model_obj.to("cpu")
        except Exception:
            pass  # best effort: the model is dropped from the registry regardless

    if multi_model_manager.active_in_vram == key:
        multi_model_manager.active_in_vram = None
    if multi_model_manager.current_model_key == key:
        multi_model_manager.current_model_key = None

    gc.collect()
    try:
        import torch
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    except Exception:
        pass

    return {"success": True, "was_loaded": True}
@router.post("/admin/api/model-configure")
async def api_model_configure(request: Request, username: str = Depends(require_admin)):
    """Store per-model settings, registering or replacing the model's entry in models.json.

    Any existing entry for the path is dropped from every category first, so
    changing ``model_type`` moves the model instead of duplicating it.
    """
    if config_manager is None:
        raise HTTPException(status_code=503, detail="Config manager not initialized")

    payload = await request.json()
    path = payload.get("path") or payload.get("model_id", "")
    requested_type = payload.get("model_type", "text_models")
    # GGUF is a file format, not a model type: the legacy category maps to text.
    model_type = "text_models" if requested_type == "gguf_models" else requested_type

    valid = {"text_models", "image_models", "audio_models", "tts_models", "vision_models"}
    if not path:
        raise HTTPException(status_code=400, detail="path is required")
    if model_type not in valid:
        raise HTTPException(status_code=400, detail=f"model_type must be one of {valid}")

    def _entry_key(item):
        return item if isinstance(item, str) else item.get("path", item.get("id", ""))

    # Purge the path from all categories, the legacy one included.
    for category in valid | {"gguf_models"}:
        current = config_manager.models_data.get(category, [])
        config_manager.models_data[category] = [
            item for item in current if _entry_key(item) != path
        ]

    # Without an explicit value, estimate VRAM use from the on-disk file size.
    used_vram_gb = payload.get("used_vram_gb")
    if used_vram_gb is None:
        import os
        if os.path.isfile(path):
            # GGUF: ~1.1x file size; HF safetensors: ~1.2x
            factor = 1.1 if path.endswith(".gguf") else 1.2
            used_vram_gb = round(os.path.getsize(path) / 1e9 * factor, 2)

    # Only keys actually present in the request are copied, to keep the JSON tidy.
    entry: dict = {"path": path, "model_type": model_type}
    if used_vram_gb is not None:
        entry["used_vram_gb"] = used_vram_gb
    optional_keys = ("alias", "backend", "load_mode", "n_gpu_layers", "n_ctx",
                     "max_gpu_percent", "manual_ram_gb", "load_in_4bit", "load_in_8bit",
                     "flash_attention", "no_ram", "offload_strategy", "offload_dir",
                     "system_prompt", "parser", "tools_closer_prompt", "grammar_guided")
    entry.update({name: payload[name] for name in optional_keys if name in payload})

    config_manager.models_data.setdefault(model_type, []).append(entry)
    config_manager.save_models()
    return {"success": True}
# --- System endpoints ---
@router.post("/admin/api/system/reload")
......@@ -442,3 +1222,354 @@ async def api_reload_config(username: str = Depends(require_admin)):
from datetime import datetime
# --- Settings page ---
@router.get("/admin/settings", response_class=HTMLResponse)
async def settings_page(request: Request, username: str = Depends(require_admin)):
    """Render the settings page (admin only)."""
    context = {"username": username, "is_admin": True}
    return templates.TemplateResponse(request, "settings.html", context)
@router.get("/admin/api/settings")
async def api_get_settings(username: str = Depends(require_admin)):
    """Serialize the current configuration object into a nested JSON-ready dict."""
    if config_manager is None or config_manager.config is None:
        raise HTTPException(status_code=503, detail="Config manager not initialized")

    cfg = config_manager.config

    def _section(obj, *fields):
        # Copy the named attributes of one config section, in the given order.
        return {name: getattr(obj, name) for name in fields}

    return {
        "server": _section(
            cfg.server,
            "host", "port", "https", "https_key_path", "https_cert_path",
        ),
        "backend": _section(
            cfg.backend,
            "type", "image_backend", "audio_backend", "tts_backend",
        ),
        "models": _section(
            cfg.models,
            "default_load_mode", "hf_cache_dir", "gguf_cache_dir",
        ),
        "offload": _section(
            cfg.offload,
            "directory", "strategy", "max_gpu_percent", "no_ram",
            "load_in_4bit", "load_in_8bit", "manual_ram_gb", "flash_attention",
        ),
        "vulkan": _section(
            cfg.vulkan,
            "n_gpu_layers", "n_ctx", "device_id", "single_gpu",
        ),
        "whisper": _section(cfg.whisper, "server_path", "server_port"),
        "system_prompt": cfg.system_prompt,
        "tools_closer_prompt": cfg.tools_closer_prompt,
        "grammar_guided": cfg.grammar_guided,
        "parser": cfg.parser,
    }
@router.post("/admin/api/settings")
async def api_save_settings(request: Request, username: str = Depends(require_admin)):
    """Update and persist config.json from submitted JSON.

    Only sections present in the payload are updated, and inside a section only
    the keys that were actually submitted are touched; a partial section (for
    example ``{"server": {"port": 8080}}``) therefore no longer clears the
    other fields of that section.

    All values are parsed and validated first and applied afterwards, so a bad
    value cannot leave the in-memory config half-updated.

    Returns:
        ``{"success": True}`` once the config has been saved.

    Raises:
        HTTPException: 503 when the config manager is not initialised;
            400 when the body is not a JSON object, a section is not a JSON
            object, or a value cannot be converted to the expected type.
    """
    if config_manager is None or config_manager.config is None:
        raise HTTPException(status_code=503, detail="Config manager not initialized")

    try:
        data = await request.json()
    except ValueError as exc:  # json.JSONDecodeError / UnicodeDecodeError
        raise HTTPException(status_code=400, detail=f"Invalid JSON body: {exc}") from exc
    if not isinstance(data, dict):
        raise HTTPException(status_code=400, detail="Settings payload must be a JSON object")

    c = config_manager.config

    def _as_is(value):
        # Store the submitted value unchanged.
        return value

    def _or_none(value):
        # Empty / falsy submissions (e.g. "") are stored as None.
        return value or None

    # section name -> {key (== attribute name): converter}
    section_fields = {
        "server": {
            "host": _as_is,
            "port": int,
            "https": bool,
            "https_key_path": _or_none,
            "https_cert_path": _or_none,
        },
        "backend": {
            "type": _as_is,
            "image_backend": _as_is,
            "audio_backend": _as_is,
            "tts_backend": _as_is,
        },
        "models": {
            "default_load_mode": _as_is,
            "hf_cache_dir": _or_none,
            "gguf_cache_dir": _or_none,
        },
        "offload": {
            "directory": _as_is,
            "strategy": _as_is,
            "max_gpu_percent": _or_none,
            "no_ram": bool,
            "load_in_4bit": bool,
            "load_in_8bit": bool,
            "manual_ram_gb": _or_none,
            "flash_attention": bool,
        },
        "vulkan": {
            "n_gpu_layers": int,
            "n_ctx": int,
            "device_id": int,
            "single_gpu": bool,
        },
        "whisper": {
            "server_path": _or_none,
            "server_port": int,
        },
    }
    top_level_fields = {
        "system_prompt": _or_none,
        "tools_closer_prompt": bool,
        "grammar_guided": bool,
        "parser": _as_is,
    }

    # Collect (target object, attribute, converted value) without mutating
    # anything yet, so a conversion error aborts the whole request cleanly.
    staged = []
    try:
        for section, fields in section_fields.items():
            if section not in data:
                continue
            payload = data[section]
            if not isinstance(payload, dict):
                raise ValueError(f"'{section}' must be a JSON object")
            target = getattr(c, section)
            for key, convert in fields.items():
                if key in payload:
                    staged.append((target, key, convert(payload[key])))
        for key, convert in top_level_fields.items():
            if key in data:
                staged.append((c, key, convert(data[key])))
    except (TypeError, ValueError) as exc:
        raise HTTPException(status_code=400, detail=f"Invalid settings value: {exc}") from exc

    for target, attr, value in staged:
        setattr(target, attr, value)

    config_manager.save_config()
    return {"success": True}
# --- HuggingFace model search proxy ---
import re as _re
_QUANT_RE = _re.compile(
r'\b(IQ[1-4]_XX[SML]?|Q[2-8]_K_[MSLX]|Q[2-8]_K|Q[2-8]_[0-9]|F16|F32|BF16)\b',
_re.IGNORECASE,
)
def _hf_file_size(sibling: dict) -> int:
"""Return actual byte size from an HF siblings entry (prefers LFS size)."""
lfs = sibling.get("lfs") or {}
return lfs.get("size") or sibling.get("size") or 0
@router.get("/admin/api/hf-search")
async def api_hf_search(
    q: str = "",
    gguf_mode: str = "gguf",  # "gguf" | "all" | "no-gguf"
    pipeline_tag: str = "",
    sort: str = "downloads",
    sizes: str = "",  # comma-separated e.g. "7b,70b"
    arch: str = "",
    username: str = Depends(require_admin),
):
    """Proxy HuggingFace model search; supports multiple sizes via parallel requests.

    Args:
        q: Free-text search keywords.
        gguf_mode: ``"gguf"`` adds the ``gguf`` filter tag, ``"no-gguf"`` drops
            results whose id contains "gguf", anything else applies no GGUF
            restriction.
        pipeline_tag: Optional tag forwarded as an extra ``filter``.
        sort: One of ``downloads``, ``likes``, ``lastModified``, ``createdAt``;
            any other value falls back to ``downloads``.
        sizes: Comma-separated size keywords; each one (at most six) is issued
            as its own search and the results are merged without duplicates.
        arch: ``"lora"`` adds a filter tag, ``"moe"`` adds a search keyword.

    Returns:
        Up to 20 dicts with ``id``, ``downloads``, ``likes``, ``pipeline_tag``.

    Raises:
        HTTPException: 502 when the upstream request fails. With several sizes
            a partial failure is tolerated, but if *every* request failed the
            error is reported instead of returning an empty result.
    """
    import asyncio
    import urllib.request
    import urllib.parse
    import json as _json

    if sort not in ("downloads", "likes", "lastModified", "createdAt"):
        sort = "downloads"

    # Filter tags shared across all requests
    filter_pairs: list = []
    if gguf_mode == "gguf":
        filter_pairs.append(("filter", "gguf"))
    if pipeline_tag:
        filter_pairs.append(("filter", pipeline_tag))
    if arch == "lora":
        filter_pairs.append(("filter", "lora"))

    # Base search keywords
    base_parts = [q.strip()] if q.strip() else []
    if arch == "moe":
        base_parts.append("moe")

    size_list = [s.strip() for s in sizes.split(",") if s.strip()][:6]

    def _model_id(model: dict) -> str:
        # The API may expose the repo id under either key.
        return model.get("modelId") or model.get("id", "")

    async def _one(extra_kw: str = "") -> list:
        # Run a single search, optionally narrowed by one extra keyword.
        parts = base_parts + ([extra_kw] if extra_kw else [])
        effective_q = " ".join(parts)
        limit = "12" if size_list else "20"
        pairs = []
        if effective_q:
            pairs.append(("search", effective_q))
        pairs.extend(filter_pairs)
        pairs += [("sort", sort), ("direction", "-1"), ("limit", limit), ("full", "false")]
        url = "https://huggingface.co/api/models?" + urllib.parse.urlencode(pairs)
        rq = urllib.request.Request(url, headers={"User-Agent": "coderai-admin/1.0"})

        def _fetch():
            with urllib.request.urlopen(rq, timeout=15) as resp:
                return _json.loads(resp.read())

        # urllib is blocking; keep it off the event loop.
        return await asyncio.to_thread(_fetch)

    try:
        if size_list:
            batches = await asyncio.gather(*[_one(sz) for sz in size_list], return_exceptions=True)
        else:
            batches = [await _one()]

        # gather(return_exceptions=True) hands failures back as values.
        failures = [b for b in batches if isinstance(b, BaseException)]
        if failures and len(failures) == len(batches):
            raise HTTPException(status_code=502, detail=f"HuggingFace API error: {failures[0]}")

        seen: set = set()
        merged: list = []
        for batch in batches:
            if isinstance(batch, BaseException):
                continue
            for m in batch:
                mid = _model_id(m)
                if mid and mid not in seen:
                    seen.add(mid)
                    merged.append(m)

        # Re-sort after merging; "or 0" guards against null counters.
        if sort == "downloads":
            merged.sort(key=lambda m: m.get("downloads") or 0, reverse=True)
        elif sort == "likes":
            merged.sort(key=lambda m: m.get("likes") or 0, reverse=True)

        if gguf_mode == "no-gguf":
            merged = [m for m in merged if "gguf" not in _model_id(m).lower()]

        return [
            {
                "id": _model_id(m),
                "downloads": m.get("downloads", 0),
                "likes": m.get("likes", 0),
                "pipeline_tag": m.get("pipeline_tag", ""),
            }
            for m in merged[:20]
        ]
    except HTTPException:
        # Already a well-formed HTTP error; do not re-wrap it.
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"HuggingFace API error: {e}")
@router.get("/admin/api/hf-model-files")
async def api_hf_model_files(model_id: str, username: str = Depends(require_admin)):
    """Return GGUF files (name, size, VRAM estimate, quant type) for an HF model repo.

    Args:
        model_id: Repository id, e.g. ``"org/name"``; it is URL-quoted (keeping
            ``/``) before being placed in the request path.

    Returns:
        A list of dicts with ``name``, ``size_gb``, ``vram_gb`` and ``quant``,
        sorted by ascending size. ``vram_gb`` is ``size_gb * 1.1``; size fields
        are ``None`` when the API reports no size for a file.

    Raises:
        HTTPException: 502 when the HuggingFace request fails.
    """
    import asyncio
    import urllib.request
    import urllib.parse
    import json as _json

    safe_id = urllib.parse.quote(model_id, safe="/")
    # NOTE(review): the Hub API is expected to include per-file size / LFS
    # metadata in "siblings" only when blobs=true is requested (this is what
    # huggingface_hub's files_metadata=True sends) -- confirm against the API.
    url = f"https://huggingface.co/api/models/{safe_id}?blobs=true"

    def _fetch():
        req = urllib.request.Request(url, headers={"User-Agent": "coderai-admin/1.0"})
        with urllib.request.urlopen(req, timeout=15) as resp:
            return _json.loads(resp.read())

    try:
        # urllib is blocking; run it in a worker thread so the event loop
        # keeps serving other requests (same approach as the search proxy).
        data = await asyncio.to_thread(_fetch)
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"HuggingFace API error: {e}")

    files = []
    for sib in data.get("siblings") or []:
        name = sib.get("rfilename", "")
        if not name.lower().endswith(".gguf"):
            continue
        size_bytes = _hf_file_size(sib)
        size_gb = round(size_bytes / 1024 ** 3, 2) if size_bytes else None
        vram_gb = round(size_gb * 1.1, 1) if size_gb else None
        m = _QUANT_RE.search(name)
        quant = m.group(1).upper() if m else None
        files.append({
            "name": name,
            "size_gb": size_gb,
            "vram_gb": vram_gb,
            "quant": quant,
        })
    # Files without a known size sort first (treated as 0).
    files.sort(key=lambda f: f.get("size_gb") or 0)
    return files
@router.get("/admin/api/hf-model-info")
async def api_hf_model_info(model_id: str, username: str = Depends(require_admin)):
    """Full metadata for a single HuggingFace model repo.

    Args:
        model_id: Repository id, e.g. ``"org/name"``; it is URL-quoted (keeping
            ``/``) before being placed in the request path.

    Returns:
        A dict with repo identity and popularity fields, model-card data
        (``license``, ``language``, ``base_model``), a human-readable parameter
        count (``params_label``), the GGUF files sorted by size, and the total
        number of files in the repo.

    Raises:
        HTTPException: 502 when the HuggingFace request fails.
    """
    import asyncio
    import urllib.request
    import urllib.parse
    import json as _json

    safe_id = urllib.parse.quote(model_id, safe="/")
    # NOTE(review): the Hub API is expected to include per-file size / LFS
    # metadata in "siblings" only when blobs=true is requested (this is what
    # huggingface_hub's files_metadata=True sends) -- confirm against the API.
    url = f"https://huggingface.co/api/models/{safe_id}?blobs=true"

    def _fetch():
        req = urllib.request.Request(url, headers={"User-Agent": "coderai-admin/1.0"})
        with urllib.request.urlopen(req, timeout=15) as resp:
            return _json.loads(resp.read())

    try:
        # urllib is blocking; run it in a worker thread so the event loop
        # keeps serving other requests (same approach as the search proxy).
        data = await asyncio.to_thread(_fetch)
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"HuggingFace API error: {e}")

    card = data.get("cardData") or {}
    siblings = data.get("siblings") or []

    # Parameter count from safetensors metadata
    params_label = None
    sf = data.get("safetensors") or {}
    total = sf.get("total")
    if total:
        if total >= 1e12:
            params_label = f"{total/1e12:.1f}T"
        elif total >= 1e9:
            params_label = f"{total/1e9:.1f}B"
        elif total >= 1e6:
            params_label = f"{total/1e6:.0f}M"
        else:
            params_label = str(total)

    # GGUF files with quant/size info
    gguf_files = []
    for sib in siblings:
        name = sib.get("rfilename", "")
        if not name.lower().endswith(".gguf"):
            continue
        size_bytes = _hf_file_size(sib)
        size_gb = round(size_bytes / 1024 ** 3, 2) if size_bytes else None
        vram_gb = round(size_gb * 1.1, 1) if size_gb else None
        m = _QUANT_RE.search(name)
        gguf_files.append({
            "name": name,
            "size_gb": size_gb,
            "vram_gb": vram_gb,
            "quant": m.group(1).upper() if m else None,
        })
    # Files without a known size sort first (treated as 0).
    gguf_files.sort(key=lambda f: f.get("size_gb") or 0)

    # Relevant tags (strip common noisy ones)
    _noise = {"transformers", "safetensors", "gguf", "endpoints_compatible",
              "has_space", "region:us", "license:other"}
    tags = [t for t in (data.get("tags") or []) if t not in _noise]

    # The model card may list one base model or several.
    base_model = card.get("base_model") or ""
    if isinstance(base_model, list):
        base_model = ", ".join(str(entry) for entry in base_model)

    return {
        "id": data.get("modelId") or data.get("id", ""),
        "author": data.get("author", ""),
        "pipeline_tag": data.get("pipeline_tag", ""),
        "downloads": data.get("downloads", 0),
        "likes": data.get("likes", 0),
        "last_modified": data.get("lastModified", ""),
        "private": data.get("private", False),
        "gated": data.get("gated", False),
        "tags": tags,
        "license": card.get("license", ""),
        "language": card.get("language") or [],
        "base_model": base_model,
        "params_label": params_label,
        "gguf_files": gguf_files,
        # Every repo file counts here, not only the GGUF ones.
        "file_count": len(siblings),
    }
/* CoderAI Admin Dashboard - Dark Theme */
@import url('https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');
:root {
--bg-primary: #0d1117;
--bg-secondary: #161b22;
--bg-tertiary: #21262d;
--border-color: #30363d;
--text-primary: #c9d1d9;
--text-secondary: #8b949e;
--text-muted: #6e7681;
--accent-blue: #58a6ff;
--accent-green: #3fb950;
--accent-red: #f85149;
--accent-yellow: #d29922;
--accent-purple: #bc8cff;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Noto Sans', Helvetica, Arial, sans-serif;
background-color: var(--bg-primary);
color: var(--text-primary);
line-height: 1.6;
}
/* Layout */
.layout {
display: flex;
min-height: 100vh;
}
.sidebar {
width: 260px;
background-color: var(--bg-secondary);
border-right: 1px solid var(--border-color);
display: flex;
flex-direction: column;
position: fixed;
height: 100vh;
overflow-y: auto;
}
.main-content {
flex: 1;
margin-left: 260px;
padding: 2rem;
max-width: 100%;
}
.content-wrapper {
max-width: 1400px;
margin: 0 auto;
}
/* Logo */
.logo {
padding: 1.5rem;
border-bottom: 1px solid var(--border-color);
}
.logo h1 {
font-size: 1.5rem;
color: var(--accent-blue);
font-weight: 600;
}
/* Navigation */
.nav {
flex: 1;
padding: 1rem 0;
}
.nav-item {
display: flex;
align-items: center;
padding: 0.75rem 1.5rem;
color: var(--text-secondary);
text-decoration: none;
transition: all 0.2s;
border-left: 3px solid transparent;
}
.nav-item:hover {
background-color: var(--bg-tertiary);
color: var(--text-primary);
}
.nav-item.active {
background-color: var(--bg-tertiary);
color: var(--accent-blue);
border-left-color: var(--accent-blue);
}
.nav-item .icon {
margin-right: 0.75rem;
font-size: 1.2rem;
}
/* Sidebar Footer */
.sidebar-footer {
padding: 1rem 1.5rem;
border-top: 1px solid var(--border-color);
}
.user-info {
display: flex;
align-items: center;
margin-bottom: 0.75rem;
color: var(--text-secondary);
font-size: 0.9rem;
}
.user-info .icon {
margin-right: 0.5rem;
}
.logout-btn {
display: block;
width: 100%;
padding: 0.5rem;
background-color: var(--bg-tertiary);
color: var(--text-primary);
text-align: center;
text-decoration: none;
border-radius: 6px;
border: 1px solid var(--border-color);
transition: all 0.2s;
}
.logout-btn:hover {
background-color: var(--accent-red);
border-color: var(--accent-red);
}
/* Page Header */
.page-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 2rem;
}
.page-header h1 {
font-size: 2rem;
font-weight: 600;
}
.header-actions {
display: flex;
gap: 0.75rem;
}
/* Cards */
.card {
background-color: var(--bg-secondary);
border: 1px solid var(--border-color);
border-radius: 8px;
padding: 1.5rem;
margin-bottom: 1.5rem;
}
.card h3 {
font-size: 1.25rem;
margin-bottom: 1rem;
color: var(--text-primary);
}
/* Dashboard Grid */
.dashboard-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: 1.5rem;
margin-bottom: 2rem;
}
/* Status Grid */
.status-grid {
display: grid;
gap: 1rem;
}
.status-item {
display: flex;
justify-content: space-between;
padding: 0.5rem 0;
}
.status-item .label {
color: var(--text-secondary);
}
.status-item .value {
font-weight: 600;
}
.status-ok {
color: var(--accent-green);
}
/* Stats Grid */
.stats-grid {
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 1rem;
}
.stat-item {
text-align: center;
}
.stat-value {
font-size: 2rem;
font-weight: 700;
color: var(--accent-blue);
}
.stat-label {
font-size: 0.875rem;
color: var(--text-secondary);
margin-top: 0.25rem;
}
/* Progress Bar */
.progress-bar {
width: 100%;
height: 24px;
background-color: var(--bg-tertiary);
border-radius: 12px;
overflow: hidden;
margin: 1rem 0;
}
.progress-fill {
height: 100%;
background: linear-gradient(90deg, var(--accent-blue), var(--accent-purple));
transition: width 0.3s ease;
}
/* Buttons */
.btn {
padding: 0.5rem 1rem;
border: none;
border-radius: 6px;
font-size: 0.875rem;
font-weight: 500;
cursor: pointer;
transition: all 0.2s;
text-decoration: none;
display: inline-block;
}
.btn-primary {
background-color: var(--accent-blue);
color: #fff;
}
.btn-primary:hover {
background-color: #4a8fd8;
}
.btn-secondary {
background-color: var(--bg-tertiary);
color: var(--text-primary);
border: 1px solid var(--border-color);
}
.btn-secondary:hover {
background-color: var(--border-color);
}
.btn-danger {
background-color: var(--accent-red);
color: #fff;
}
.btn-danger:hover {
background-color: #d63939;
}
.btn-sm {
padding: 0.375rem 0.75rem;
font-size: 0.8125rem;
}
.btn-block {
width: 100%;
display: block;
}
/* Forms */
.form {
max-width: 600px;
}
.form-group {
margin-bottom: 1.5rem;
}
.form-group label {
display: block;
margin-bottom: 0.5rem;
color: var(--text-primary);
font-weight: 500;
}
.form-control {
width: 100%;
padding: 0.625rem;
background-color: var(--bg-tertiary);
border: 1px solid var(--border-color);
border-radius: 6px;
color: var(--text-primary);
font-size: 0.875rem;
}
.form-control:focus {
outline: none;
border-color: var(--accent-blue);
}
.form-text {
display: block;
margin-top: 0.25rem;
font-size: 0.8125rem;
color: var(--text-secondary);
}
.form-actions {
display: flex;
gap: 0.75rem;
margin-top: 1.5rem;
}
/* Tables */
.table-responsive {
overflow-x: auto;
}
.table {
width: 100%;
border-collapse: collapse;
}
.table th,
.table td {
padding: 0.75rem;
text-align: left;
border-bottom: 1px solid var(--border-color);
}
.table th {
color: var(--text-secondary);
font-weight: 600;
font-size: 0.875rem;
text-transform: uppercase;
}
.table tbody tr:hover {
background-color: var(--bg-tertiary);
}
/* Badges */
.badge {
display: inline-block;
padding: 0.25rem 0.5rem;
font-size: 0.75rem;
font-weight: 600;
border-radius: 4px;
}
.badge-primary {
background-color: var(--accent-blue);
color: #fff;
}
.badge-secondary {
background-color: var(--bg-tertiary);
color: var(--text-secondary);
border: 1px solid var(--border-color);
}
/* Alerts */
.alert {
padding: 1rem;
border-radius: 6px;
margin-bottom: 1rem;
}
.alert-error {
background-color: rgba(248, 81, 73, 0.1);
border: 1px solid var(--accent-red);
color: var(--accent-red);
}
.alert-warning {
background-color: rgba(210, 153, 34, 0.1);
border: 1px solid var(--accent-yellow);
color: var(--accent-yellow);
}
/* Login Page */
.login-container {
display: flex;
align-items: center;
justify-content: center;
min-height: 100vh;
padding: 2rem;
}
.login-box {
width: 100%;
max-width: 400px;
background-color: var(--bg-secondary);
border: 1px solid var(--border-color);
border-radius: 8px;
padding: 2rem;
}
.login-header {
text-align: center;
margin-bottom: 2rem;
}
.login-header h1 {
font-size: 2rem;
color: var(--accent-blue);
margin-bottom: 0.5rem;
}
.login-header p {
color: var(--text-secondary);
}
.login-form {
margin-bottom: 1.5rem;
}
.login-footer {
text-align: center;
padding-top: 1rem;
border-top: 1px solid var(--border-color);
}
/* Tabs */
.tabs {
display: flex;
gap: 0.5rem;
margin-bottom: 1.5rem;
border-bottom: 1px solid var(--border-color);
}
.tab-btn {
padding: 0.75rem 1.5rem;
background: none;
border: none;
color: var(--text-secondary);
cursor: pointer;
border-bottom: 2px solid transparent;
transition: all 0.2s;
}
.tab-btn:hover {
color: var(--text-primary);
}
.tab-btn.active {
color: var(--accent-blue);
border-bottom-color: var(--accent-blue);
}
.tab-content {
display: none;
}
.tab-content.active {
display: block;
}
/* Modal */
.modal {
display: none;
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(0, 0, 0, 0.7);
z-index: 1000;
align-items: center;
justify-content: center;
}
.modal-content {
background-color: var(--bg-secondary);
border: 1px solid var(--border-color);
border-radius: 8px;
width: 90%;
max-width: 600px;
max-height: 90vh;
overflow-y: auto;
}
.modal-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 1.5rem;
border-bottom: 1px solid var(--border-color);
}
.modal-header h2 {
font-size: 1.5rem;
}
.modal-close {
background: none;
border: none;
color: var(--text-secondary);
font-size: 1.5rem;
cursor: pointer;
padding: 0;
width: 32px;
height: 32px;
display: flex;
align-items: center;
justify-content: center;
}
.modal-close:hover {
color: var(--text-primary);
}
.modal-body {
padding: 1.5rem;
}
/* Chat Interface */
.chat-container {
display: flex;
flex-direction: column;
height: calc(100vh - 4rem);
background-color: var(--bg-secondary);
border: 1px solid var(--border-color);
border-radius: 8px;
}
.chat-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 1rem 1.5rem;
border-bottom: 1px solid var(--border-color);
}
.chat-controls {
display: flex;
gap: 0.75rem;
align-items: center;
}
.chat-controls select {
min-width: 200px;
}
.chat-messages {
flex: 1;
overflow-y: auto;
padding: 1.5rem;
}
.welcome-message {
text-align: center;
padding: 3rem 1rem;
color: var(--text-secondary);
}
.message {
display: flex;
gap: 1rem;
margin-bottom: 1.5rem;
}
.message-avatar {
width: 36px;
height: 36px;
border-radius: 50%;
background-color: var(--bg-tertiary);
display: flex;
align-items: center;
justify-content: center;
flex-shrink: 0;
}
.message-content {
flex: 1;
padding: 0.75rem 1rem;
background-color: var(--bg-tertiary);
border-radius: 8px;
line-height: 1.6;
}
.message-user .message-content {
background-color: rgba(88, 166, 255, 0.1);
}
.chat-input-container {
padding: 1rem 1.5rem;
border-top: 1px solid var(--border-color);
}
.chat-input-form {
display: flex;
gap: 0.75rem;
}
.chat-input {
flex: 1;
padding: 0.75rem;
background-color: var(--bg-tertiary);
border: 1px solid var(--border-color);
border-radius: 6px;
color: var(--text-primary);
resize: none;
font-family: inherit;
}
.chat-input:focus {
outline: none;
border-color: var(--accent-blue);
}
/* Utility Classes */
.text-muted {
color: var(--text-muted);
}
.text-center {
text-align: center;
}
.text-warning {
color: var(--accent-yellow);
}
/* Token Display */
.token-display {
display: flex;
gap: 0.75rem;
align-items: center;
padding: 1rem;
background-color: var(--bg-tertiary);
border-radius: 6px;
margin: 1rem 0;
}
.token-display code {
flex: 1;
font-family: 'Courier New', monospace;
font-size: 0.875rem;
word-break: break-all;
}
/* Responsive */
@media (max-width: 768px) {
.sidebar {
width: 100%;
position: relative;
height: auto;
}
.main-content {
margin-left: 0;
}
.dashboard-grid {
grid-template-columns: 1fr;
}
.stats-grid {
grid-template-columns: 1fr;
}
--bg: #08090D;
--nav: #0C0D13;
--card: #111218;
--raised: #161820;
--border: #1A1D28;
--border-2: #252836;
--text: #DDE1F0;
--text-2: #636880;
--text-3: #2E3145;
--accent: #6366F1;
--accent-s: rgba(99,102,241,.12);
--green: #34D399;
--amber: #F59E0B;
--red: #F87171;
--font: 'Plus Jakarta Sans', system-ui, sans-serif;
--mono: 'JetBrains Mono', monospace;
}
*,*::before,*::after{margin:0;padding:0;box-sizing:border-box}
html{scroll-behavior:smooth}
body{font-family:var(--font);font-size:14px;background:var(--bg);color:var(--text);line-height:1.5;-webkit-font-smoothing:antialiased}
a{color:inherit;text-decoration:none}
button,input,select,textarea{font-family:inherit}
/* ── Topnav ──────────────────────────────────────────────────────── */
.topnav{
position:sticky;top:0;z-index:200;
height:44px;
background:var(--nav);
border-bottom:1px solid var(--border);
}
.topnav-inner{
max-width:1200px;margin:0 auto;padding:0 1.5rem;
height:100%;display:flex;align-items:center;justify-content:space-between;gap:1.5rem;
}
.topnav-left{display:flex;align-items:center;gap:1.75rem}
.topnav-right{display:flex;align-items:center;gap:.625rem}
/* logo */
.nav-logo{display:flex;align-items:center;gap:.5rem;flex-shrink:0}
.nav-logo-mark{
width:22px;height:22px;
background:var(--accent);
border-radius:5px;
display:flex;align-items:center;justify-content:center;
font-size:9px;font-weight:700;color:#fff;letter-spacing:-.01em;
}
.nav-logo-name{font-size:13px;font-weight:700;letter-spacing:-.01em}
/* nav links */
.nav-links{display:flex;align-items:center;gap:1px}
.nav-link{
padding:.3125rem .625rem;
font-size:13px;font-weight:500;
color:var(--text-2);
border-radius:5px;
transition:color .1s,background .1s;
white-space:nowrap;
}
.nav-link:hover{color:var(--text);background:rgba(255,255,255,.04)}
.nav-link.active{color:var(--text);background:var(--accent-s)}
/* user + logout */
.nav-username{font-size:12.5px;color:var(--text-2)}
.nav-sep{width:1px;height:14px;background:var(--border-2)}
.nav-logout{
font-size:12.5px;color:var(--text-3);
padding:.25rem .5rem;border:1px solid var(--border);border-radius:4px;
transition:all .1s;cursor:pointer;background:transparent;
}
.nav-logout:hover{color:var(--text-2);border-color:var(--border-2)}
/* ── Main ────────────────────────────────────────────────────────── */
.main{min-height:calc(100vh - 44px)}
.container{max-width:1100px;margin:0 auto;padding:2rem 1.5rem}
/* ── Page header ─────────────────────────────────────────────────── */
.page-header{display:flex;justify-content:space-between;align-items:flex-start;margin-bottom:1.5rem;gap:1rem}
.page-header h1{font-size:1.125rem;font-weight:700;letter-spacing:-.01em}
.page-header p{font-size:12.5px;color:var(--text-2);margin-top:.2rem}
.header-actions{display:flex;gap:.5rem;flex-shrink:0;align-items:center}
/* ── Cards ───────────────────────────────────────────────────────── */
.card{background:var(--card);border:1px solid var(--border);border-radius:8px;padding:1.25rem;margin-bottom:1rem}
.card-title{font-size:11px;font-weight:700;text-transform:uppercase;letter-spacing:.07em;color:var(--text-2);margin-bottom:1rem}
/* ── Stat grid ───────────────────────────────────────────────────── */
.stat-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(180px,1fr));gap:.75rem;margin-bottom:1rem}
.stat{background:var(--card);border:1px solid var(--border);border-radius:8px;padding:1.125rem}
.stat-label{font-size:11px;font-weight:600;text-transform:uppercase;letter-spacing:.07em;color:var(--text-3);margin-bottom:.5rem}
.stat-value{font-size:1.625rem;font-weight:700;letter-spacing:-.02em;line-height:1}
.stat-sub{font-size:11.5px;color:var(--text-3);margin-top:.375rem;font-family:var(--mono)}
/* live dot */
.live{display:inline-flex;align-items:center;gap:.375rem;font-size:11.5px;font-weight:600;color:var(--green);font-family:var(--mono)}
.live::before{content:'';width:5px;height:5px;border-radius:50%;background:var(--green);box-shadow:0 0 4px var(--green);animation:blink 2s ease infinite}
@keyframes blink{0%,100%{opacity:1}50%{opacity:.25}}
/* progress */
.progress{height:3px;background:var(--raised);border-radius:2px;margin-top:.75rem;overflow:hidden}
.progress-fill{height:100%;background:var(--accent);transition:width .5s}
.progress-labels{display:flex;justify-content:space-between;font-size:11px;color:var(--text-3);margin-top:.3rem;font-family:var(--mono)}
/* ── Buttons ─────────────────────────────────────────────────────── */
.btn{
display:inline-flex;align-items:center;gap:.375rem;
padding:.375rem .875rem;border:none;border-radius:6px;
font-size:13px;font-weight:600;cursor:pointer;
transition:all .1s;white-space:nowrap;line-height:1.4;
}
.btn svg{width:13px;height:13px;stroke-width:2;flex-shrink:0}
.btn-primary{background:var(--accent);color:#fff}
.btn-primary:hover{background:#7577F3}
.btn-secondary{background:var(--raised);color:var(--text);border:1px solid var(--border)}
.btn-secondary:hover{background:var(--border);border-color:var(--border-2)}
.btn-ghost{background:transparent;color:var(--text-2);border:1px solid var(--border)}
.btn-ghost:hover{color:var(--text);border-color:var(--border-2)}
.btn-danger{background:rgba(248,113,113,.08);color:var(--red);border:1px solid rgba(248,113,113,.2)}
.btn-danger:hover{background:rgba(248,113,113,.15);border-color:rgba(248,113,113,.4)}
.btn-sm{padding:.25rem .625rem;font-size:12px}
.btn-sm svg{width:11px;height:11px}
.btn:disabled{opacity:.4;cursor:not-allowed}
/* ── Forms ───────────────────────────────────────────────────────── */
.form-row{margin-bottom:1rem}
.form-label{display:block;font-size:11px;font-weight:700;text-transform:uppercase;letter-spacing:.06em;color:var(--text-2);margin-bottom:.35rem}
.form-input{
width:100%;padding:.5rem .75rem;
background:var(--raised);border:1px solid var(--border);border-radius:6px;
color:var(--text);font-size:13.5px;
transition:border-color .1s,box-shadow .1s;
}
.form-input:focus{outline:none;border-color:var(--accent);box-shadow:0 0 0 3px rgba(99,102,241,.12)}
.form-input::placeholder{color:var(--text-3)}
select.form-input{
cursor:pointer;appearance:none;
background-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='10' height='5' viewBox='0 0 10 5'%3E%3Cpath d='M0 0l5 5 5-5z' fill='%23363A4D'/%3E%3C/svg%3E");
background-repeat:no-repeat;background-position:right .75rem center;background-size:8px;padding-right:2rem;
}
.form-hint{font-size:11.5px;color:var(--text-3);margin-top:.25rem}
.form-actions{display:flex;gap:.5rem;margin-top:1.25rem;align-items:center}
/* ── Alerts ──────────────────────────────────────────────────────── */
.alert{display:flex;align-items:flex-start;gap:.5rem;padding:.625rem .875rem;border-radius:6px;font-size:13px;margin-bottom:1rem}
.alert-error{background:rgba(248,113,113,.07);border:1px solid rgba(248,113,113,.2);color:var(--red)}
.alert-warning{background:rgba(245,158,11,.07);border:1px solid rgba(245,158,11,.2);color:var(--amber)}
.alert-info{background:var(--accent-s);border:1px solid rgba(99,102,241,.25);color:#A5B4FC}
/* ── Tables ──────────────────────────────────────────────────────── */
.table-wrap{border:1px solid var(--border);border-radius:8px;overflow:hidden}
table{width:100%;border-collapse:collapse}
thead{background:var(--raised)}
th{padding:.5rem 1rem;text-align:left;font-size:11px;font-weight:700;text-transform:uppercase;letter-spacing:.07em;color:var(--text-3);border-bottom:1px solid var(--border);white-space:nowrap}
td{padding:.625rem 1rem;font-size:13px;color:var(--text-2);border-bottom:1px solid var(--border)}
tbody tr:last-child td{border-bottom:none}
tbody tr:hover td{background:rgba(255,255,255,.015);color:var(--text)}
.td-name{font-weight:600;color:var(--text) !important}
td code{font-family:var(--mono);font-size:11.5px;background:var(--raised);padding:.1rem .35rem;border-radius:3px}
.empty-row td{text-align:center;padding:2.5rem;color:var(--text-3) !important}
/* ── Badges ──────────────────────────────────────────────────────── */
.badge{display:inline-flex;align-items:center;padding:.15rem .45rem;font-size:11px;font-weight:700;border-radius:4px;text-transform:uppercase;letter-spacing:.04em}
.badge-admin{background:var(--accent-s);color:#A5B4FC;border:1px solid rgba(99,102,241,.2)}
.badge-user{background:var(--raised);color:var(--text-3);border:1px solid var(--border)}
.badge-ok{background:rgba(52,211,153,.08);color:var(--green);border:1px solid rgba(52,211,153,.2)}
/* ── Modals ──────────────────────────────────────────────────────── */
.modal{display:none;position:fixed;inset:0;background:rgba(0,0,0,.6);backdrop-filter:blur(2px);z-index:500;align-items:center;justify-content:center}
.modal.show{display:flex}
.modal-box{background:var(--card);border:1px solid var(--border-2);border-radius:10px;width:90%;max-width:440px;max-height:90vh;overflow-y:auto;animation:pop .12s ease}
@keyframes pop{from{opacity:0;transform:scale(.97) translateY(-4px)}to{opacity:1;transform:none}}
.modal-head{display:flex;justify-content:space-between;align-items:center;padding:.875rem 1.125rem;border-bottom:1px solid var(--border)}
.modal-title{font-size:14px;font-weight:700}
.modal-close{background:none;border:none;color:var(--text-3);cursor:pointer;font-size:1.125rem;line-height:1;padding:.125rem;border-radius:3px;transition:color .1s}
.modal-close:hover{color:var(--text)}
.modal-body{padding:1.125rem}
/* ── Tabs ────────────────────────────────────────────────────────── */
.tabs{display:flex;gap:1px;border-bottom:1px solid var(--border);margin-bottom:1.25rem}
.tab{padding:.5rem .875rem;font-size:13px;font-weight:500;color:var(--text-2);background:none;border:none;cursor:pointer;border-bottom:2px solid transparent;margin-bottom:-1px;transition:color .1s}
.tab:hover{color:var(--text)}
.tab.active{color:var(--text);border-bottom-color:var(--accent)}
.tab-panel{display:none}
.tab-panel.active{display:block}
/* ── Token box ───────────────────────────────────────────────────── */
.token-box{display:flex;align-items:center;gap:.625rem;padding:.625rem .875rem;background:var(--raised);border:1px solid var(--border);border-radius:6px;margin:.875rem 0}
.token-box code{flex:1;font-family:var(--mono);font-size:11.5px;color:var(--accent);word-break:break-all}
/* ── Chat ────────────────────────────────────────────────────────── */
.chat-wrap{display:flex;flex-direction:column;height:calc(100vh - 44px - 2rem);background:var(--card);border:1px solid var(--border);border-radius:8px;overflow:hidden}
.chat-bar{display:flex;justify-content:space-between;align-items:center;padding:.625rem 1rem;border-bottom:1px solid var(--border);flex-shrink:0;gap:.75rem}
.chat-bar h2{font-size:13.5px;font-weight:700}
.chat-controls{display:flex;gap:.5rem;align-items:center}
.chat-messages{flex:1;overflow-y:auto;padding:1.125rem;scroll-behavior:smooth}
.chat-empty{text-align:center;padding:4rem 1rem;color:var(--text-3)}
.chat-empty h3{font-size:1rem;font-weight:600;color:var(--text-2);margin-bottom:.35rem}
.msg{display:flex;gap:.625rem;margin-bottom:1rem}
.msg-av{width:24px;height:24px;border-radius:5px;display:flex;align-items:center;justify-content:center;font-size:9px;font-weight:700;flex-shrink:0;margin-top:1px;font-family:var(--mono)}
.msg-av.user{background:rgba(99,102,241,.15);color:var(--accent);border:1px solid rgba(99,102,241,.2)}
.msg-av.ai{background:var(--raised);color:var(--text-2);border:1px solid var(--border)}
.msg-body{flex:1}
.msg-meta{font-size:11px;color:var(--text-3);margin-bottom:.25rem;font-family:var(--mono)}
.msg-text{background:var(--raised);border:1px solid var(--border);border-radius:6px;padding:.5rem .75rem;font-size:13.5px;line-height:1.6;color:var(--text);word-wrap:break-word}
.msg.user .msg-text{background:rgba(99,102,241,.06);border-color:rgba(99,102,241,.15)}
.chat-foot{padding:.75rem 1rem;border-top:1px solid var(--border);flex-shrink:0}
.chat-input-row{display:flex;gap:.5rem;align-items:flex-end}
.chat-textarea{flex:1;padding:.5rem .75rem;background:var(--raised);border:1px solid var(--border);border-radius:6px;color:var(--text);font-size:13.5px;resize:none;min-height:38px;max-height:140px;line-height:1.5;transition:border-color .1s}
.chat-textarea:focus{outline:none;border-color:var(--accent)}
.chat-hint{font-size:11px;color:var(--text-3);margin-top:.375rem}
/* ── Login ───────────────────────────────────────────────────────── */
.login-wrap{min-height:100vh;display:flex;align-items:center;justify-content:center;background:var(--bg);padding:1.5rem}
.login-card{width:100%;max-width:360px;background:var(--card);border:1px solid var(--border-2);border-radius:10px;padding:2rem}
.login-logo{display:flex;align-items:center;gap:.625rem;margin-bottom:1.75rem}
.login-mark{width:30px;height:30px;background:var(--accent);border-radius:7px;display:flex;align-items:center;justify-content:center;font-size:11px;font-weight:700;color:#fff}
.login-logo-text h1{font-size:1.0625rem;font-weight:700;letter-spacing:-.01em}
.login-logo-text p{font-size:11.5px;color:var(--text-2)}
.login-footer{margin-top:1.25rem;text-align:center;font-size:11.5px;color:var(--text-3);font-family:var(--mono)}
/* ── Centered form (change pw, etc.) ─────────────────────────────── */
.centered-wrap{min-height:calc(100vh - 44px);display:flex;align-items:center;justify-content:center;padding:1.5rem}
.centered-card{width:100%;max-width:400px;background:var(--card);border:1px solid var(--border);border-radius:10px;padding:1.75rem}
.centered-card h1{font-size:1.0625rem;font-weight:700;margin-bottom:.375rem}
.centered-card .sub{font-size:12.5px;color:var(--text-2);margin-bottom:1.5rem}
/* ── Search bar ──────────────────────────────────────────────────── */
.search-bar{display:flex;gap:.5rem;margin-bottom:1rem}
.search-bar .form-input{flex:1}
/* ── Divider ─────────────────────────────────────────────────────── */
hr{border:none;border-top:1px solid var(--border);margin:1.125rem 0}
/* ── Utils ───────────────────────────────────────────────────────── */
.mono{font-family:var(--mono)}
.muted{color:var(--text-3)}
.dim{color:var(--text-2)}
.small{font-size:12.5px}
.text-green{color:var(--green)}
.text-red{color:var(--red)}
.text-amber{color:var(--amber)}
.flex{display:flex}.items-center{align-items:center}.gap-2{gap:.5rem}.mb-0{margin-bottom:0!important}
/* ── Responsive ──────────────────────────────────────────────────── */

/* Viewports up to 640px wide: tighter navigation and page padding. */
@media (max-width: 640px) {
  .topnav-inner {
    padding: 0 1rem;
  }

  .nav-links {
    gap: 0;
  }

  .nav-link {
    padding: 0.3rem 0.5rem;
    font-size: 12.5px;
  }

  .container {
    padding: 1.25rem 1rem;
  }
}
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% block title %}CoderAI Admin{% endblock %}</title>
<link rel="stylesheet" href="/static/admin/style.css">
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% block title %}CoderAI{% endblock %}</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
<link rel="stylesheet" href="/static/admin/style.css">
{% block head %}{% endblock %}
</head>
<body>
{% if username %}
<div class="layout">
<aside class="sidebar">
<div class="logo">
<h1>CoderAI</h1>
</div>
<nav class="nav">
<a href="/admin" class="nav-item {% if request.url.path == '/admin' %}active{% endif %}">
<span class="icon">📊</span>
<span>Overview</span>
</a>
{% if is_admin %}
<a href="/admin/models" class="nav-item {% if '/models' in request.url.path %}active{% endif %}">
<span class="icon">🤖</span>
<span>Models</span>
</a>
<a href="/admin/tokens" class="nav-item {% if '/tokens' in request.url.path %}active{% endif %}">
<span class="icon">🔑</span>
<span>API Tokens</span>
</a>
<a href="/admin/users" class="nav-item {% if '/users' in request.url.path %}active{% endif %}">
<span class="icon">👥</span>
<span>Users</span>
</a>
{% endif %}
<a href="/chat" class="nav-item {% if '/chat' in request.url.path %}active{% endif %}">
<span class="icon">💬</span>
<span>Chat</span>
</a>
</nav>
<div class="sidebar-footer">
<div class="user-info">
<span class="icon">👤</span>
<span>{{ username }}</span>
</div>
<a href="/logout" class="logout-btn">Logout</a>
</div>
</aside>
<main class="main-content">
<div class="content-wrapper">
{% block content %}{% endblock %}
</div>
</main>
{% if username %}
<nav class="topnav">
<div class="topnav-inner">
<div class="topnav-left">
<a href="/admin" class="nav-logo">
<div class="nav-logo-mark">AI</div>
<span class="nav-logo-name">CoderAI</span>
</a>
<div class="nav-links">
<a href="/admin" class="nav-link {% if request.url.path == '/admin' %}active{% endif %}">Overview</a>
<a href="/chat" class="nav-link {% if request.url.path == '/chat' %}active{% endif %}">Chat</a>
{% if is_admin|default(false) %}
<a href="/admin/models" class="nav-link {% if '/models' in request.url.path %}active{% endif %}">Models</a>
<a href="/admin/tokens" class="nav-link {% if '/tokens' in request.url.path %}active{% endif %}">Tokens</a>
<a href="/admin/users" class="nav-link {% if '/users' in request.url.path %}active{% endif %}">Users</a>
<a href="/admin/settings" class="nav-link {% if '/settings' in request.url.path %}active{% endif %}">Settings</a>
{% endif %}
</div>
</div>
{% else %}
<div class="content-wrapper">
{% block content %}{% endblock %}
<div class="topnav-right">
<span class="nav-username">{{ username }}</span>
<div class="nav-sep"></div>
<a href="/logout" class="nav-logout">Sign out</a>
</div>
{% endif %}
{% block scripts %}{% endblock %}
</div>
</nav>
<main class="main">
{% endif %}
<div class="{% block wrapper_class %}container{% endblock %}">
{% block content %}{% endblock %}
</div>
{% if username %}
</main>
{% endif %}
{% block scripts %}{% endblock %}
</body>
</html>
{% extends "base.html" %}
{% block title %}Change Password - CoderAI{% endblock %}
{% block title %}Change Password — CoderAI{% endblock %}
{% block content %}
<div class="page-header">
<div class="centered-wrap">
<div class="centered-card">
<h1>Change Password</h1>
{% if must_change %}
<p class="text-warning">You must change your password before continuing.</p>
{% endif %}
</div>
<p class="sub">
{% if must_change %}You must set a new password before continuing.
{% else %}Update your account password.{% endif %}
</p>
{% if error %}
<div class="alert alert-error">
{{ error }}
</div>
{% endif %}
{% if error %}
<div class="alert alert-error">{{ error }}</div>
{% endif %}
<div class="card">
<form method="post" action="/admin/change-password" class="form">
<form method="post" action="/admin/change-password">
{% if not must_change %}
<div class="form-row">
<label class="form-label" for="old_password">Current Password</label>
<input class="form-input" type="password" id="old_password" name="old_password"
placeholder="••••••••" required autocomplete="current-password">
</div>
{% endif %}
<div class="form-row">
<label class="form-label" for="new_password">New Password</label>
<input class="form-input" type="password" id="new_password" name="new_password"
placeholder="••••••••" required minlength="8" autocomplete="new-password">
<span class="form-hint">At least 8 characters</span>
</div>
<div class="form-row">
<label class="form-label" for="confirm_password">Confirm Password</label>
<input class="form-input" type="password" id="confirm_password" name="confirm_password"
placeholder="••••••••" required minlength="8" autocomplete="new-password">
</div>
<div class="form-actions">
<button type="submit" class="btn btn-primary">Update password</button>
{% if not must_change %}
<div class="form-group">
<label for="old_password">Current Password</label>
<input type="password" id="old_password" name="old_password" required>
</div>
<a href="/admin" class="btn btn-ghost">Cancel</a>
{% endif %}
<div class="form-group">
<label for="new_password">New Password</label>
<input type="password" id="new_password" name="new_password" required minlength="8">
<small class="form-text">Minimum 8 characters</small>
</div>
<div class="form-group">
<label for="confirm_password">Confirm New Password</label>
<input type="password" id="confirm_password" name="confirm_password" required minlength="8">
</div>
<div class="form-actions">
<button type="submit" class="btn btn-primary">Change Password</button>
{% if not must_change %}
<a href="/admin" class="btn btn-secondary">Cancel</a>
{% endif %}
</div>
</div>
</form>
</div>
</div>
{% endblock %}
{% extends "base.html" %}
{% block title %}Chat - CoderAI{% endblock %}
{% block title %}Chat — CoderAI{% endblock %}
{% block wrapper_class %}{% endblock %}
{% block content %}
<div class="chat-container">
<div class="chat-header">
<div class="chat-title">
<h2>Chat</h2>
</div>
<div class="chat-controls">
<select id="model-selector" class="form-control">
<option value="">Select a model...</option>
</select>
<button class="btn btn-secondary" onclick="newChat()">New Chat</button>
</div>
<div class="chat-wrap" style="margin:0 1.5rem 1rem;border-radius:8px">
<div class="chat-bar">
<h2>Chat</h2>
<div class="chat-controls">
<select id="model-sel" class="form-input" style="font-size:13px;padding:.3rem .625rem;min-width:200px">
<option value="">Select model…</option>
</select>
<button class="btn btn-ghost btn-sm" onclick="newChat()">Clear</button>
</div>
<div class="chat-messages" id="chat-messages">
<div class="welcome-message">
<h3>Welcome to CoderAI Chat</h3>
<p>Select a model and start chatting</p>
</div>
</div>
<div class="chat-messages" id="chat-msgs">
<div class="chat-empty">
<h3>CoderAI Chat</h3>
<p>Select a model and start typing</p>
</div>
<div class="chat-input-container">
<form id="chat-form" class="chat-input-form">
<textarea id="chat-input" class="chat-input"
placeholder="Type your message..."
rows="3"></textarea>
<button type="submit" class="btn btn-primary" id="send-btn">Send</button>
</form>
</div>
<div class="chat-foot">
<div id="typing" style="font-size:11px;color:var(--text-3);height:14px;margin-bottom:.3rem;font-family:var(--mono)"></div>
<div class="chat-input-row">
<textarea id="chat-in" class="chat-textarea" placeholder="Send a message…" rows="1"></textarea>
<button class="btn btn-primary" id="send-btn" onclick="send()" style="padding:.5rem .75rem;align-self:flex-end">
<svg viewBox="0 0 16 16" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="width:13px;height:13px"><line x1="14" y1="2" x2="7" y2="9"/><polygon points="14 2 10 14 7 9 2 6 14 2"/></svg>
</button>
</div>
<div class="chat-hint">Enter to send · Shift+Enter for newline</div>
</div>
</div>
{% endblock %}
{% block scripts %}
<script>
let currentModel = null;
let messages = [];
let history = [];
let busy = false;
// Populate the model dropdown(s) from GET /v1/models.
// Each entry of the response's `data` array contributes one <option> whose
// value and label are both the model's `id`.
// NOTE(review): this body contains two consecutive try blocks that fetch the
// same endpoint but fill different elements ('model-selector' and
// 'model-sel'). They look like two revisions of the same logic fused
// together — confirm which one is current.
async function loadModels() {
try {
const response = await fetch('/v1/models');
const data = await response.json();
const selector = document.getElementById('model-selector');
// Reset to just the placeholder entry before appending fetched models.
selector.innerHTML = '<option value="">Select a model...</option>';
data.data.forEach(model => {
const option = document.createElement('option');
option.value = model.id;
option.textContent = model.id;
selector.appendChild(option);
});
} catch (error) {
// Failures are only logged; the dropdown is left as-is.
console.error('Failed to load models:', error);
}
try {
const d = await fetch('/v1/models').then(r => r.json());
const sel = document.getElementById('model-sel');
sel.innerHTML = '<option value="">Select model…</option>';
// Tolerates a response without a `data` array by iterating an empty list.
(d.data || []).forEach(m => {
const o = document.createElement('option');
o.value = o.textContent = m.id;
sel.appendChild(o);
});
// Errors in this block are swallowed silently (empty catch).
} catch {}
}
document.getElementById('model-selector').addEventListener('change', (e) => {
currentModel = e.target.value;
});
// Start a fresh conversation: clear the stored message list and replace the
// transcript area with an empty-state placeholder.
// NOTE(review): the body resets two state arrays (`messages`, `history`) and
// two containers ('chat-messages', 'chat-msgs'). These look like two
// revisions of the same function fused together — confirm which is current.
function newChat() {
messages = [];
document.getElementById('chat-messages').innerHTML = `
<div class="welcome-message">
<h3>New Chat Started</h3>
<p>Select a model and start chatting</p>
</div>
`;
history = [];
document.getElementById('chat-msgs').innerHTML = '<div class="chat-empty"><h3>New conversation</h3><p>Start typing below</p></div>';
}
function addMessage(role, content) {
const messagesDiv = document.getElementById('chat-messages');
// Remove welcome message if present
const welcome = messagesDiv.querySelector('.welcome-message');
if (welcome) {
welcome.remove();
}
const messageDiv = document.createElement('div');
messageDiv.className = `message message-${role}`;
const avatar = document.createElement('div');
avatar.className = 'message-avatar';
avatar.textContent = role === 'user' ? '👤' : '🤖';
const contentDiv = document.createElement('div');
contentDiv.className = 'message-content';
contentDiv.textContent = content;
messageDiv.appendChild(avatar);
messageDiv.appendChild(contentDiv);
messagesDiv.appendChild(messageDiv);
// Scroll to bottom
messagesDiv.scrollTop = messagesDiv.scrollHeight;
// Append one chat bubble to the transcript (#chat-msgs) and scroll to it.
//   role - 'user' renders as "You"; any other value renders as the assistant.
//   text - message body; it is stringified, HTML-escaped (&, <, >) and its
//          newlines become <br> before being inserted as markup.
function addMsg(role, text) {
  const transcript = document.getElementById('chat-msgs');

  // The empty-state placeholder disappears once the first message arrives.
  const placeholder = transcript.querySelector('.chat-empty');
  if (placeholder) {
    placeholder.remove();
  }

  const stamp = new Date().toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
  const fromUser = role === 'user';

  const bubble = document.createElement('div');
  bubble.className = 'msg ' + role;

  // Escape before insertion: the body ends up in innerHTML.
  const safeBody = String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/\n/g, '<br>');

  // Joined with "\n" (leading empty entry included) so the markup string is
  // exactly the same as a multi-line template literal would produce.
  bubble.innerHTML = [
    '',
    `<div class="msg-av ${fromUser ? 'user' : 'ai'}">${fromUser ? 'YOU' : 'AI'}</div>`,
    '<div class="msg-body">',
    `<div class="msg-meta">${fromUser ? 'You' : 'Assistant'} · ${stamp}</div>`,
    `<div class="msg-text">${safeBody}</div>`,
    '</div>'
  ].join('\n');

  transcript.appendChild(bubble);
  transcript.scrollTop = transcript.scrollHeight;
}
// Form-based send flow: on submit, post the whole conversation to
// /v1/chat/completions (non-streaming) and render the assistant's reply.
// Requires a model to be selected and a non-blank message.
document.getElementById('chat-form').addEventListener('submit', async (event) => {
  event.preventDefault();

  if (!currentModel) {
    alert('Please select a model first');
    return;
  }

  const box = document.getElementById('chat-input');
  const prompt = box.value.trim();
  if (prompt === '') return;

  // Show and record the user's turn, then empty the input box.
  addMessage('user', prompt);
  messages.push({ role: 'user', content: prompt });
  box.value = '';

  // Lock the button for the duration of the request.
  const button = document.getElementById('send-btn');
  button.disabled = true;
  button.textContent = 'Sending...';

  try {
    const payload = JSON.stringify({ model: currentModel, messages: messages, stream: false });
    const response = await fetch('/v1/chat/completions', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: payload
    });
    if (!response.ok) throw new Error('Request failed');

    const data = await response.json();
    const answer = data.choices[0].message.content;
    addMessage('assistant', answer);
    messages.push({ role: 'assistant', content: answer });
  } catch (error) {
    // Failures are surfaced in the transcript as an assistant message.
    addMessage('assistant', 'Error: ' + error.message);
  } finally {
    button.disabled = false;
    button.textContent = 'Send';
  }
});
// Send the text in #chat-in to /v1/chat/completions using the model chosen
// in #model-sel, then render the reply.
// Does nothing while a request is in flight (`busy`), when no model is
// selected (the selector is focused instead), or when the input is blank.
// NOTE(review): the 'chat-input' keydown listener below references element
// ids that the rest of this function does not use and is never closed; it
// looks like a leftover from an earlier revision — confirm and remove.
async function send() {
if (busy) return;
const model = document.getElementById('model-sel').value;
if (!model) { document.getElementById('model-sel').focus(); return; }
const input = document.getElementById('chat-in');
const text = input.value.trim();
if (!text) return;
// Handle Enter key (Shift+Enter for new line)
document.getElementById('chat-input').addEventListener('keydown', (e) => {
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
document.getElementById('chat-form').dispatchEvent(new Event('submit'));
}
// Show and record the user's turn, then reset the input box and its height.
addMsg('user', text);
history.push({role:'user', content:text});
input.value = '';
input.style.height = 'auto';
// Mark the request as in flight: block re-entry, disable the button and
// show the typing indicator.
busy = true;
document.getElementById('send-btn').disabled = true;
document.getElementById('typing').textContent = 'Assistant is typing…';
try {
// The full conversation so far is sent on every request; streaming is off.
const r = await fetch('/v1/chat/completions', {
method:'POST', headers:{'Content-Type':'application/json'},
body: JSON.stringify({model, messages: history, stream:false})
});
if (!r.ok) throw new Error('HTTP ' + r.status);
const d = await r.json();
// Only the first choice is used; a response without choices throws and is
// reported through the catch below.
const reply = d.choices[0].message.content;
addMsg('assistant', reply);
history.push({role:'assistant', content:reply});
} catch (e) {
// Errors are shown in the transcript but not added to `history`.
addMsg('assistant', 'Error: ' + e.message);
} finally {
// Always release the busy state and clear the typing indicator.
busy = false;
document.getElementById('send-btn').disabled = false;
document.getElementById('typing').textContent = '';
}
}
// Composer wiring for #chat-in. Scoped in a block so the helper binding
// does not leak into the page's global scope.
{
  const composer = document.getElementById('chat-in');

  // Enter sends; Shift+Enter falls through and inserts a newline.
  composer.addEventListener('keydown', (event) => {
    if (event.key !== 'Enter' || event.shiftKey) return;
    event.preventDefault();
    send();
  });

  // Auto-grow: collapse first so scrollHeight reflects the content, then
  // size to fit, capped at 140px.
  composer.addEventListener('input', () => {
    composer.style.height = 'auto';
    composer.style.height = Math.min(composer.scrollHeight, 140) + 'px';
  });
}
// Load models on page load
loadModels();
</script>
{% endblock %}
{% extends "base.html" %}
{% block title %}Dashboard - CoderAI{% endblock %}
{% block title %}Overview — CoderAI{% endblock %}
{% block content %}
<div class="page-header">
<div>
<h1>Overview</h1>
<div class="header-actions">
<button class="btn btn-secondary" onclick="reloadConfig()">Reload Config</button>
</div>
<p>System status</p>
</div>
<div class="header-actions">
<span class="live" id="live-label">Live</span>
</div>
</div>
<div class="dashboard-grid">
<div class="card">
<h3>System Status</h3>
<div class="status-grid">
<div class="status-item">
<span class="label">Backend:</span>
<span class="value" id="backend">Loading...</span>
</div>
<div class="status-item">
<span class="label">GPU:</span>
<span class="value" id="gpu">Loading...</span>
</div>
<div class="status-item">
<span class="label">Uptime:</span>
<span class="value" id="uptime">Loading...</span>
</div>
<div class="status-item">
<span class="label">Status:</span>
<span class="value status-ok" id="status">OK</span>
</div>
</div>
</div>
<div class="card">
<h3>Active Models</h3>
<div id="active-models">
<p class="text-muted">No models loaded</p>
</div>
{% if is_admin %}
<a href="/admin/models" class="btn btn-primary btn-sm">Manage Models</a>
{% endif %}
<div class="stat-grid">
<div class="stat">
<div class="stat-label">Status</div>
<div class="stat-value small" id="sys-status" style="font-size:1.125rem"></div>
<div class="stat-sub" id="sys-backend">loading…</div>
</div>
<div class="stat">
<div class="stat-label">Models Loaded</div>
<div class="stat-value" id="models-count"></div>
<div class="stat-sub" id="models-mode"></div>
</div>
<div class="stat">
<div class="stat-label">Requests</div>
<div class="stat-value" id="req-total">0</div>
<div class="stat-sub"><span id="req-active">0</span> active</div>
</div>
<div class="stat">
<div class="stat-label">VRAM</div>
<div class="stat-value" id="vram-pct"></div>
<div class="progress" style="margin-top:.625rem">
<div class="progress-fill" id="vram-bar" style="width:0%"></div>
</div>
<div class="card">
<h3>Request Stats</h3>
<div class="stats-grid">
<div class="stat-item">
<div class="stat-value" id="total-requests">0</div>
<div class="stat-label">Total Requests</div>
</div>
<div class="stat-item">
<div class="stat-value" id="active-requests">0</div>
<div class="stat-label">Active</div>
</div>
<div class="stat-item">
<div class="stat-value" id="queued-requests">0</div>
<div class="stat-label">Queued</div>
</div>
</div>
</div>
<div class="card">
<h3>VRAM Usage</h3>
<div class="progress-bar">
<div class="progress-fill" id="vram-progress" style="width: 0%"></div>
</div>
<p class="text-muted" id="vram-text">0 GB / 0 GB (0%)</p>
<div class="progress-labels">
<span id="vram-used"></span><span id="vram-total"></span>
</div>
</div>
</div>
<div class="card">
<h3>Recent Activity</h3>
<div class="table-responsive">
<table class="table">
<thead>
<tr>
<th>Time</th>
<th>Model</th>
<th>Type</th>
<th>Status</th>
<th>Duration</th>
</tr>
</thead>
<tbody id="activity-table">
<tr>
<td colspan="5" class="text-center text-muted">No recent activity</td>
</tr>
</tbody>
</table>
</div>
<div class="card" style="margin-bottom:1rem">
<div class="card-title">Models</div>
<div id="active-models"><span class="muted small">No models loaded</span></div>
{% if is_admin %}
<div style="margin-top:.875rem">
<a href="/admin/models" class="btn btn-ghost btn-sm">Manage models</a>
</div>
{% endif %}
</div>
<div class="card mb-0">
<div class="card-title">Recent Activity</div>
<div class="table-wrap" style="border:none">
<table>
<thead><tr><th>Time</th><th>Model</th><th>Type</th><th>Status</th><th>Duration</th></tr></thead>
<tbody id="activity-body">
<tr class="empty-row"><td colspan="5">No recent activity</td></tr>
</tbody>
</table>
</div>
</div>
{% endblock %}
{% block scripts %}
<script>
async function loadStatus() {
try {
const response = await fetch('/admin/api/status');
const data = await response.json();
document.getElementById('backend').textContent = data.backend || 'auto';
document.getElementById('uptime').textContent = data.uptime || '0h 0m';
document.getElementById('status').textContent = data.status === 'ok' ? 'OK' : 'Error';
// Update models loaded count
if (data.models_loaded > 0) {
document.getElementById('active-models').innerHTML =
`<p>${data.models_loaded} model(s) loaded</p>`;
}
} catch (error) {
console.error('Failed to load status:', error);
// Fetch /admin/api/status and refresh the overview widgets: system status,
// backend/load mode, model counts and badges, VRAM usage and request counters.
// Any failure (network or JSON) marks the status tile as 'Offline'.
// NOTE(review): an `async function reloadConfig()` definition appears in the
// middle of this body and calls `loadStatus()`; it looks like a leftover
// from an earlier revision spliced into this function — confirm and remove.
async function poll() {
try {
const d = await fetch('/admin/api/status').then(r => r.json());
const ok = d.status === 'ok';
document.getElementById('sys-status').textContent = ok ? 'Online' : 'Error';
document.getElementById('sys-status').className = 'stat-value small ' + (ok ? 'text-green' : 'text-red');
// Falls back from backend to load_mode to an em dash when neither is set.
document.getElementById('sys-backend').textContent = d.backend || d.load_mode || '—';
document.getElementById('models-count').textContent = d.models_loaded ?? '—';
document.getElementById('models-mode').textContent = d.load_mode ? d.load_mode + ' mode' : '';
const loaded = d.loaded_models || [];
const enabled = d.enabled_models || [];
// Enabled models that are not currently loaded get a dimmed badge.
const loadedSet = new Set(loaded);
const notLoaded = enabled.filter(m => !loadedSet.has(m));
let html = '';
// NOTE(review): model names are interpolated into innerHTML without
// escaping; a name containing markup would be rendered as HTML. Consider
// escaping or building the badges with textContent.
if(loaded.length) html += loaded.map(m => `<span class="badge badge-admin" style="margin:.125rem" title="Loaded">● ${m}</span>`).join('');
if(notLoaded.length) html += notLoaded.map(m => `<span class="badge" style="margin:.125rem;opacity:.55" title="Enabled, not loaded">○ ${m}</span>`).join('');
document.getElementById('active-models').innerHTML = html || '<span class="muted small">No models loaded</span>';
if (d.vram) {
// NOTE(review): a total of 0 would make pct NaN/Infinity — confirm the API
// never reports that.
const pct = Math.round(d.vram.used / d.vram.total * 100);
document.getElementById('vram-pct').textContent = pct + '%';
document.getElementById('vram-bar').style.width = pct + '%';
document.getElementById('vram-used').textContent = d.vram.used.toFixed(1) + ' GB';
document.getElementById('vram-total').textContent = d.vram.total.toFixed(1) + ' GB';
} else {
document.getElementById('vram-pct').textContent = 'N/A';
}
}
async function reloadConfig() {
if (confirm('Reload configuration from disk? This will not restart the server.')) {
try {
const response = await fetch('/admin/api/system/reload', { method: 'POST' });
if (response.ok) {
alert('Configuration reloaded successfully');
loadStatus();
} else {
alert('Failed to reload configuration');
}
} catch (error) {
alert('Error: ' + error.message);
}
if (d.requests) {
document.getElementById('req-total').textContent = d.requests.total ?? 0;
document.getElementById('req-active').textContent = d.requests.active ?? 0;
}
} catch {
document.getElementById('sys-status').textContent = 'Offline';
document.getElementById('sys-status').className = 'stat-value small text-red';
}
}
// Load status on page load
loadStatus();
// Refresh status every 5 seconds
setInterval(loadStatus, 5000);
poll();
setInterval(poll, 7000);
</script>
{% endblock %}
{% extends "base.html" %}
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Sign in — CoderAI</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
<link rel="stylesheet" href="/static/admin/style.css">
</head>
<body>
<div class="login-wrap">
<div class="login-card">
<div class="login-logo">
<div class="login-mark">AI</div>
<div class="login-logo-text">
<h1>CoderAI</h1>
<p>Local AI Server</p>
</div>
</div>
{% block title %}Login - CoderAI{% endblock %}
{% if error %}
<div class="alert alert-error" style="margin-bottom:1.25rem">{{ error }}</div>
{% endif %}
{% block content %}
<div class="login-container">
<div class="login-box">
<div class="login-header">
<h1>CoderAI</h1>
<p>Admin Dashboard</p>
</div>
{% if error %}
<div class="alert alert-error">
{{ error }}
</div>
{% endif %}
<form method="post" action="/login" class="login-form">
<div class="form-group">
<label for="username">Username</label>
<input type="text" id="username" name="username" required autofocus>
</div>
<div class="form-group">
<label for="password">Password</label>
<input type="password" id="password" name="password" required>
</div>
<button type="submit" class="btn btn-primary btn-block">Login</button>
</form>
<div class="login-footer">
<p class="text-muted">Default credentials: admin / admin</p>
</div>
</div>
<form method="post" action="/login">
<div class="form-row">
<label class="form-label" for="username">Username</label>
<input class="form-input" type="text" id="username" name="username"
placeholder="admin" required autofocus autocomplete="username">
</div>
<div class="form-row">
<label class="form-label" for="password">Password</label>
<input class="form-input" type="password" id="password" name="password"
placeholder="••••••••" required autocomplete="current-password">
</div>
<button type="submit" class="btn btn-primary" style="width:100%;justify-content:center;margin-top:.875rem;padding:.5625rem">
Sign in
</button>
</form>
<div class="login-footer">default: admin / admin</div>
</div>
</div>
{% endblock %}
</body>
</html>
{% extends "base.html" %}
{% block title %}Models — CoderAI{% endblock %}
{% block title %}Models - CoderAI{% endblock %}
{% block head %}
<style>
/* Chips: small pill-shaped toggles; `.on` marks the selected state. */
.chip {
  display: inline-flex;
  align-items: center;
  padding: 0.2rem 0.55rem;
  border-radius: 20px;
  cursor: pointer;
  background: var(--raised);
  border: 1px solid var(--border);
  font-size: 11px;
  font-weight: 500;
  user-select: none;
  white-space: nowrap;
  transition: background 0.1s, border-color 0.1s, color 0.1s;
}

.chip:hover {
  border-color: var(--border-2);
}

.chip.on {
  background: var(--accent-s);
  border-color: rgba(99, 102, 241, 0.35);
  color: #A5B4FC;
}

/* Wrapping row that holds a set of chips. */
.chip-row {
  display: flex;
  flex-wrap: wrap;
  gap: 0.3rem;
  align-items: center;
}

/* GGUF 3-way toggle: a segmented control whose buttons share one outline.
   The !important flags are kept as-is so these rules keep winning over
   whatever button styles they were written to override. */
.tog-grp {
  display: inline-flex;
  border: 1px solid var(--border);
  border-radius: 6px;
  overflow: hidden;
}

.tog-btn {
  border: none !important;
  border-radius: 0 !important;
  background: var(--raised) !important;
  color: var(--text-2) !important;
  font-size: 11px !important;
  padding: 0.25rem 0.65rem !important;
  cursor: pointer;
  font-weight: 500;
  transition: all 0.1s;
}

.tog-btn:hover {
  color: var(--text) !important;
}

.tog-btn.on {
  background: var(--accent-s) !important;
  color: #A5B4FC !important;
}

/* Thin separator between adjacent segments. */
.tog-btn + .tog-btn {
  border-left: 1px solid var(--border) !important;
}

/* Filter label: tiny uppercase caption. */
.fl {
  font-size: 10px;
  font-weight: 700;
  text-transform: uppercase;
  letter-spacing: 0.06em;
  color: var(--text-2);
  white-space: nowrap;
}

/* Info drawer: dimmed full-screen overlay plus a right-hand panel that
   is translated fully off-screen until it receives the `.open` class. */
#info-overlay {
  display: none;
  position: fixed;
  inset: 0;
  background: rgba(0, 0, 0, 0.45);
  z-index: 200;
}

#info-drawer {
  position: fixed;
  top: 0;
  right: 0;
  width: 540px;
  max-width: 96vw;
  height: 100vh;
  background: var(--bg);
  border-left: 1px solid var(--border);
  z-index: 201;
  overflow-y: auto;
  transform: translateX(100%);
  transition: transform 0.22s cubic-bezier(0.4, 0, 0.2, 1);
}

#info-drawer.open {
  transform: translateX(0);
}

/* Header bar that stays pinned while the drawer content scrolls. */
#info-sticky {
  position: sticky;
  top: 0;
  background: var(--bg);
  border-bottom: 1px solid var(--border);
  z-index: 1;
  padding: 1rem 1.25rem;
  display: flex;
  align-items: center;
  gap: 0.75rem;
}

/* Single-line title, truncated with an ellipsis when too long. */
#info-title {
  font-weight: 600;
  font-size: 14px;
  flex: 1;
  overflow: hidden;
  text-overflow: ellipsis;
  white-space: nowrap;
}
</style>
{% endblock %}
{% block content %}
<div class="page-header">
<div>
<h1>Models</h1>
<div class="header-actions">
<button class="btn btn-primary" onclick="showDownloadModal()">Download Model</button>
</div>
<p>Configure and download AI models</p>
</div>
<div class="header-actions">
<button class="btn btn-secondary" onclick="openModal('upload-modal')">Upload GGUF</button>
<button class="btn btn-primary" onclick="openModal('dl-modal')">Download model</button>
</div>
</div>
<div class="tabs">
<button class="tab-btn active" onclick="showTab('local')">Local Models</button>
<button class="tab-btn" onclick="showTab('search')">Search HuggingFace</button>
<button class="tab-btn" onclick="showTab('config')">Configuration</button>
<button class="tab active" onclick="switchTab('local', this)">Local models</button>
<button class="tab" onclick="switchTab('search', this)">Find on HuggingFace</button>
</div>
<div id="tab-local" class="tab-content active">
<div class="card">
<h3>Text Models</h3>
<div id="text-models-list">
<p class="text-muted">No text models configured</p>
</div>
<!-- active downloads strip (all tabs) -->
<div id="dl-strip" style="display:none;background:var(--raised);border:1px solid var(--border);border-radius:8px;margin-bottom:1rem;padding:.5rem .875rem">
<div style="font-size:10px;font-weight:700;text-transform:uppercase;letter-spacing:.06em;color:var(--text-2);margin-bottom:.4rem">Active downloads</div>
<div id="dl-strip-list"></div>
</div>
<!-- LOCAL -->
<div id="tab-local" class="tab-panel active">
<!-- cache stats -->
<div class="card">
<div style="display:flex;align-items:center;justify-content:space-between;flex-wrap:wrap;gap:.5rem;margin-bottom:1rem">
<div class="card-title" style="margin:0">Storage</div>
<div style="display:flex;gap:.35rem;flex-wrap:wrap">
<button class="btn btn-ghost btn-sm" onclick="refreshLocal()">↺ Refresh</button>
<button class="btn btn-danger btn-sm" onclick="clearCacheConfirm('hf')">Clear HF</button>
<button class="btn btn-danger btn-sm" onclick="clearCacheConfirm('gguf')">Clear GGUF</button>
<button class="btn btn-danger btn-sm" onclick="clearCacheConfirm('all')">Clear all</button>
</div>
</div>
<div class="card">
<h3>Image Models</h3>
<div id="image-models-list">
<p class="text-muted">No image models configured</p>
</div>
<div style="display:grid;grid-template-columns:repeat(3,1fr);gap:.75rem" id="cache-stats-row">
<div style="background:var(--bg);border:1px solid var(--border);border-radius:6px;padding:.75rem 1rem">
<div class="fl">HuggingFace</div>
<div id="stat-hf-size" style="font-size:1.2rem;font-weight:600;margin:.3rem 0"></div>
<div id="stat-hf-count" class="muted small">— models</div>
<div id="stat-hf-free" class="muted small" style="margin-top:.25rem">— free</div>
</div>
<div style="background:var(--bg);border:1px solid var(--border);border-radius:6px;padding:.75rem 1rem">
<div class="fl">GGUF cache</div>
<div id="stat-gguf-size" style="font-size:1.2rem;font-weight:600;margin:.3rem 0"></div>
<div id="stat-gguf-count" class="muted small">— files</div>
<div id="stat-gguf-free" class="muted small" style="margin-top:.25rem">— free</div>
</div>
<div style="background:var(--bg);border:1px solid var(--border);border-radius:6px;padding:.75rem 1rem">
<div class="fl">Total used</div>
<div id="stat-total-size" style="font-size:1.2rem;font-weight:600;margin:.3rem 0"></div>
<div class="muted small">on disk</div>
</div>
</div>
<div class="card">
<h3>Audio Models</h3>
<div id="audio-models-list">
<p class="text-muted">No audio models configured</p>
</div>
</div>
<!-- HF models -->
<div class="card">
<div class="card-title">HuggingFace models <span id="hf-model-badge" class="muted small"></span></div>
<div id="hf-models-list"><span class="muted small">Loading…</span></div>
</div>
<!-- GGUF files -->
<div class="card mb-0">
<div class="card-title">GGUF files <span id="gguf-file-badge" class="muted small"></span></div>
<div id="gguf-models-list"><span class="muted small">Loading…</span></div>
</div>
</div>
<!-- SEARCH -->
<div id="tab-search" class="tab-panel">
<div class="card">
<div class="card-title">Search HuggingFace</div>
<!-- query row -->
<div class="search-bar" style="margin-bottom:.75rem">
<input type="text" id="search-q" class="form-input" placeholder="Search models (e.g. llama, mistral, qwen…)">
<button class="btn btn-secondary" onclick="doSearch()">Search</button>
</div>
<div class="card">
<h3>GGUF Models</h3>
<div id="gguf-models-list">
<p class="text-muted">No GGUF models configured</p>
<!-- filter row 1: toggles + dropdowns -->
<div style="display:flex;flex-wrap:wrap;gap:.625rem;align-items:center;margin-bottom:.625rem">
<div style="display:flex;align-items:center;gap:.35rem">
<span class="fl">Format</span>
<div class="tog-grp">
<button class="tog-btn on" data-val="gguf">GGUF</button>
<button class="tog-btn" data-val="all">All</button>
<button class="tog-btn" data-val="no-gguf">No GGUF</button>
</div>
</div>
<div style="display:flex;align-items:center;gap:.35rem">
<span class="fl">Type</span>
<select id="filter-pipeline" class="form-input" style="padding:.25rem .4rem;font-size:12px">
<option value="">All</option>
<option value="text-generation">Text generation</option>
<option value="text2text-generation">Text-to-text</option>
<option value="text-to-image">Text-to-image</option>
<option value="image-to-text">Image-to-text</option>
<option value="automatic-speech-recognition">Speech recog.</option>
<option value="text-to-speech">TTS</option>
<option value="feature-extraction">Embeddings</option>
</select>
</div>
<div style="display:flex;align-items:center;gap:.35rem">
<span class="fl">Arch</span>
<select id="filter-arch" class="form-input" style="padding:.25rem .4rem;font-size:12px">
<option value="">Any</option>
<option value="moe">MoE</option>
<option value="lora">LoRA</option>
</select>
</div>
<div style="display:flex;align-items:center;gap:.35rem">
<span class="fl">Sort</span>
<select id="filter-sort" class="form-input" style="padding:.25rem .4rem;font-size:12px">
<option value="downloads">Most downloaded</option>
<option value="likes">Most liked</option>
<option value="lastModified">Recently updated</option>
</select>
</div>
</div>
<!-- filter row 2: size chips -->
<div style="display:flex;align-items:flex-start;gap:.5rem;margin-bottom:.5rem">
<span class="fl" style="padding-top:.25rem;min-width:32px">Size</span>
<div class="chip-row" id="size-chips">
<span class="chip" data-val="0.5b">0.5B</span>
<span class="chip" data-val="1b">1B</span>
<span class="chip" data-val="1.5b">1.5B</span>
<span class="chip" data-val="3b">3B</span>
<span class="chip" data-val="4b">4B</span>
<span class="chip" data-val="7b">7B</span>
<span class="chip" data-val="8b">8B</span>
<span class="chip" data-val="9b">9B</span>
<span class="chip" data-val="12b">12B</span>
<span class="chip" data-val="13b">13B</span>
<span class="chip" data-val="14b">14B</span>
<span class="chip" data-val="22b">22B</span>
<span class="chip" data-val="27b">27B</span>
<span class="chip" data-val="32b">32B</span>
<span class="chip" data-val="34b">34B</span>
<span class="chip" data-val="40b">40B</span>
<span class="chip" data-val="70b">70B</span>
<span class="chip" data-val="72b">72B</span>
<span class="chip" data-val="90b">90B</span>
<span class="chip" data-val="123b">123B</span>
<span class="chip" data-val="235b">235B</span>
<span class="chip" data-val="671b">671B</span>
</div>
</div>
<!-- filter row 3: quant chips (file-level filter) -->
<div style="display:flex;align-items:flex-start;gap:.5rem;margin-bottom:1rem">
<span class="fl" style="padding-top:.25rem;min-width:32px">Quant</span>
<div class="chip-row" id="quant-chips">
<span class="chip" data-val="Q2_K">Q2_K</span>
<span class="chip" data-val="Q3_K_M">Q3_K_M</span>
<span class="chip" data-val="Q4_K_S">Q4_K_S</span>
<span class="chip" data-val="Q4_K_M">Q4_K_M ★</span>
<span class="chip" data-val="Q5_K_S">Q5_K_S</span>
<span class="chip" data-val="Q5_K_M">Q5_K_M</span>
<span class="chip" data-val="Q6_K">Q6_K</span>
<span class="chip" data-val="Q8_0">Q8_0</span>
<span class="chip" data-val="F16">F16</span>
<span class="chip" data-val="IQ4_XS">IQ4_XS</span>
<span class="chip" data-val="IQ3_XXS">IQ3_XXS</span>
<span class="chip" data-val="IQ2_XXS">IQ2_XXS</span>
</div>
</div>
<div id="search-results"><span class="muted small">Enter a query above to search</span></div>
</div>
</div>
<div id="tab-search" class="tab-content">
<div class="card">
<h3>Search HuggingFace Models</h3>
<div class="search-form">
<input type="text" id="search-query" placeholder="Search models..." class="form-control">
<div class="filter-group">
<label>
<input type="checkbox" id="filter-gguf" checked>
GGUF only
</label>
<label>
<input type="checkbox" id="filter-text" checked>
Text models
</label>
<label>
<input type="checkbox" id="filter-image">
Image models
</label>
</div>
<button class="btn btn-primary" onclick="searchModels()">Search</button>
<!-- Download modal -->
<div id="dl-modal" class="modal">
<div class="modal-box">
<div class="modal-head">
<span class="modal-title">Download model</span>
<button class="modal-close" onclick="closeModal('dl-modal')">×</button>
</div>
<div class="modal-body">
<div id="dl-form">
<div class="form-row">
<label class="form-label">HuggingFace repo ID or URL</label>
<input type="text" id="dl-id" class="form-input" placeholder="e.g. bartowski/Llama-3.1-8B-Instruct-GGUF">
</div>
<!-- GGUF mode: specific file or pattern -->
<div id="dl-pattern-row" class="form-row">
<label class="form-label">File / pattern</label>
<input type="text" id="dl-pattern" class="form-input" placeholder=".gguf">
<span class="form-hint" id="dl-hint">Exact filename (e.g. <code>model-Q4_K_M.gguf</code>) or pattern (<code>.gguf</code>). Leave blank to download the first .gguf found.</span>
</div>
<!-- Snapshot mode: full repo via HF API -->
<div id="dl-snapshot-note" class="alert alert-info" style="display:none">
Will download the full repository using the HuggingFace snapshot API. This is the correct method for safetensors / non-GGUF models. Large repos may take a while.
</div>
<div id="search-results" class="search-results">
<p class="text-muted">Enter a search query to find models</p>
<div class="form-actions">
<button class="btn btn-primary" onclick="startDownload()">Download</button>
<button class="btn btn-ghost" onclick="closeModal('dl-modal')">Close</button>
</div>
</div>
<div id="dl-progress" style="display:none;padding:.25rem 0">
<div id="dl-filename" style="font-size:13px;font-weight:500;margin-bottom:.5rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">Preparing…</div>
<div class="progress" style="margin-top:0;margin-bottom:.35rem;height:5px;background:var(--border-2)"><div id="dl-bar" class="progress-fill" style="width:0%;height:100%"></div></div>
<div style="display:flex;justify-content:space-between;font-size:11px;color:var(--text-2);margin-bottom:.75rem">
<span id="dl-bytes"></span>
<span id="dl-speed"></span>
<span id="dl-eta"></span>
<span id="dl-pct">0%</span>
</div>
<div id="dl-log" style="display:none;background:var(--raised);border-radius:6px;padding:.4rem .6rem;font-size:11px;font-family:monospace;color:var(--text-2);max-height:72px;overflow-y:auto"></div>
</div>
</div>
</div>
</div>
<div id="tab-config" class="tab-content">
<div class="card">
<h3>Model Loading Configuration</h3>
<form id="config-form" class="form">
<div class="form-group">
<label for="load-mode">Load Mode</label>
<select id="load-mode" name="load_mode" class="form-control">
<option value="ondemand">On Demand (default)</option>
<option value="loadall">Load All</option>
<option value="loadswap">Load & Swap</option>
</select>
<small class="form-text">
On Demand: Load one model at a time<br>
Load All: Try to load all models in VRAM<br>
Load & Swap: Keep models in RAM, swap to VRAM as needed
</small>
</div>
<div class="form-group">
<label>Models to Load at Startup</label>
<div id="loaded-models-list">
<p class="text-muted">No models selected</p>
</div>
</div>
<div class="form-group">
<label>Models to Pre-load (RAM)</label>
<div id="preload-models-list">
<p class="text-muted">No models selected</p>
</div>
</div>
<button type="submit" class="btn btn-primary">Save Configuration</button>
</form>
<!-- Upload modal -->
<div id="upload-modal" class="modal">
<div class="modal-box">
<div class="modal-head">
<span class="modal-title">Upload GGUF model</span>
<button class="modal-close" onclick="closeModal('upload-modal')">×</button>
</div>
<div class="modal-body">
<div id="upload-form">
<div class="form-row">
<label class="form-label">Select GGUF file</label>
<input type="file" id="upload-file" class="form-input" accept=".gguf">
</div>
<div class="form-actions">
<button class="btn btn-primary" onclick="startUpload()">Upload</button>
<button class="btn btn-ghost" onclick="closeModal('upload-modal')">Close</button>
</div>
</div>
<div id="upload-progress" style="display:none;padding:.25rem 0">
<div id="upload-filename" style="font-size:13px;font-weight:500;margin-bottom:.5rem">Uploading…</div>
<div class="progress" style="margin-top:0;margin-bottom:.35rem;height:5px;background:var(--border-2)"><div id="upload-bar" class="progress-fill" style="width:0%;height:100%"></div></div>
<div style="display:flex;justify-content:space-between;font-size:11px;color:var(--text-2)">
<span id="upload-pct">0%</span>
<span id="upload-status"></span>
</div>
</div>
</div>
</div>
</div>
<!-- Download Modal -->
<div id="download-modal" class="modal">
<div class="modal-content">
<div class="modal-header">
<h2>Download Model</h2>
<button class="modal-close" onclick="hideDownloadModal()">&times;</button>
</div>
<div class="modal-body">
<form id="download-form">
<div class="form-group">
<label for="model-id">Model ID or URL</label>
<input type="text" id="model-id" class="form-control"
placeholder="e.g., TheBloke/Llama-2-7B-GGUF" required>
<small class="form-text">HuggingFace model ID or direct URL</small>
</div>
<div class="form-group">
<label for="file-pattern">File Pattern (optional)</label>
<input type="text" id="file-pattern" class="form-control"
placeholder=".gguf">
<small class="form-text">Filter files to download (e.g., .gguf, .safetensors)</small>
</div>
<div class="form-actions">
<button type="submit" class="btn btn-primary">Download</button>
<button type="button" class="btn btn-secondary" onclick="hideDownloadModal()">Cancel</button>
</div>
</form>
<div id="download-progress" class="download-progress" style="display: none;">
<p>Downloading...</p>
<div class="progress-bar">
<div class="progress-fill" id="download-progress-bar"></div>
</div>
</div>
<!-- Model configuration modal -->
<div id="cfg-modal" class="modal">
<div class="modal-box" style="max-width:600px;max-height:92vh;overflow-y:auto">
<div class="modal-head" style="position:sticky;top:0;background:var(--card);z-index:1;border-bottom:1px solid var(--border)">
<span class="modal-title" id="cfg-modal-title">Configure model</span>
<button class="modal-close" onclick="closeModal('cfg-modal')">×</button>
</div>
<div class="modal-body">
<input type="hidden" id="cfg-path">
<input type="hidden" id="cfg-orig-type">
<!-- identity -->
<div class="form-row">
<label class="form-label">Model ID / path</label>
<div id="cfg-id-label" style="font-size:12px;font-family:monospace;color:var(--text-2);word-break:break-all;padding:.3rem 0"></div>
</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:.75rem">
<div class="form-row" style="margin:0">
<label class="form-label">Type</label>
<select id="cfg-type" class="form-input">
<option value="text_models">Text (LLM)</option>
<option value="image_models">Image generation</option>
<option value="audio_models">Audio</option>
<option value="tts_models">TTS</option>
<option value="vision_models">Vision</option>
</select>
</div>
<div class="form-row" style="margin:0">
<label class="form-label">Alias <span class="muted">(optional)</span></label>
<input type="text" id="cfg-alias" class="form-input" placeholder="Friendly name">
</div>
</div>
<!-- backend -->
<div class="card-title" style="margin-top:1.25rem">Backend</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:.75rem">
<div class="form-row" style="margin:0">
<label class="form-label">Compute backend</label>
<select id="cfg-backend" class="form-input">
<option value="auto">Auto-detect</option>
<option value="nvidia">NVIDIA (CUDA)</option>
<option value="vulkan">Vulkan</option>
<option value="opencl">OpenCL</option>
<option value="cpu">CPU only</option>
</select>
</div>
<div class="form-row" style="margin:0">
<label class="form-label">Load mode</label>
<select id="cfg-load-mode" class="form-input">
<option value="load">Load (pre-load in VRAM)</option>
<option value="on-request">On-request (load when needed)</option>
</select>
</div>
</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:.75rem;margin-top:.75rem">
<div class="form-row" style="margin:0">
<label class="form-label">Used VRAM <span class="muted">(GB)</span></label>
<input type="number" id="cfg-used-vram" class="form-input" min="0" step="0.1" placeholder="auto-estimate">
<span class="form-hint" id="cfg-used-vram-hint" style="font-size:11px;color:var(--text-3)"></span>
</div>
</div>
<!-- inference -->
<div class="card-title" style="margin-top:1.25rem">Inference</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:.75rem">
<div class="form-row" style="margin:0">
<label class="form-label">GPU layers <span class="muted">(-1 = all)</span></label>
<input type="number" id="cfg-gpu-layers" class="form-input" min="-1" value="-1">
</div>
<div class="form-row" style="margin:0">
<label class="form-label">Context size</label>
<input type="number" id="cfg-n-ctx" class="form-input" min="128" step="128" value="2048">
</div>
<div class="form-row" style="margin:0">
<label class="form-label">Max GPU % <span class="muted">(optional)</span></label>
<input type="number" id="cfg-max-gpu" class="form-input" min="1" max="100" placeholder="e.g. 90">
</div>
<div class="form-row" style="margin:0">
<label class="form-label">Manual RAM GB <span class="muted">(optional)</span></label>
<input type="number" id="cfg-ram-gb" class="form-input" min="0" step="0.5" placeholder="auto">
</div>
</div>
<div style="display:flex;gap:1.5rem;flex-wrap:wrap;margin-top:.75rem">
<label style="display:flex;align-items:center;gap:.5rem;cursor:pointer;font-size:13px"><input type="checkbox" id="cfg-4bit"> 4-bit quantization</label>
<label style="display:flex;align-items:center;gap:.5rem;cursor:pointer;font-size:13px"><input type="checkbox" id="cfg-8bit"> 8-bit quantization</label>
<label style="display:flex;align-items:center;gap:.5rem;cursor:pointer;font-size:13px"><input type="checkbox" id="cfg-flash"> Flash Attention 2</label>
<label style="display:flex;align-items:center;gap:.5rem;cursor:pointer;font-size:13px"><input type="checkbox" id="cfg-noram"> No RAM fallback</label>
</div>
<!-- offload -->
<div class="card-title" style="margin-top:1.25rem">Offload</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:.75rem">
<div class="form-row" style="margin:0">
<label class="form-label">Strategy</label>
<select id="cfg-offload-strategy" class="form-input">
<option value="auto">Auto</option>
<option value="cpu">CPU RAM</option>
<option value="disk">Disk</option>
<option value="none">None</option>
</select>
</div>
<div class="form-row" style="margin:0">
<label class="form-label">Offload directory</label>
<input type="text" id="cfg-offload-dir" class="form-input" placeholder="./offload">
</div>
</div>
<!-- generation -->
<div class="card-title" style="margin-top:1.25rem">Generation</div>
<div class="form-row">
<label class="form-label">System prompt <span class="muted">(optional)</span></label>
<textarea id="cfg-sysprompt" class="form-input" rows="3"
placeholder="You are a helpful assistant…"
style="resize:vertical;font-family:var(--font-mono,monospace);font-size:12px;line-height:1.5"></textarea>
</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:.75rem">
<div class="form-row" style="margin:0">
<label class="form-label">Chat template parser</label>
<select id="cfg-parser" class="form-input">
<option value="auto">Auto-detect</option>
<option value="llama">Llama</option>
<option value="mistral">Mistral</option>
<option value="chatml">ChatML</option>
<option value="phi">Phi</option>
<option value="gemma">Gemma</option>
<option value="qwen">Qwen</option>
</select>
</div>
</div>
<div style="display:flex;gap:1.5rem;flex-wrap:wrap;margin-top:.75rem">
<label style="display:flex;align-items:center;gap:.5rem;cursor:pointer;font-size:13px"><input type="checkbox" id="cfg-tools"> Tools closer prompt</label>
<label style="display:flex;align-items:center;gap:.5rem;cursor:pointer;font-size:13px"><input type="checkbox" id="cfg-grammar"> Grammar-guided generation</label>
</div>
<div class="form-actions" style="margin-top:1.5rem">
<button class="btn btn-primary" onclick="saveModelConfig()">Save</button>
<button class="btn btn-ghost" onclick="closeModal('cfg-modal')">Cancel</button>
</div>
</div>
</div>
</div>
<!-- Model info drawer -->
<div id="info-overlay" onclick="closeInfo()"></div>
<!-- Slide-in drawer showing details for one HuggingFace model.
     #info-title and #info-body are filled in by the page script. -->
<div id="info-drawer">
<div id="info-sticky">
<div id="info-title"></div>
<!-- Visible glyph + aria-label so the close control is discoverable and has an accessible name. -->
<button class="btn btn-ghost btn-sm" onclick="closeInfo()" aria-label="Close" title="Close">×</button>
</div>
<div id="info-body" style="padding:1.25rem 1.5rem"></div>
</div>
{% endblock %}
{% block scripts %}
<script>
function showTab(tabName) {
document.querySelectorAll('.tab-content').forEach(tab => {
tab.classList.remove('active');
});
document.querySelectorAll('.tab-btn').forEach(btn => {
btn.classList.remove('active');
/* ── helpers ─────────────────────────────────────────── */
/**
 * Escape a value for insertion into HTML markup.
 *
 * The value is coerced with String() and the five characters that are
 * significant in HTML text and quoted attribute values (& < > " ') are
 * replaced with entities. Quotes are included because callers place the
 * result inside quoted attributes such as title="…".
 *
 * Note: entity-escaping alone does NOT make a value safe inside an inline
 * event handler's JavaScript string, because the browser decodes entities
 * before compiling the handler.
 *
 * @param {*} s value to escape
 * @returns {string} HTML-safe text
 */
function esc(s){
  const entities = {'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};
  return String(s).replace(/[&<>"']/g, ch=>entities[ch]);
}
/**
 * Abbreviate a count for display: millions get an "M" suffix, thousands a
 * "k" suffix (one decimal each); smaller values are printed as-is.
 * Falsy input (0, null, undefined, NaN) yields '0'.
 */
function fmtNum(n){
  if(!n) return '0';
  if(n>=1e6) return (n/1e6).toFixed(1)+'M';
  if(n>=1000) return (n/1000).toFixed(1)+'k';
  return String(n);
}
/**
 * Format a size given in gigabytes. Values of 1 or more are shown in GB
 * with one decimal; smaller values are converted to whole MB (×1024).
 * Falsy input yields an em dash placeholder.
 */
function fmtGB(gb){
  if(!gb) return '—';
  if(gb>=1) return gb.toFixed(1)+' GB';
  const mb = gb*1024;
  return mb.toFixed(0)+' MB';
}
/**
 * Render a date value as a short, locale-formatted date (e.g. "Jan 15, 2024"
 * in en-US) using the browser's default locale.
 *
 * If formatting throws, the input is returned unchanged. Note that an
 * unparseable string does not throw: it is formatted as "Invalid Date".
 */
function fmtDate(s){
  const dateOpts = {year:'numeric',month:'short',day:'numeric'};
  try{
    const parsed = new Date(s);
    return parsed.toLocaleDateString(undefined,dateOpts);
  }catch{
    return s;
  }
}
/* ── tab / modal ─────────────────────────────────────── */
/**
 * Activate one tab: clear the 'active' class from every panel and tab
 * button, then mark the panel with id 'tab-<name>' and the clicked button.
 *
 * @param {string} name suffix of the panel id to show
 * @param {Element} btn the tab button that was clicked
 */
function switchTab(name,btn){
  for(const panel of document.querySelectorAll('.tab-panel')){
    panel.classList.remove('active');
  }
  for(const tab of document.querySelectorAll('.tab')){
    tab.classList.remove('active');
  }
  const target = document.getElementById('tab-'+name);
  target.classList.add('active');
  btn.classList.add('active');
}
/** Show the modal with the given element id by adding its 'show' class. */
function openModal(id){
  const modal = document.getElementById(id);
  modal.classList.add('show');
}
/** Hide the modal with the given element id by removing its 'show' class. */
function closeModal(id){
  const modal = document.getElementById(id);
  modal.classList.remove('show');
}
/* ── GGUF format toggle ──────────────────────────────── */
// Currently selected value of the format toggle; sent as gguf_mode when searching.
let _ggufMode = 'gguf';
// The toggle buttons behave like a radio group: clicking one clears 'on'
// from all of them, marks the clicked one, and records its data-val.
for(const togBtn of document.querySelectorAll('.tog-btn')){
  togBtn.addEventListener('click',()=>{
    for(const other of document.querySelectorAll('.tog-btn')){
      other.classList.remove('on');
    }
    togBtn.classList.add('on');
    _ggufMode = togBtn.dataset.val;
  });
}
/* ── chip toggles ────────────────────────────────────── */
// Every chip is an independent on/off switch: a click flips its 'on' class.
for(const chip of document.querySelectorAll('.chip')){
  chip.addEventListener('click',()=>{
    chip.classList.toggle('on');
  });
}
/**
 * Collect the data-val of every switched-on chip inside the container
 * with the given element id.
 *
 * @param {string} id id of the chip container
 * @returns {string[]} selected values in document order
 */
function getChips(id){
  const selected = document.querySelectorAll('#'+id+' .chip.on');
  return Array.from(selected,chip=>chip.dataset.val);
}
/* ── search ──────────────────────────────────────────── */
// Rows returned by the most recent search; result buttons refer to them by index.
let _results = [];
// File listings already fetched, keyed by model id; reset on every search.
let _filesCache = {};
// Upper-cased quant names selected when the last search ran.
let _activeQuants = new Set();
// Pressing Enter in the query box runs the search.
document.getElementById('search-q').addEventListener('keydown',function(ev){
  if(ev.key==='Enter'){
    doSearch();
  }
});
/**
 * Run a HuggingFace search with the current query and filter controls and
 * render the result list into #search-results.
 *
 * Side effects: resets _filesCache and _results, refreshes _activeQuants from
 * the quant chips, and stores the returned rows in _results so the per-row
 * buttons (Info / Files / Download) can refer to them by index.
 *
 * Error handling: a failed response whose body is not JSON falls back to the
 * HTTP status text instead of surfacing a JSON parse error. If a newer search
 * starts while this one is in flight, this one's outcome is discarded so stale
 * rows never overwrite fresh ones.
 */
async function doSearch(){
  const q        = document.getElementById('search-q').value.trim();
  const out      = document.getElementById('search-results');
  const pipeline = document.getElementById('filter-pipeline').value;
  const arch     = document.getElementById('filter-arch').value;
  const sort     = document.getElementById('filter-sort').value;
  const sizes    = getChips('size-chips').join(',');

  // Sequence number of this request; only the latest one may touch the UI.
  const seq = doSearch._seq = (doSearch._seq||0)+1;
  // Attribute-safe escaping: also neutralises quotes (a no-op if esc already does).
  const attr = v=>esc(v).replace(/"/g,'&quot;').replace(/'/g,'&#39;');

  _activeQuants = new Set(getChips('quant-chips').map(v=>v.toUpperCase().split(' ')[0])); // strip ★
  _filesCache = {};
  _results = [];

  out.innerHTML = '<span class="muted small">Searching HuggingFace…</span>';

  // An empty query is sent as a single space.
  const params = new URLSearchParams({q:q||' ', gguf_mode:_ggufMode, sort});
  if(pipeline) params.append('pipeline_tag', pipeline);
  if(sizes)    params.append('sizes', sizes);
  if(arch)     params.append('arch', arch);

  try{
    const r = await fetch('/admin/api/hf-search?'+params);
    if(!r.ok){
      // The error body may be HTML or empty (e.g. from a proxy); tolerate that.
      const body = await r.json().catch(()=>null);
      throw new Error((body&&body.detail)||r.statusText);
    }
    const rows = await r.json();
    if(seq!==doSearch._seq) return; // superseded by a newer search
    _results = rows;
    if(!_results.length){out.innerHTML='<span class="muted small">No results. Try different keywords or fewer filters.</span>';return}

    out.innerHTML = _results.map((m,i)=>`
<div style="padding:.75rem 0;border-bottom:1px solid var(--border)">
<div style="display:flex;align-items:flex-start;justify-content:space-between;gap:.5rem">
<div style="min-width:0;flex:1">
<div style="font-weight:500;font-size:13px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap"
title="${attr(m.id)}">${esc(m.id)}</div>
<div style="font-size:11px;color:var(--text-3);margin-top:.25rem;display:flex;align-items:center;gap:.5rem;flex-wrap:wrap">
${m.pipeline_tag?`<span class="badge badge-user">${esc(m.pipeline_tag)}</span>`:''}
<span>↓ ${fmtNum(m.downloads)}</span>
<span>♥ ${fmtNum(m.likes)}</span>
</div>
</div>
<div style="display:flex;gap:.3rem;flex-shrink:0">
<button class="btn btn-ghost btn-sm" onclick="openInfo(${i})">Info</button>
<button class="btn btn-ghost btn-sm" onclick="toggleFiles(${i})">▾ Files</button>
<button class="btn btn-secondary btn-sm" onclick="dlFromResult(${i})">Download</button>
</div>
</div>
<div id="fp-${i}" style="display:none;margin-top:.625rem;padding:.5rem .625rem;background:var(--raised);border-radius:6px">
<span class="muted small">Loading…</span>
</div>
</div>`).join('');
  }catch(e){
    if(seq!==doSearch._seq) return; // a newer search owns the output area
    out.innerHTML='<span class="muted small">Error: '+esc(e.message)+'</span>';
  }
}
/**
 * Open the download dialog for search result i.
 *
 * When looksLikeGguf accepts the repo id, a file pattern is pre-filled:
 * '<QUANT>.gguf' if exactly one quant chip was active at search time,
 * otherwise plain '.gguf'. For any other repo an empty pattern is passed,
 * which triggers a snapshot download.
 */
function dlFromResult(i){
  const model = _results[i];
  if(!looksLikeGguf(model.id, '')){
    openDownloadFor(model.id, ''); // triggers snapshot download
    return;
  }
  const quants = Array.from(_activeQuants);
  let pattern = '.gguf';
  if(quants.length===1){
    pattern = quants[0]+'.gguf';
  }
  openDownloadFor(model.id, pattern);
}
/* ── files panel ─────────────────────────────────────── */
/**
 * Show or hide the file panel under search result i.
 *
 * Hides the panel if it is visible. Otherwise shows it and renders the
 * model's file list, using _filesCache when available and fetching from
 * /admin/api/hf-model-files when not. Errors are shown inside the panel;
 * a failed response whose body is not JSON falls back to the HTTP status
 * text rather than a JSON parse error.
 */
async function toggleFiles(i){
  const panel = document.getElementById('fp-'+i);
  if(panel.style.display!=='none'){panel.style.display='none';return}
  panel.style.display='block';

  const modelId = _results[i].id;
  const cached = _filesCache[modelId];
  if(cached){renderFiles(panel,modelId,cached);return}

  panel.innerHTML='<span class="muted small">Fetching file list…</span>';
  try{
    const r = await fetch('/admin/api/hf-model-files?model_id='+encodeURIComponent(modelId));
    if(!r.ok){
      // The error body may be HTML or empty (e.g. from a proxy); tolerate that.
      const body = await r.json().catch(()=>null);
      throw new Error((body&&body.detail)||r.statusText);
    }
    const files = await r.json();
    _filesCache[modelId]=files;
    renderFiles(panel,modelId,files);
  }catch(e){
    panel.innerHTML='<span class="muted small">Error: '+esc(e.message)+'</span>';
  }
}
/**
 * Render a table of model files into `panel`.
 *
 * When quant chips were active at search time (_activeQuants), only files
 * whose `quant` matches, or whose name contains one of the selected quants,
 * are listed. Each row has a download button that opens the download dialog
 * for that exact file.
 *
 * Repo ids and filenames come from a remote service, so they are encoded for
 * the context they land in: attribute values get quote-safe escaping, and
 * arguments of the inline onclick handler are emitted as JSON string literals
 * (then attribute-escaped). HTML-escaping alone is not enough there, because
 * entities are decoded before the handler source is compiled.
 *
 * @param {Element} panel container whose innerHTML is replaced
 * @param {string} modelId repo id the files belong to
 * @param {Array<Object>} files entries with name, quant, size_gb, vram_gb
 */
function renderFiles(panel,modelId,files){
  // Attribute-safe escaping: also neutralises quotes (a no-op if esc already does).
  const attr = v=>esc(v).replace(/"/g,'&quot;').replace(/'/g,'&#39;');
  // A value as a JS string literal that is safe inside a quoted HTML attribute.
  const jsArg = v=>attr(JSON.stringify(String(v)));

  const shown = _activeQuants.size>0
    ? files.filter(f=>{
        const q=(f.quant||'').toUpperCase();
        return _activeQuants.has(q)||[..._activeQuants].some(aq=>f.name.toUpperCase().includes(aq));
      })
    : files;

  if(!shown.length){
    panel.innerHTML=`<span class="muted small">${_activeQuants.size?'No matching quant files — deselect quant chips to see all.':'No GGUF files found.'}</span>`;
    return;
  }

  panel.innerHTML=
    '<table style="width:100%;border-collapse:collapse;font-size:12px">'+
    '<thead><tr style="color:var(--text-3);font-size:10px;text-transform:uppercase;letter-spacing:.04em">'+
    '<th style="text-align:left;padding:.2rem .25rem">File</th>'+
    '<th style="text-align:center;padding:.2rem .25rem">Quant</th>'+
    '<th style="text-align:right;padding:.2rem .25rem">Size</th>'+
    '<th style="text-align:right;padding:.2rem .25rem">≈ VRAM</th>'+
    '<th></th></tr></thead><tbody>'+
    shown.map(f=>
      '<tr style="border-top:1px solid var(--border)">'+
      `<td style="padding:.3rem .25rem;font-family:monospace;max-width:280px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="${attr(f.name)}">${esc(f.name)}</td>`+
      `<td style="text-align:center;padding:.3rem .25rem">${f.quant?'<span class="badge badge-admin">'+esc(f.quant)+'</span>':''}</td>`+
      `<td style="text-align:right;padding:.3rem .25rem;white-space:nowrap;color:var(--text-2)">${fmtGB(f.size_gb)}</td>`+
      `<td style="text-align:right;padding:.3rem .25rem;white-space:nowrap;color:var(--text-3)">${f.vram_gb?'~'+fmtGB(f.vram_gb):''}</td>`+
      `<td style="padding:.3rem .25rem;text-align:right"><button class="btn btn-secondary btn-sm" onclick="openDownloadFor(${jsArg(modelId)},${jsArg(f.name)})">↓</button></td>`+
      '</tr>'
    ).join('')+
    '</tbody></table>';
}
/* ── info drawer ─────────────────────────────────────── */
/**
 * Open the info drawer for search result i: set the title, show a loading
 * placeholder, reveal the overlay and drawer, lock page scrolling, and
 * start loading the model details.
 */
function openInfo(i){
  const model = _results[i];
  const byId = id=>document.getElementById(id);

  byId('info-title').textContent = model.id;
  byId('info-body').innerHTML = '<span class="muted small">Loading…</span>';
  byId('info-overlay').style.display = 'block';
  byId('info-drawer').classList.add('open');
  document.body.style.overflow = 'hidden';

  loadInfo(model.id);
}
/** Close the info drawer: hide the overlay, slide the drawer out, and restore page scrolling. */
function closeInfo(){
  const overlay = document.getElementById('info-overlay');
  const drawer = document.getElementById('info-drawer');
  overlay.style.display = 'none';
  drawer.classList.remove('open');
  document.body.style.overflow = '';
}
/**
 * Fetch details for a model from /admin/api/hf-model-info and pass them to
 * renderInfo. Any failure is shown inside #info-body; a failed response
 * whose body is not JSON falls back to the HTTP status text rather than a
 * JSON parse error.
 *
 * @param {string} modelId repo id to look up
 */
async function loadInfo(modelId){
  try{
    const r = await fetch('/admin/api/hf-model-info?model_id='+encodeURIComponent(modelId));
    if(!r.ok){
      // The error body may be HTML or empty (e.g. from a proxy); tolerate that.
      const body = await r.json().catch(()=>null);
      throw new Error((body&&body.detail)||r.statusText);
    }
    renderInfo(await r.json());
  }catch(e){
    document.getElementById('info-body').innerHTML='<span class="muted small">Error: '+esc(e.message)+'</span>';
  }
}
/**
 * Render the model details object `d` into the info drawer: action buttons,
 * a metadata grid, a tag list (common framework tags omitted, at most 30
 * shown), and a table of GGUF files with per-file download buttons.
 *
 * All fields of `d` come from a remote service, so each one is encoded for
 * the context it lands in: text nodes are HTML-escaped, attribute values get
 * quote-safe escaping, and arguments of inline onclick handlers are emitted
 * as JSON string literals (then attribute-escaped). HTML-escaping alone is
 * not enough there, because entities are decoded before the handler source
 * is compiled.
 *
 * @param {Object} d model details (id, gguf_files, tags, pipeline_tag, …)
 */
function renderInfo(d){
  // Attribute-safe escaping: also neutralises quotes (a no-op if esc already does).
  const attr = v=>esc(v).replace(/"/g,'&quot;').replace(/'/g,'&#39;');
  // A value as a JS string literal that is safe inside a quoted HTML attribute.
  const jsArg = v=>attr(JSON.stringify(String(v)));
  const idArg = jsArg(d.id);
  const hasGguf = !!(d.gguf_files&&d.gguf_files.length);

  let html=`
<div style="display:flex;gap:.4rem;flex-wrap:wrap;margin-bottom:1.25rem">
<a href="https://huggingface.co/${attr(d.id)}" target="_blank" rel="noopener noreferrer" class="btn btn-ghost btn-sm">HuggingFace ↗</a>
<button class="btn btn-secondary btn-sm" onclick="openDownloadFor(${idArg},${jsArg(hasGguf?'.gguf':'')})">Download repo</button>
${d.gated?'<span class="badge badge-admin" style="align-self:center">Gated</span>':''}
${d.private?'<span class="badge badge-user" style="align-self:center">Private</span>':''}
</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:.625rem 1.25rem;font-size:13px;margin-bottom:1.25rem">
${d.pipeline_tag?`<div><div class="fl" style="margin-bottom:.2rem">Type</div>${esc(d.pipeline_tag)}</div>`:''}
${d.params_label?`<div><div class="fl" style="margin-bottom:.2rem">Parameters</div>${esc(d.params_label)}</div>`:''}
<div><div class="fl" style="margin-bottom:.2rem">Downloads</div>↓ ${fmtNum(d.downloads)}</div>
<div><div class="fl" style="margin-bottom:.2rem">Likes</div>♥ ${fmtNum(d.likes)}</div>
${d.license?`<div><div class="fl" style="margin-bottom:.2rem">License</div>${esc(d.license)}</div>`:''}
${d.last_modified?`<div><div class="fl" style="margin-bottom:.2rem">Updated</div>${esc(fmtDate(d.last_modified))}</div>`:''}
${d.author?`<div><div class="fl" style="margin-bottom:.2rem">Author</div>${esc(d.author)}</div>`:''}
${d.file_count?`<div><div class="fl" style="margin-bottom:.2rem">Files in repo</div>${esc(d.file_count)}</div>`:''}
${d.base_model?`<div style="grid-column:span 2"><div class="fl" style="margin-bottom:.2rem">Base model</div><code style="font-size:11px">${esc(d.base_model)}</code></div>`:''}
${d.language&&d.language.length?`<div style="grid-column:span 2"><div class="fl" style="margin-bottom:.2rem">Languages</div>${d.language.slice(0,8).map(l=>esc(l)).join(', ')}</div>`:''}
</div>`;

  // Drop tags that only name the framework / file format.
  const relevantTags = (d.tags||[]).filter(t=>!['transformers','safetensors','gguf','pytorch','jax'].includes(t));
  if(relevantTags.length){
    html+=`<div style="margin-bottom:1.25rem">
<div class="fl" style="margin-bottom:.4rem">Tags</div>
${relevantTags.slice(0,30).map(t=>`<span class="badge badge-user" style="margin:.1rem">${esc(t)}</span>`).join('')}
</div>`;
  }

  if(hasGguf){
    html+=`<div class="card-title">GGUF files (${d.gguf_files.length})</div>
<table style="width:100%;border-collapse:collapse;font-size:12px">
<thead><tr style="color:var(--text-3);font-size:10px;text-transform:uppercase;letter-spacing:.04em">
<th style="text-align:left;padding:.25rem .25rem">File</th>
<th style="text-align:center;padding:.25rem .25rem">Quant</th>
<th style="text-align:right;padding:.25rem .25rem">Size</th>
<th style="text-align:right;padding:.25rem .25rem">≈ VRAM</th>
<th></th>
</tr></thead><tbody>
${d.gguf_files.map(f=>`
<tr style="border-top:1px solid var(--border)">
<td style="padding:.35rem .25rem;font-family:monospace;font-size:11px;max-width:220px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="${attr(f.name)}">${esc(f.name)}</td>
<td style="text-align:center;padding:.35rem .25rem">${f.quant?`<span class="badge badge-admin">${esc(f.quant)}</span>`:''}</td>
<td style="text-align:right;padding:.35rem .25rem;white-space:nowrap;color:var(--text-2)">${fmtGB(f.size_gb)}</td>
<td style="text-align:right;padding:.35rem .25rem;white-space:nowrap;color:var(--text-3)">${f.vram_gb?'~'+fmtGB(f.vram_gb):''}</td>
<td style="padding:.35rem .25rem;text-align:right"><button class="btn btn-secondary btn-sm" onclick="openDownloadFor(${idArg},${jsArg(f.name)})">↓</button></td>
</tr>`).join('')}
</tbody>
</table>`;
  } else {
    html+=`<div class="muted small">No GGUF files in this repo.</div>`;
  }

  document.getElementById('info-title').textContent = d.id;
  document.getElementById('info-body').innerHTML = html;
}
/* ── download modal ──────────────────────────────────── */
/**
 * Decide whether a download should be treated as a GGUF download.
 *
 * True when the file pattern mentions ".gguf" (case-insensitive), when the
 * page-level _ggufMode is 'gguf', or when the model id contains "gguf"
 * (case-insensitive). Checks are evaluated in that order.
 */
function looksLikeGguf(modelId, filePattern){
  const patternHasGguf = Boolean(filePattern) && filePattern.toLowerCase().includes('.gguf');
  return patternHasGguf
    || _ggufMode==='gguf'
    || modelId.toLowerCase().includes('gguf');
}
/**
 * Pre-fill and open the download modal for a repo.
 *
 * For GGUF-looking downloads the file-pattern row is shown (defaulting to
 * '.gguf'); otherwise the snapshot note is shown and the pattern is cleared.
 */
function openDownloadFor(modelId, filePattern){
  const byId = id => document.getElementById(id);
  byId('dl-id').value = modelId;
  const ggufRepo = looksLikeGguf(modelId, filePattern);
  byId('dl-pattern-row').style.display = ggufRepo ? 'block' : 'none';
  byId('dl-snapshot-note').style.display = ggufRepo ? 'none' : 'flex';
  byId('dl-pattern').value = ggufRepo ? (filePattern || '.gguf') : '';
  openModal('dl-modal');
}
/* progress helpers */
/**
 * Format a byte count using decimal units (GB/MB/KB/B).
 * Returns an em dash for null/undefined/NaN; 0 is formatted as "0 B".
 */
function fmtBytes(n){
  if(!n && n!==0) return '—';
  // [threshold, decimals, suffix], largest unit first
  const units = [[1e9,2,' GB'],[1e6,1,' MB'],[1e3,0,' KB']];
  for(const [scale,digits,suffix] of units){
    if(n>=scale) return (n/scale).toFixed(digits)+suffix;
  }
  return n+' B';
}
/** Format a transfer rate as "<size>/s"; empty string when the rate is falsy. */
function fmtRate(bps){
  if(!bps) return '';
  return fmtBytes(bps)+'/s';
}
/**
 * Format a remaining-time estimate given in seconds.
 * "<n>s left" under a minute, "m:ss left" under an hour, "<h>h <m>m left"
 * otherwise. Returns '' for null/undefined.
 */
function fmtEta(s){
  if(s===null||s===undefined) return '';
  const total = Math.round(s);
  if(total<60) return total+'s left';
  const mins = Math.floor(total/60);
  if(mins<60) return mins+':'+String(total%60).padStart(2,'0')+' left';
  return Math.floor(mins/60)+'h '+(mins%60)+'m left';
}
// EventSource for the download currently shown in the modal; null when no stream is open.
let _dlEs = null;
// Set once a 'done' or 'error' event has been handled, so the stream's onerror
// handler does not report a lost connection after the download has finished.
let _dlDone = false;
/**
 * Reset the download progress view to its initial state.
 * The bar's CSS transition is switched off while the width snaps back to 0%
 * and re-enabled on the next animation frame.
 */
function _dlReset(){
  const byId = id => document.getElementById(id);
  const progressBar = byId('dl-bar');
  progressBar.style.transition = 'none';
  progressBar.style.width = '0%';
  const initialText = {
    'dl-filename':'Preparing…',
    'dl-bytes':'—',
    'dl-speed':'—',
    'dl-eta':'—',
    'dl-pct':'0%',
  };
  for(const [id,text] of Object.entries(initialText)){
    byId(id).textContent=text;
  }
  const logBox = byId('dl-log');
  logBox.style.display='none';
  logBox.innerHTML='';
  requestAnimationFrame(()=>{ progressBar.style.transition=''; });
}
/** Append one line to the download log, reveal the log and scroll to the bottom. */
function _dlLog(msg){
  const logBox=document.getElementById('dl-log');
  logBox.style.display='block';
  // textContent keeps the message as plain text.
  const entry=Object.assign(document.createElement('div'),{textContent:msg});
  logBox.appendChild(entry);
  logBox.scrollTop=logBox.scrollHeight;
}
/**
 * Abort the current download view: close the event stream if one is open,
 * switch back from the progress view to the form, and alert the message.
 */
function showDownloadError(msg){
  if(_dlEs){
    _dlEs.close();
    _dlEs=null;
  }
  const form=document.getElementById('dl-form');
  form.style.display='block';
  const progress=document.getElementById('dl-progress');
  progress.style.display='none';
  alert('Download error: '+msg);
}
/**
 * Apply one parsed download-stream event to the progress UI.
 *
 * Handled evt.type values: 'start', 'progress', 'info', 'done', 'error'.
 * Any other type (e.g. keepalive) is ignored.
 */
function handleProgressEvent(evt){
  const el = id => document.getElementById(id);
  switch(evt.type){
    case 'start':
      el('dl-filename').textContent=evt.filename||'Downloading…';
      el('dl-bytes').textContent='0 / '+fmtBytes(evt.total);
      el('dl-bar').style.width='0%';
      break;
    case 'progress': {
      const pct=evt.percent||0;
      el('dl-bar').style.width=pct+'%';
      el('dl-pct').textContent=pct.toFixed(1)+'%';
      el('dl-bytes').textContent=fmtBytes(evt.downloaded)+' / '+fmtBytes(evt.total);
      el('dl-speed').textContent=fmtRate(evt.rate);
      el('dl-eta').textContent=fmtEta(evt.eta);
      if(evt.filename) el('dl-filename').textContent=evt.filename;
      break;
    }
    case 'info':
      _dlLog(evt.message);
      break;
    case 'done':
      _dlDone=true;
      if(_dlEs){
        _dlEs.close();
        _dlEs=null;
      }
      el('dl-bar').style.width='100%';
      el('dl-pct').textContent='100%';
      el('dl-filename').textContent='Download complete!';
      refreshLocal();
      // Leave the completed state visible briefly, then close and reset the modal.
      setTimeout(()=>{
        closeModal('dl-modal');
        el('dl-form').style.display='block';
        el('dl-progress').style.display='none';
        _dlReset();
      },1800);
      break;
    case 'error':
      _dlDone=true;
      showDownloadError(evt.message);
      break;
    default:
      // keepalive: ignore
      break;
  }
}
/**
 * Start a model download from the values in the download modal.
 *
 * POSTs the model id (and optional file pattern) to /admin/api/model-download,
 * then subscribes to /admin/api/download-stream/<session_id> and feeds every
 * message to handleProgressEvent. Failures are reported via showDownloadError.
 */
async function startDownload(){
  const id=document.getElementById('dl-id').value.trim();
  if(!id){document.getElementById('dl-id').focus();return}
  _dlDone=false;
  _dlReset();
  document.getElementById('dl-form').style.display='none';
  document.getElementById('dl-progress').style.display='block';
  try{
    const r=await fetch('/admin/api/model-download',{
      method:'POST',headers:{'Content-Type':'application/json'},
      body:JSON.stringify({model_id:id,file_pattern:document.getElementById('dl-pattern').value||null})
    });
    // Two leftover tab-switching statements used to sit here; they referenced
    // `tabName` and `event`, neither of which exists in this function, so the
    // request always ended in the catch block below.
    if(!r.ok){const e=await r.json();showDownloadError(e.detail||'Request failed');return}
    const {session_id}=await r.json();
    _dlEs=new EventSource('/admin/api/download-stream/'+session_id);
    _dlEs.onmessage=function(e){
      // Malformed or unexpected messages are dropped on purpose (best effort).
      try{handleProgressEvent(JSON.parse(e.data))}catch{}
    };
    _dlEs.onerror=function(){
      // Ignore errors after completion or after we closed the stream ourselves.
      if(_dlDone) return;
      if(_dlEs&&_dlEs.readyState===EventSource.CLOSED) return;
      showDownloadError('Connection to download stream lost');
    };
  }catch(e){showDownloadError(e.message)}
}
function showDownloadModal() {
document.getElementById('download-modal').style.display = 'flex';
/* ── active downloads strip ──────────────────────────── */
// Interval handle for the downloads poller; null until startPolling() has created it.
let _pollTimer = null;
/**
 * Refresh the "active downloads" strip from /admin/api/downloads.
 *
 * Entries whose status is 'done' or 'error' are filtered out. The strip is
 * hidden when nothing is active. Any failure (network, JSON, rendering) is
 * swallowed so the next poll can simply try again.
 */
async function pollDownloads(){
  // One row per active download: id, current file, thin progress bar, stats.
  const renderRow = d=>{
    const pct = d.percent||0;
    const name = d.filename||d.model_id||'';
    const spd = d.rate?fmtRate(d.rate):'';
    const eta = d.eta!=null?fmtEta(d.eta):'';
    return `<div style="display:flex;align-items:center;gap:.75rem;padding:.2rem 0">
<div style="flex:1;min-width:0">
<div style="font-size:12px;font-weight:500;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">${esc(d.model_id)}</div>
<div style="font-size:11px;color:var(--text-2);overflow:hidden;text-overflow:ellipsis;white-space:nowrap">${esc(name)}</div>
<div class="progress" style="margin-top:.3rem;margin-bottom:0;height:3px"><div class="progress-fill" style="width:${pct}%"></div></div>
</div>
<div style="font-size:11px;color:var(--text-2);text-align:right;white-space:nowrap;flex-shrink:0">
<div>${pct.toFixed(1)}%</div>
${spd?`<div>${esc(spd)}</div>`:''}
${eta?`<div class="muted">${esc(eta)}</div>`:''}
</div>
</div>`;
  };
  const divider = '<div style="border-top:1px solid var(--border);margin:.3rem 0"></div>';
  try{
    const resp = await fetch('/admin/api/downloads');
    if(!resp.ok) return;
    const sessions = await resp.json();
    const inFlight = sessions.filter(d=>!['done','error'].includes(d.status));
    const strip = document.getElementById('dl-strip');
    const list = document.getElementById('dl-strip-list');
    if(inFlight.length===0){
      strip.style.display='none';
      return;
    }
    strip.style.display='block';
    list.innerHTML = inFlight.map(renderRow).join(divider);
  }catch{}
}
function hideDownloadModal() {
document.getElementById('download-modal').style.display = 'none';
/** Start polling active downloads every 2 s (plus one immediate poll); no-op if already running. */
function startPolling(){
  if(!_pollTimer){
    _pollTimer = setInterval(pollDownloads, 2000);
    pollDownloads();
  }
}
async function searchModels() {
const query = document.getElementById('search-query').value;
const resultsDiv = document.getElementById('search-results');
if (!query) {
resultsDiv.innerHTML = '<p class="text-muted">Enter a search query</p>';
return;
startPolling();
/* ── cache stats & local models ──────────────────────── */
/**
 * Fetch /admin/api/cache-stats and fill the HuggingFace / GGUF / total stat
 * tiles. Non-OK responses and errors leave the tiles untouched.
 */
async function loadCacheStats(){
  const setText = (id,text)=>{ document.getElementById(id).textContent = text; };
  const plural = n => (n!==1?'s':'');
  const freeLabel = bytes => (bytes!=null ? fmtBytes(bytes)+' free' : '—');
  try{
    const resp = await fetch('/admin/api/cache-stats');
    if(!resp.ok) return;
    const s = await resp.json();
    setText('stat-hf-size', fmtBytes(s.hf_bytes));
    setText('stat-hf-count', s.hf_models+' model'+plural(s.hf_models));
    setText('stat-hf-free', freeLabel(s.hf_disk_free_bytes));
    setText('stat-gguf-size', fmtBytes(s.gguf_bytes));
    setText('stat-gguf-count', s.gguf_files+' file'+plural(s.gguf_files));
    setText('stat-gguf-free', freeLabel(s.gguf_disk_free_bytes));
    setText('stat-total-size', fmtBytes((s.hf_bytes||0)+(s.gguf_bytes||0)));
  }catch{}
}
// Flat list of every locally cached model/file shown on the page. Row buttons
// refer to entries by index (e.g. loadModel(3)); rebuilt on every reload.
let _localModels = [];
/**
 * Fetch /admin/api/cached-models and render the HuggingFace and GGUF tables.
 *
 * Each rendered entry is also pushed onto _localModels so the per-row action
 * buttons can look it up by index. On failure both containers show the error.
 *
 * Leftover statements from an older version of this page (a search
 * placeholder, a download-form submit handler and an unterminated loadModels
 * stub) used to sit in the middle of the try block, splitting the function in
 * two; they are removed here so the GGUF table is rendered again.
 */
async function loadCachedModels(){
  _localModels = [];
  const hfEl = document.getElementById('hf-models-list');
  const ggufEl = document.getElementById('gguf-models-list');
  hfEl.innerHTML = ggufEl.innerHTML = '<span class="muted small">Loading…</span>';
  try{
    const r = await fetch('/admin/api/cached-models');
    if(!r.ok) throw new Error((await r.json()).detail||r.statusText);
    const d = await r.json();
    // HF models
    const hf = d.hf||[];
    document.getElementById('hf-model-badge').textContent = hf.length ? `(${hf.length})` : '';
    if(!hf.length){
      hfEl.innerHTML = '<span class="muted small">No HuggingFace models cached.</span>';
    }else{
      const rows = hf.map(m=>{
        const idx = _localModels.length;
        _localModels.push({label:m.id, path:m.id, cacheType:'hf', size_gb:m.size_gb||0,
          defaultType:m.model_type||'text_models', settings:m.settings||{}, in_config:m.in_config});
        // A loaded key is either the bare id or "<prefix>:<id>".
        const loaded = _loadedKeys.has(m.id) || [..._loadedKeys].some(k=>k.endsWith(':'+m.id)||k===m.id);
        return `<tr style="border-top:1px solid var(--border)">
<td style="padding:.4rem .25rem;font-family:monospace;font-size:12px;max-width:260px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="${esc(m.id)}">${esc(m.id)}</td>
<td style="text-align:right;padding:.4rem .25rem;white-space:nowrap;color:var(--text-2)">${fmtGB(m.size_gb)}</td>
<td style="text-align:right;padding:.4rem .25rem;color:var(--text-2)">${m.file_count}</td>
<td style="text-align:center;padding:.4rem .25rem">${m.in_config?'<span class="badge badge-ok">enabled</span>':'<span class="muted small">—</span>'}</td>
<td style="padding:.4rem .25rem;text-align:right;white-space:nowrap">
${m.in_config?(loaded
?`<button class="btn btn-ghost btn-sm" onclick="unloadModel(${idx})">Unload</button>`
:`<button class="btn btn-primary btn-sm" onclick="loadModel(${idx})">Load now</button>`):''}
<button class="btn btn-secondary btn-sm" onclick="openCfgModal(${idx})">${m.in_config?'Configure':'Add to CoderAI'}</button>
${m.in_config?`<button class="btn btn-ghost btn-sm" onclick="disableModel(${idx})">Remove</button>`:''}
<button class="btn btn-danger btn-sm" onclick="deleteModelConfirm(${idx})">Delete</button>
</td>
</tr>`;
      });
      hfEl.innerHTML = '<table style="width:100%;border-collapse:collapse;font-size:13px">'+
        '<thead><tr style="color:var(--text-2);font-size:10px;text-transform:uppercase;letter-spacing:.05em">'+
        '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model</th>'+
        '<th style="text-align:right;padding:.3rem .25rem;font-weight:700">Size</th>'+
        '<th style="text-align:right;padding:.3rem .25rem;font-weight:700">Files</th>'+
        '<th style="text-align:center;padding:.3rem .25rem;font-weight:700">Config</th>'+
        '<th></th></tr></thead><tbody>'+rows.join('')+'</tbody></table>';
    }
    // GGUF files
    const gguf = d.gguf||[];
    document.getElementById('gguf-file-badge').textContent = gguf.length ? `(${gguf.length})` : '';
    if(!gguf.length){
      ggufEl.innerHTML = '<span class="muted small">No GGUF files cached.</span>';
    }else{
      const rows = gguf.map(f=>{
        const idx = _localModels.length;
        _localModels.push({label:f.filename, path:f.path, cacheType:'gguf', size_gb:f.size_gb||0,
          defaultType:f.model_type||'text_models', settings:f.settings||{}, in_config:f.in_config});
        // GGUF entries may be keyed by full path or by file name, with or without a prefix.
        const loaded = _loadedKeys.has(f.path) || _loadedKeys.has(f.filename) || [..._loadedKeys].some(k=>k.endsWith(':'+f.path)||k.endsWith(':'+f.filename));
        return `<tr style="border-top:1px solid var(--border)">
<td style="padding:.4rem .25rem;font-family:monospace;font-size:11px;max-width:320px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="${esc(f.filename)}">${esc(f.filename)}</td>
<td style="text-align:right;padding:.4rem .25rem;white-space:nowrap;color:var(--text-2)">${fmtGB(f.size_gb)}</td>
<td style="text-align:center;padding:.4rem .25rem">${f.in_config?'<span class="badge badge-ok">enabled</span>':'<span class="muted small">—</span>'}</td>
<td style="padding:.4rem .25rem;text-align:right;white-space:nowrap">
${f.in_config?(loaded
?`<button class="btn btn-ghost btn-sm" onclick="unloadModel(${idx})">Unload</button>`
:`<button class="btn btn-primary btn-sm" onclick="loadModel(${idx})">Load now</button>`):''}
<button class="btn btn-secondary btn-sm" onclick="openCfgModal(${idx})">${f.in_config?'Configure':'Add to CoderAI'}</button>
${f.in_config?`<button class="btn btn-ghost btn-sm" onclick="disableModel(${idx})">Remove</button>`:''}
<button class="btn btn-danger btn-sm" onclick="deleteModelConfirm(${idx})">Delete</button>
</td>
</tr>`;
      });
      ggufEl.innerHTML = '<table style="width:100%;border-collapse:collapse;font-size:13px">'+
        '<thead><tr style="color:var(--text-2);font-size:10px;text-transform:uppercase;letter-spacing:.05em">'+
        '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">File</th>'+
        '<th style="text-align:right;padding:.3rem .25rem;font-weight:700">Size</th>'+
        '<th style="text-align:center;padding:.3rem .25rem;font-weight:700">Config</th>'+
        '<th></th></tr></thead><tbody>'+rows.join('')+'</tbody></table>';
    }
  }catch(e){
    hfEl.innerHTML = ggufEl.innerHTML = `<span class="muted small">Error: ${esc(e.message)}</span>`;
  }
}
// Legacy no-op kept (and properly terminated) because the page script still
// calls loadModels() further down.
async function loadModels() {
  // TODO: Implement loading models from API
}
// Keys of the models the server currently reports as loaded.
let _loadedKeys = new Set();
/**
 * Refresh _loadedKeys from /admin/api/model-loaded-status.
 * The previous set is kept when the request fails or returns a non-OK status.
 */
async function refreshLoadedStatus(){
  try{
    const resp = await fetch('/admin/api/model-loaded-status');
    if(!resp.ok) return;
    const payload = await resp.json();
    _loadedKeys = new Set(payload.loaded||[]);
  }catch{}
}
/**
 * Refresh everything about the local cache: first the loaded-model keys
 * (awaited, because the tables read them), then the stats and the model
 * tables, which are started without being awaited.
 */
async function refreshLocal(){
  await refreshLoadedStatus();
  for(const reload of [loadCacheStats, loadCachedModels]){
    reload();
  }
}
// Initial population of the cache stats and the local model tables.
refreshLocal();
/**
 * Ask for confirmation, then delete a whole model cache.
 * @param {string} type Cache selector sent as ?cache_type=; 'hf', 'gguf' and
 *   'all' have display labels for the confirmation prompt.
 */
async function clearCacheConfirm(type){
  const cacheNames = {hf:'HuggingFace', gguf:'GGUF', all:'ALL'};
  const confirmed = confirm(`Delete ${cacheNames[type]} model cache? This cannot be undone.`);
  if(!confirmed) return;
  try{
    const resp = await fetch('/admin/api/cache?cache_type='+type, {method:'DELETE'});
    const result = await resp.json();
    if(!result.success){
      alert('Error clearing cache');
      return;
    }
    refreshLocal();
    alert(`Cache cleared. Freed ${fmtBytes(result.freed_bytes||0)}.`);
  }catch(e){
    alert('Error: '+e.message);
  }
}
/**
 * Ask for confirmation, then delete one cached model.
 * HF entries are addressed by their path (the repo id), GGUF entries by
 * their label (the file name).
 * @param {number} idx Index into _localModels.
 */
async function deleteModelConfirm(idx){
  const entry = _localModels[idx];
  const confirmed = confirm(`Delete "${entry.label}" from local cache? This cannot be undone.`);
  if(!confirmed) return;
  const target = entry.cacheType === 'hf' ? entry.path : entry.label;
  const url = '/admin/api/cached-models/'+encodeURIComponent(target)+'?cache_type='+entry.cacheType;
  try{
    const resp = await fetch(url, {method:'DELETE'});
    const result = await resp.json();
    if(result.success){
      refreshLocal();
    }else{
      alert('Error: '+(result.detail||'Unknown'));
    }
  }catch(e){
    alert('Error: '+e.message);
  }
}
/**
 * Fill the configuration modal from a local model entry and open it.
 * Stored settings win; otherwise each field gets the default used below.
 * @param {number} idx Index into _localModels.
 */
function openCfgModal(idx){
  const m = _localModels[idx];
  const s = m.settings || {};
  const byId = id => document.getElementById(id);
  const setText = (id,text)=>{ byId(id).textContent = text; };
  const setValue = (id,value)=>{ byId(id).value = value; };
  const setChecked = (id,flag)=>{ byId(id).checked = !!flag; };
  // Blank out a field when the setting is null/undefined (0 is kept).
  const orBlank = v => (v != null ? v : '');

  setText('cfg-modal-title', m.in_config ? 'Configure model' : 'Add to CoderAI');
  setText('cfg-id-label', m.label);
  setValue('cfg-path', m.path);
  setValue('cfg-orig-type', m.defaultType);
  // Map legacy gguf_models to text_models
  const rawType = s.model_type || m.defaultType;
  setValue('cfg-type', rawType === 'gguf_models' ? 'text_models' : rawType);
  setValue('cfg-alias', s.alias || '');
  setValue('cfg-backend', s.backend || 'auto');
  setValue('cfg-load-mode', s.load_mode || 'on-request');
  // Used VRAM, plus a hint estimated from the on-disk size
  setValue('cfg-used-vram', orBlank(s.used_vram_gb));
  const estVram = _estimateVram(m);
  setText('cfg-used-vram-hint', estVram ? `Estimated: ~${estVram.toFixed(1)} GB` : '');
  setValue('cfg-gpu-layers', s.n_gpu_layers !== undefined ? s.n_gpu_layers : -1);
  setValue('cfg-n-ctx', s.n_ctx || 2048);
  setValue('cfg-max-gpu', orBlank(s.max_gpu_percent));
  setValue('cfg-ram-gb', orBlank(s.manual_ram_gb));
  setChecked('cfg-4bit', s.load_in_4bit);
  setChecked('cfg-8bit', s.load_in_8bit);
  setChecked('cfg-flash', s.flash_attention);
  setChecked('cfg-noram', s.no_ram);
  setValue('cfg-offload-strategy', s.offload_strategy || 'auto');
  setValue('cfg-offload-dir', s.offload_dir || './offload');
  setValue('cfg-sysprompt', s.system_prompt || '');
  setValue('cfg-parser', s.parser || 'auto');
  setChecked('cfg-tools', s.tools_closer_prompt);
  setChecked('cfg-grammar', s.grammar_guided);
  openModal('cfg-modal');
}
/**
 * Rough VRAM estimate in GB from the on-disk size: 1.1x for GGUF entries,
 * 1.2x for everything else. Returns null when the size is missing or 0.
 */
function _estimateVram(m) {
  if (!m.size_gb) return null;
  const factor = m.cacheType === 'gguf' ? 1.1 : 1.2;
  return m.size_gb * factor;
}
/**
 * Collect the configuration modal's fields and POST them to
 * /admin/api/model-configure. On success the modal is closed and the model
 * tables are reloaded; otherwise the server's detail message is alerted.
 *
 * Optional numeric fields are sent as null when left blank. n_gpu_layers
 * falls back to -1 only when the field is blank or not a number, so an
 * explicit 0 is sent as 0 (it used to be rewritten to -1 by `|| -1`).
 */
async function saveModelConfig(){
  const path = document.getElementById('cfg-path').value;
  const maxGpu = parseFloat(document.getElementById('cfg-max-gpu').value);
  const ramGb = parseFloat(document.getElementById('cfg-ram-gb').value);
  const usedVram = parseFloat(document.getElementById('cfg-used-vram').value);
  const gpuLayers = parseInt(document.getElementById('cfg-gpu-layers').value, 10);
  const data = {
    path,
    model_type: document.getElementById('cfg-type').value,
    alias: document.getElementById('cfg-alias').value.trim() || null,
    backend: document.getElementById('cfg-backend').value,
    load_mode: document.getElementById('cfg-load-mode').value,
    used_vram_gb: isNaN(usedVram) ? null : usedVram,
    // 0 is a legitimate value here, so test for NaN instead of falsiness.
    n_gpu_layers: isNaN(gpuLayers) ? -1 : gpuLayers,
    n_ctx: parseInt(document.getElementById('cfg-n-ctx').value, 10) || 2048,
    max_gpu_percent: isNaN(maxGpu) ? null : maxGpu,
    manual_ram_gb: isNaN(ramGb) ? null : ramGb,
    load_in_4bit: document.getElementById('cfg-4bit').checked,
    load_in_8bit: document.getElementById('cfg-8bit').checked,
    flash_attention: document.getElementById('cfg-flash').checked,
    no_ram: document.getElementById('cfg-noram').checked,
    offload_strategy: document.getElementById('cfg-offload-strategy').value,
    offload_dir: document.getElementById('cfg-offload-dir').value.trim() || './offload',
    system_prompt: document.getElementById('cfg-sysprompt').value.trim() || null,
    parser: document.getElementById('cfg-parser').value,
    tools_closer_prompt: document.getElementById('cfg-tools').checked,
    grammar_guided: document.getElementById('cfg-grammar').checked,
  };
  try{
    const r = await fetch('/admin/api/model-configure',{
      method:'POST', headers:{'Content-Type':'application/json'},
      body: JSON.stringify(data)
    });
    const d = await r.json();
    if(d.success){ closeModal('cfg-modal'); loadCachedModels(); }
    else alert('Error: '+(d.detail||'Unknown'));
  }catch(e){ alert('Error: '+e.message); }
}
// NOTE(review): the only loadModels definition visible in this script is a
// TODO stub — confirm whether this call is still needed.
loadModels();
/**
 * Ask the server to load a model now.
 * The row's "Load now" button is disabled and relabelled while the request
 * runs; it is restored only on failure, because a success re-renders the rows.
 * @param {number} idx Index into _localModels.
 */
async function loadModel(idx){
  const entry = _localModels[idx];
  // Find the button and show loading state
  const button = document.querySelector(`button[onclick="loadModel(${idx})"]`);
  if(button){
    button.disabled = true;
    button.textContent = 'Loading…';
  }
  const fail = reason=>{
    if(button){
      button.disabled=false;
      button.textContent='Load now';
    }
    alert('Error: '+reason);
  };
  try{
    const resp = await fetch('/admin/api/model-load',{
      method:'POST', headers:{'Content-Type':'application/json'},
      body: JSON.stringify({path: entry.path})
    });
    const result = await resp.json();
    if(result.success) refreshLocal();
    else fail(result.detail||'Unknown');
  }catch(e){
    fail(e.message);
  }
}
/**
 * Ask the server to unload a model, then refresh the local view.
 * @param {number} idx Index into _localModels.
 */
async function unloadModel(idx){
  const {path} = _localModels[idx];
  const request = {
    method:'POST',
    headers:{'Content-Type':'application/json'},
    body: JSON.stringify({path}),
  };
  try{
    const resp = await fetch('/admin/api/model-unload', request);
    const result = await resp.json();
    if(!result.success){
      alert('Error: '+(result.detail||'Unknown'));
      return;
    }
    refreshLocal();
  }catch(e){
    alert('Error: '+e.message);
  }
}
/**
 * After confirmation, remove a model from the CoderAI config (the cached
 * files are kept) and reload the model tables.
 * @param {number} idx Index into _localModels.
 */
async function disableModel(idx){
  const {path} = _localModels[idx];
  const confirmed = confirm('Remove this model from CoderAI config? It will stay in the local cache.');
  if(!confirmed) return;
  const request = {
    method:'POST',
    headers:{'Content-Type':'application/json'},
    body: JSON.stringify({path}),
  };
  try{
    const resp = await fetch('/admin/api/model-disable', request);
    const result = await resp.json();
    if(!result.success){
      alert('Error: '+(result.detail||'Unknown'));
      return;
    }
    loadCachedModels();
  }catch(e){
    alert('Error: '+e.message);
  }
}
/**
 * Upload the selected .gguf file to /admin/api/model-upload in 512 KB chunks.
 *
 * Chunks are sent sequentially with the same upload_id. The progress bar is
 * updated after each acknowledged chunk; when the server reports `complete`
 * the modal is closed and the local model list refreshed.
 *
 * On any failure the form is restored before the error is alerted, so the
 * user can retry instead of being left on a frozen progress view. Empty
 * files are rejected up front, since they would produce zero chunks.
 */
async function startUpload(){
  const fileInput = document.getElementById('upload-file');
  const file = fileInput.files[0];
  if(!file){ alert('Please select a file'); return; }
  if(!file.name.endsWith('.gguf')){ alert('Only .gguf files are supported'); return; }
  if(file.size === 0){ alert('Selected file is empty'); return; }

  // Switch between the file-picker form and the progress view.
  const showForm = ()=>{
    document.getElementById('upload-form').style.display='block';
    document.getElementById('upload-progress').style.display='none';
  };
  const fail = msg=>{ showForm(); alert(msg); };

  document.getElementById('upload-form').style.display='none';
  document.getElementById('upload-progress').style.display='block';
  document.getElementById('upload-filename').textContent = file.name;
  // Start from zero so a previous upload's 100% bar is not shown.
  document.getElementById('upload-bar').style.width = '0%';
  document.getElementById('upload-pct').textContent = '0%';

  const chunkSize = 512 * 1024; // 512KB
  const totalChunks = Math.ceil(file.size / chunkSize);
  // Sent with every chunk of this file; built from the current time and the file name.
  const uploadId = Date.now() + '_' + file.name;
  for(let i=0; i<totalChunks; i++){
    const start = i * chunkSize;
    const end = Math.min(start + chunkSize, file.size);
    const chunk = file.slice(start, end);
    const formData = new FormData();
    formData.append('chunk', chunk);
    formData.append('filename', file.name);
    formData.append('chunk_index', i);
    formData.append('total_chunks', totalChunks);
    formData.append('upload_id', uploadId);
    try{
      const r = await fetch('/admin/api/model-upload',{method:'POST', body:formData});
      const d = await r.json();
      if(!d.success){ fail('Upload failed: '+(d.detail||'Unknown')); return; }
      const pct = Math.round((i+1)/totalChunks*100);
      document.getElementById('upload-bar').style.width = pct+'%';
      document.getElementById('upload-pct').textContent = pct+'%';
      document.getElementById('upload-status').textContent = `Chunk ${i+1}/${totalChunks}`;
      if(d.complete){
        document.getElementById('upload-status').textContent = 'Complete!';
        // Leave the completed state visible briefly before closing.
        setTimeout(()=>{
          closeModal('upload-modal');
          showForm();
          fileInput.value='';
          refreshLocal();
        }, 1500);
        return;
      }
    }catch(e){ fail('Upload error: '+e.message); return; }
  }
  // All chunks were acknowledged but none was reported as complete.
  fail('Upload failed: server did not confirm completion');
}
</script>
{% endblock %}
{% extends "base.html" %}
{% block title %}Settings — CoderAI{% endblock %}
{% block content %}
<div class="page-header">
<div>
<h1>Settings</h1>
<p>Server configuration — restart CoderAI to apply changes</p>
</div>
<div class="header-actions">
<span id="save-status" class="muted small" style="margin-right:.5rem"></span>
<button class="btn btn-primary" onclick="saveSettings()">Save changes</button>
</div>
</div>
<div id="settings-alert" style="display:none"></div>
<!-- Server -->
<div class="card mb-0">
<div class="card-title">Server</div>
<div style="display:grid;grid-template-columns:1fr 160px;gap:1rem;align-items:start">
<div class="form-row" style="margin:0">
<label class="form-label">Listen host</label>
<input type="text" id="s-host" class="form-input" placeholder="0.0.0.0">
<span class="form-hint">IP address or hostname to bind to (0.0.0.0 = all interfaces)</span>
</div>
<div class="form-row" style="margin:0">
<label class="form-label">Port</label>
<input type="number" id="s-port" class="form-input" placeholder="8000" min="1" max="65535">
</div>
</div>
<div class="form-row" style="margin-top:1rem;margin-bottom:.25rem">
<label style="display:flex;align-items:center;gap:.5rem;cursor:pointer">
<input type="checkbox" id="s-https" onchange="toggleHttps()">
<span style="font-size:13px;font-weight:500">Enable HTTPS</span>
</label>
</div>
<div id="https-fields" style="display:none;margin-top:.75rem">
<div class="form-row">
<label class="form-label">SSL key path <span class="muted">(leave blank to auto-generate)</span></label>
<input type="text" id="s-key" class="form-input" placeholder="/path/to/key.pem">
</div>
<div class="form-row" style="margin:0">
<label class="form-label">SSL certificate path</label>
<input type="text" id="s-cert" class="form-input" placeholder="/path/to/cert.pem">
</div>
</div>
</div>
<!-- Storage -->
<div class="card mb-0" style="margin-top:1rem">
<div class="card-title">Storage</div>
<div class="form-row">
<label class="form-label">HuggingFace cache directory <span class="muted">(leave blank for default ~/.cache/huggingface)</span></label>
<input type="text" id="s-hf-cache" class="form-input" placeholder="e.g. /data/models/huggingface">
</div>
<div class="form-row" style="margin:0">
<label class="form-label">GGUF cache directory <span class="muted">(leave blank for default ~/.cache/coderai/models)</span></label>
<input type="text" id="s-gguf-cache" class="form-input" placeholder="e.g. /data/models/gguf">
</div>
</div>
{% endblock %}
{% block scripts %}
<script>
/** Show the SSL key/cert fields only while "Enable HTTPS" is ticked. */
function toggleHttps(){
  const enabled = document.getElementById('s-https').checked;
  const fields = document.getElementById('https-fields');
  fields.style.display = enabled ? 'block' : 'none';
}
/**
 * Show a message in the settings alert box.
 * 'error' alerts stay visible; every other type is styled as info and
 * hidden again after 4 seconds.
 */
function showAlert(type, msg){
  const isError = type === 'error';
  const box = document.getElementById('settings-alert');
  box.className = 'alert alert-' + (isError ? 'error' : 'info');
  box.textContent = msg;
  box.style.display = 'flex';
  if(isError) return;
  setTimeout(()=>{ box.style.display='none'; }, 4000);
}
/**
 * Load the current settings from /admin/api/settings into the form.
 *
 * A non-OK response is reported as an error instead of being parsed as
 * settings: otherwise an error body (which has no server/models keys) would
 * silently fill the form with defaults that could then be saved over the
 * real configuration.
 */
async function loadSettings(){
  try{
    const r = await fetch('/admin/api/settings');
    if(!r.ok){
      let detail = r.statusText;
      // The error body may not be JSON; fall back to the status text.
      try{ detail = (await r.json()).detail || detail; }catch{}
      throw new Error(detail || ('HTTP '+r.status));
    }
    const d = await r.json();
    document.getElementById('s-host').value = d.server?.host ?? '0.0.0.0';
    document.getElementById('s-port').value = d.server?.port ?? 8000;
    document.getElementById('s-https').checked = !!d.server?.https;
    document.getElementById('s-key').value = d.server?.https_key_path ?? '';
    document.getElementById('s-cert').value = d.server?.https_cert_path ?? '';
    document.getElementById('s-hf-cache').value = d.models?.hf_cache_dir ?? '';
    document.getElementById('s-gguf-cache').value = d.models?.gguf_cache_dir ?? '';
    // Sync the visibility of the SSL fields with the loaded checkbox state.
    toggleHttps();
  }catch(e){ showAlert('error','Failed to load settings: '+e.message); }
}
/**
 * POST the settings form to /admin/api/settings.
 * Blank optional paths are sent as null; a blank host falls back to
 * '0.0.0.0' and a blank or invalid port to 8000.
 */
async function saveSettings(){
  const field = id => document.getElementById(id);
  const strOrNull = id => field(id).value.trim() || null;
  const server = {
    host: field('s-host').value.trim() || '0.0.0.0',
    port: parseInt(field('s-port').value) || 8000,
    https: field('s-https').checked,
    https_key_path: strOrNull('s-key'),
    https_cert_path: strOrNull('s-cert'),
  };
  const models = {
    hf_cache_dir: strOrNull('s-hf-cache'),
    gguf_cache_dir: strOrNull('s-gguf-cache'),
  };
  try{
    const resp = await fetch('/admin/api/settings',{
      method:'POST', headers:{'Content-Type':'application/json'},
      body: JSON.stringify({server, models})
    });
    if(!resp.ok){
      const err = await resp.json();
      showAlert('error', err.detail||'Save failed');
      return;
    }
    showAlert('info','Settings saved. Restart CoderAI to apply.');
  }catch(e){
    showAlert('error','Error: '+e.message);
  }
}
// Populate the form with the saved settings.
loadSettings();
</script>
{% endblock %}
{% extends "base.html" %}
{# A template may define each block name only once; the page title is kept in a single block. #}
{% block title %}Tokens — CoderAI{% endblock %}
{% block content %}
<div class="page-header">
<div>
<h1>API Tokens</h1>
<div class="header-actions">
<button class="btn btn-primary" onclick="showCreateTokenModal()">Create Token</button>
</div>
<p>Access tokens for API clients</p>
</div>
<div class="header-actions">
<button class="btn btn-primary" onclick="openModal('create-modal')">New token</button>
</div>
</div>
<div class="card">
<div class="table-responsive">
<table class="table">
<thead>
<tr>
<th>Name</th>
<th>Token</th>
<th>Provider</th>
<th>Created</th>
<th>Last Used</th>
<th>Actions</th>
</tr>
</thead>
<tbody id="tokens-table">
<tr>
<td colspan="6" class="text-center text-muted">No tokens created</td>
</tr>
</tbody>
</table>
</div>
<div class="table-wrap">
<table>
<thead>
<tr><th>Name</th><th>Token</th><th>Format</th><th>Created</th><th>Last used</th><th></th></tr>
</thead>
<tbody id="tokens-body">
<tr class="empty-row"><td colspan="6">No tokens — create one to get started</td></tr>
</tbody>
</table>
</div>
<!-- Create Token Modal -->
<div id="create-token-modal" class="modal">
<div class="modal-content">
<div class="modal-header">
<h2>Create API Token</h2>
<button class="modal-close" onclick="hideCreateTokenModal()">&times;</button>
</div>
<div class="modal-body">
<form id="create-token-form">
<div class="form-group">
<label for="token-name">Token Name</label>
<input type="text" id="token-name" class="form-control"
placeholder="e.g., Production API" required>
<small class="form-text">A descriptive name for this token</small>
</div>
<div class="form-group">
<label for="token-provider">Provider Format</label>
<select id="token-provider" class="form-control">
<option value="openai">OpenAI (sk-...)</option>
<option value="anthropic">Anthropic (sk-ant-...)</option>
<option value="custom">Custom</option>
</select>
</div>
<div class="form-actions">
<button type="submit" class="btn btn-primary">Create Token</button>
<button type="button" class="btn btn-secondary" onclick="hideCreateTokenModal()">Cancel</button>
</div>
</form>
</div>
<div id="create-modal" class="modal">
<div class="modal-box">
<div class="modal-head">
<span class="modal-title">New API token</span>
<button class="modal-close" onclick="closeModal('create-modal')">×</button>
</div>
<div class="modal-body">
<div class="form-row">
<label class="form-label">Name</label>
<input type="text" id="t-name" class="form-input" placeholder="e.g. My App">
<span class="form-hint">A label to identify this token</span>
</div>
<div class="form-row">
<label class="form-label">Format</label>
<select id="t-provider" class="form-input">
<option value="openai">OpenAI (sk-coderai-…)</option>
<option value="anthropic">Anthropic</option>
<option value="custom">Custom</option>
</select>
</div>
<div class="form-actions">
<button class="btn btn-primary" onclick="createToken()">Generate</button>
<button class="btn btn-ghost" onclick="closeModal('create-modal')">Cancel</button>
</div>
</div>
</div>
</div>
<!-- Show Token Modal -->
<div id="show-token-modal" class="modal">
<div class="modal-content">
<div class="modal-header">
<h2>Token Created</h2>
<button class="modal-close" onclick="hideShowTokenModal()">&times;</button>
</div>
<div class="modal-body">
<div class="alert alert-warning">
<strong>Important:</strong> Copy this token now. You won't be able to see it again!
</div>
<div class="token-display">
<code id="new-token-value"></code>
<button class="btn btn-secondary btn-sm" onclick="copyToken()">Copy</button>
</div>
<div class="form-actions">
<button class="btn btn-primary" onclick="hideShowTokenModal()">Done</button>
</div>
</div>
<div id="show-modal" class="modal">
<div class="modal-box">
<div class="modal-head">
<span class="modal-title">Token created</span>
<button class="modal-close" onclick="closeModal('show-modal')">×</button>
</div>
<div class="modal-body">
<div class="alert alert-warning">Copy this now — it won't be shown again.</div>
<div class="token-box">
<code id="new-token"></code>
<button class="btn btn-ghost btn-sm" id="copy-btn" onclick="copyToken()">Copy</button>
</div>
<div class="form-actions">
<button class="btn btn-primary" onclick="closeModal('show-modal')">Done</button>
</div>
</div>
</div>
</div>
{% endblock %}
{% block scripts %}
<script>
function showCreateTokenModal() {
document.getElementById('create-token-modal').style.display = 'flex';
}
function hideCreateTokenModal() {
document.getElementById('create-token-modal').style.display = 'none';
document.getElementById('create-token-form').reset();
}
function showShowTokenModal(token) {
document.getElementById('new-token-value').textContent = token;
document.getElementById('show-token-modal').style.display = 'flex';
function openModal(id) { document.getElementById(id).classList.add('show'); }
function closeModal(id) {
document.getElementById(id).classList.remove('show');
if (id === 'create-modal') document.getElementById('t-name').value = '';
}
function hideShowTokenModal() {
document.getElementById('show-token-modal').style.display = 'none';
function copyToken() {
navigator.clipboard.writeText(document.getElementById('new-token').textContent).then(() => {
const btn = document.getElementById('copy-btn');
btn.textContent = 'Copied!'; setTimeout(() => btn.textContent = 'Copy', 2000);
});
}
function copyToken() {
const token = document.getElementById('new-token-value').textContent;
navigator.clipboard.writeText(token).then(() => {
alert('Token copied to clipboard');
});
/**
 * Format a timestamp as a short, locale-aware date (month abbreviated,
 * numeric day and year).
 *
 * @param {*} s - Anything accepted by the Date constructor.
 * @returns {*} The formatted date string, or `s` unchanged when it cannot
 *   be parsed as a date or formatting fails.
 */
function fmt(s) {
  const d = new Date(s);
  // An unparseable input does not throw: it produces an Invalid Date whose
  // toLocaleDateString() is the text "Invalid Date". Check explicitly so the
  // caller gets the raw value back instead.
  if (Number.isNaN(d.getTime())) return s;
  try {
    return d.toLocaleDateString(undefined, {month:'short',day:'numeric',year:'numeric'});
  } catch {
    return s;
  }
}
/**
 * Escape a value for insertion into HTML built via template strings.
 *
 * Escapes &, <, > and both quote characters, so the result is safe in
 * element content as well as inside quoted attribute values.
 *
 * @param {*} s - Value to escape; coerced with String().
 * @returns {string} The HTML-escaped text.
 */
function esc(s) {
  return String(s)
    .replace(/&/g,'&amp;')   // must run first so later entities are not double-escaped
    .replace(/</g,'&lt;')
    .replace(/>/g,'&gt;')
    .replace(/"/g,'&quot;')
    .replace(/'/g,'&#39;');
}
async function loadTokens() {
try {
const response = await fetch('/admin/api/tokens');
const tokens = await response.json();
const tbody = document.getElementById('tokens-table');
if (tokens.length === 0) {
tbody.innerHTML = '<tr><td colspan="6" class="text-center text-muted">No tokens created</td></tr>';
return;
}
tbody.innerHTML = tokens.map(token => `
<tr>
<td>${token.name}</td>
<td><code>${token.token.substring(0, 20)}...</code></td>
<td>${token.provider}</td>
<td>${new Date(token.created_at).toLocaleDateString()}</td>
<td>${token.last_used ? new Date(token.last_used).toLocaleDateString() : 'Never'}</td>
<td>
<button class="btn btn-danger btn-sm" onclick="deleteToken(${token.id})">Delete</button>
</td>
</tr>
`).join('');
} catch (error) {
console.error('Failed to load tokens:', error);
try {
const tokens = await fetch('/admin/api/tokens').then(r => r.json());
const tbody = document.getElementById('tokens-body');
if (!tokens.length) {
tbody.innerHTML = '<tr class="empty-row"><td colspan="6">No tokens — create one to get started</td></tr>';
return;
}
tbody.innerHTML = tokens.map(t => `
<tr>
<td class="td-name">${esc(t.name)}</td>
<td><code>${esc(t.token.substring(0,28))}…</code></td>
<td><span class="badge badge-user">${esc(t.provider)}</span></td>
<td class="mono small dim">${fmt(t.created_at)}</td>
<td class="dim small">${t.last_used ? fmt(t.last_used) : 'Never'}</td>
<td style="text-align:right"><button class="btn btn-danger btn-sm" onclick="delToken(${t.id})">Delete</button></td>
</tr>`).join('');
} catch {}
}
document.getElementById('create-token-form').addEventListener('submit', async (e) => {
e.preventDefault();
const name = document.getElementById('token-name').value;
const provider = document.getElementById('token-provider').value;
try {
const response = await fetch('/admin/api/tokens', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ name, provider })
});
if (response.ok) {
const data = await response.json();
hideCreateTokenModal();
showShowTokenModal(data.token);
loadTokens();
} else {
alert('Failed to create token');
}
} catch (error) {
alert('Error: ' + error.message);
async function createToken() {
const name = document.getElementById('t-name').value.trim();
if (!name) { document.getElementById('t-name').focus(); return; }
try {
const r = await fetch('/admin/api/tokens', {
method:'POST', headers:{'Content-Type':'application/json'},
body: JSON.stringify({name, provider: document.getElementById('t-provider').value})
});
if (r.ok) {
const d = await r.json();
closeModal('create-modal');
document.getElementById('new-token').textContent = d.token;
openModal('show-modal');
loadTokens();
} else {
const e = await r.json(); alert(e.detail || 'Failed');
}
});
} catch (e) { alert(e.message); }
}
async function deleteToken(tokenId) {
if (!confirm('Are you sure you want to delete this token? This cannot be undone.')) {
return;
}
try {
const response = await fetch(`/admin/api/tokens/${tokenId}`, {
method: 'DELETE'
});
if (response.ok) {
loadTokens();
} else {
alert('Failed to delete token');
}
} catch (error) {
alert('Error: ' + error.message);
}
/**
 * Delete an API token after the user confirms, then reload the token table.
 *
 * @param {number|string} id - Token id appended to the DELETE URL.
 */
async function delToken(id) {
  if (!confirm('Delete this token? Clients using it will lose access immediately.')) return;
  try {
    const r = await fetch('/admin/api/tokens/'+id, {method:'DELETE'});
    if (r.ok) loadTokens(); else alert('Failed to delete');
  } catch (e) {
    // fetch() rejects on network failure; report it rather than leaving an
    // unhandled promise rejection and a silent no-op for the user.
    alert(e.message);
  }
}
loadTokens();
......
{% extends "base.html" %}
{% block title %}Users - CoderAI{% endblock %}
{% block title %}Users — CoderAI{% endblock %}
{% block content %}
<div class="page-header">
<div>
<h1>Users</h1>
<div class="header-actions">
<button class="btn btn-primary" onclick="showCreateUserModal()">Create User</button>
</div>
<p>{{ users|length }} account{{ 's' if users|length != 1 else '' }}</p>
</div>
<div class="header-actions">
<button class="btn btn-primary" onclick="openModal('add-modal')">Add user</button>
</div>
</div>
<div class="card">
<div class="table-responsive">
<table class="table">
<thead>
<tr>
<th>Username</th>
<th>Role</th>
<th>Created</th>
<th>Actions</th>
</tr>
</thead>
<tbody>
{% for user in users %}
<tr>
<td>{{ user.username }}</td>
<td>
<span class="badge {% if user.role == 'admin' %}badge-primary{% else %}badge-secondary{% endif %}">
{{ user.role }}
</span>
</td>
<td>{{ user.created_at[:10] }}</td>
<td>
{% if user.username != username %}
<button class="btn btn-danger btn-sm" onclick="deleteUser({{ user.id }}, '{{ user.username }}')">Delete</button>
{% else %}
<a href="/admin/change-password" class="btn btn-secondary btn-sm">Change Password</a>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="table-wrap">
  {# One row per account. The signed-in user (matched on `username`) gets a
     "you" badge and a change-password link instead of a delete button. #}
  <table>
    <thead>
      <tr><th>User</th><th>Role</th><th>Created</th><th></th></tr>
    </thead>
    <tbody>
      {% for user in users %}
      <tr>
        <td class="td-name">
          {{ user.username }}
          {% if user.username == username %}<span class="badge badge-user" style="margin-left:.375rem">you</span>{% endif %}
        </td>
        <td><span class="badge {% if user.role == 'admin' %}badge-admin{% else %}badge-user{% endif %}">{{ user.role }}</span></td>
        <td class="mono small dim">{{ user.created_at[:10] }}</td>
        <td style="text-align:right">
          {% if user.username == username %}
          <a href="/admin/change-password" class="btn btn-ghost btn-sm">Change password</a>
          {% else %}
          {# The username is passed as a JSON string literal. HTML-escaping alone
             is not enough inside an event handler: the browser decodes &#39;
             back to ' before parsing the JavaScript, so a quote in a username
             would break out of the string. `tojson` output contains double
             quotes, hence the single-quoted attribute. #}
          <button class="btn btn-danger btn-sm" onclick='delUser({{ user.id }}, {{ user.username|tojson }})'>Delete</button>
          {% endif %}
        </td>
      </tr>
      {% else %}
      <tr class="empty-row"><td colspan="4">No users found</td></tr>
      {% endfor %}
    </tbody>
  </table>
</div>
<!-- Create User Modal -->
<div id="create-user-modal" class="modal">
<div class="modal-content">
<div class="modal-header">
<h2>Create User</h2>
<button class="modal-close" onclick="hideCreateUserModal()">&times;</button>
</div>
<div class="modal-body">
<form id="create-user-form">
<div class="form-group">
<label for="new-username">Username</label>
<input type="text" id="new-username" class="form-control" required>
</div>
<div class="form-group">
<label for="new-password">Password</label>
<input type="password" id="new-password" class="form-control" required minlength="8">
<small class="form-text">Minimum 8 characters</small>
</div>
<div class="form-group">
<label for="new-role">Role</label>
<select id="new-role" class="form-control">
<option value="user">User</option>
<option value="admin">Admin</option>
</select>
</div>
<div class="form-actions">
<button type="submit" class="btn btn-primary">Create User</button>
<button type="button" class="btn btn-secondary" onclick="hideCreateUserModal()">Cancel</button>
</div>
</form>
</div>
<div id="add-modal" class="modal">
<div class="modal-box">
<div class="modal-head">
<span class="modal-title">Add user</span>
<button class="modal-close" onclick="closeModal('add-modal')">×</button>
</div>
<div class="modal-body">
<div id="add-err" class="alert alert-error" style="display:none"></div>
<div class="form-row">
<label class="form-label">Username</label>
<input type="text" id="new-uname" class="form-input" placeholder="username" autocomplete="off">
</div>
<div class="form-row">
<label class="form-label">Password</label>
<input type="password" id="new-pwd" class="form-input" placeholder="••••••••" autocomplete="new-password">
<span class="form-hint">Minimum 8 characters</span>
</div>
<div class="form-row">
<label class="form-label">Role</label>
<select id="new-role" class="form-input">
<option value="user">User</option>
<option value="admin">Admin</option>
</select>
</div>
<div class="form-actions">
<button class="btn btn-primary" onclick="addUser()">Create</button>
<button class="btn btn-ghost" onclick="closeModal('add-modal')">Cancel</button>
</div>
</div>
</div>
</div>
{% endblock %}
{% block scripts %}
<script>
function showCreateUserModal() {
document.getElementById('create-user-modal').style.display = 'flex';
function openModal(id) { document.getElementById(id).classList.add('show'); }
function closeModal(id) {
document.getElementById(id).classList.remove('show');
document.getElementById('add-err').style.display = 'none';
document.getElementById('new-uname').value = '';
document.getElementById('new-pwd').value = '';
}
function hideCreateUserModal() {
document.getElementById('create-user-modal').style.display = 'none';
document.getElementById('create-user-form').reset();
async function addUser() {
const uname = document.getElementById('new-uname').value.trim();
const pwd = document.getElementById('new-pwd').value;
const errEl = document.getElementById('add-err');
errEl.style.display = 'none';
if (!uname) { errEl.textContent = 'Username required'; errEl.style.display = 'flex'; return; }
if (pwd.length < 8) { errEl.textContent = 'Password must be at least 8 characters'; errEl.style.display = 'flex'; return; }
try {
const r = await fetch('/admin/api/users', {
method: 'POST', headers: {'Content-Type':'application/json'},
body: JSON.stringify({username: uname, password: pwd, role: document.getElementById('new-role').value})
});
if (r.ok) { location.reload(); }
else { const e = await r.json(); errEl.textContent = e.detail || 'Failed'; errEl.style.display = 'flex'; }
} catch (e) { errEl.textContent = e.message; errEl.style.display = 'flex'; }
}
document.getElementById('create-user-form').addEventListener('submit', async (e) => {
e.preventDefault();
const username = document.getElementById('new-username').value;
const password = document.getElementById('new-password').value;
const role = document.getElementById('new-role').value;
try {
const response = await fetch('/admin/api/users', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ username, password, role })
});
if (response.ok) {
hideCreateUserModal();
location.reload();
} else {
const error = await response.json();
alert('Failed to create user: ' + (error.detail || 'Unknown error'));
}
} catch (error) {
alert('Error: ' + error.message);
}
});
async function deleteUser(userId, username) {
if (!confirm(`Are you sure you want to delete user "${username}"?`)) {
return;
}
try {
const response = await fetch(`/admin/api/users/${userId}`, {
method: 'DELETE'
});
if (response.ok) {
location.reload();
} else {
const error = await response.json();
alert('Failed to delete user: ' + (error.detail || 'Unknown error'));
}
} catch (error) {
alert('Error: ' + error.message);
}
/**
 * Delete a user account after confirmation and reload the page on success.
 *
 * @param {number|string} id - User id appended to the DELETE URL.
 * @param {string} name - Username, shown only in the confirmation prompt.
 */
async function delUser(id, name) {
  if (!confirm('Delete user "' + name + '"?')) return;
  try {
    const r = await fetch('/admin/api/users/'+id, {method:'DELETE'});
    if (r.ok) { location.reload(); return; }
    // The error body is not guaranteed to be JSON; fall back to an empty
    // object so the generic message is shown instead of a parse error.
    const e = await r.json().catch(() => ({}));
    alert(e.detail || 'Failed');
  } catch (e) {
    // fetch() rejects on network failure; surface it to the user.
    alert(e.message);
  }
}
</script>
{% endblock %}
......@@ -91,6 +91,16 @@ app.include_router(text_router)
app.include_router(admin_router)
@app.exception_handler(401)
async def unauthorized_redirect(request: Request, exc: HTTPException):
    """Redirect browser clients to login page on 401; return JSON for API clients.

    Args:
        request: The incoming request; only its ``Accept`` header is inspected.
        exc: The exception that produced the 401.

    Returns:
        A 302 redirect to ``/login`` when the client accepts ``text/html``,
        otherwise a 401 JSON response carrying ``exc.detail`` and any headers
        attached to the exception.
    """
    accept = request.headers.get("accept", "")
    if "text/html" in accept:
        from fastapi.responses import RedirectResponse
        return RedirectResponse(url="/login", status_code=302)
    return JSONResponse(
        status_code=401,
        content={"detail": exc.detail},
        # Forward headers set on the exception (e.g. WWW-Authenticate) so this
        # override does not strip what the raising code asked to send.
        headers=getattr(exc, "headers", None),
    )
@app.get("/v1/models", response_model=ModelList)
async def list_models():
"""List available models."""
......
......@@ -476,41 +476,56 @@ async def _generate_with_sdcpp(sd_model, request, global_args, http_request=None
}
def _load_sdcpp_model(model_path: str, global_args):
def _load_sdcpp_model(model_path: str, global_args, model_config: dict = None):
"""
Try to load a model using stable-diffusion-cpp-python.
Returns the loaded StableDiffusion model or None.
"""
from stable_diffusion_cpp import StableDiffusion
# Check for --no-ram mode
no_ram = getattr(global_args, 'no_ram', False) if global_args else False
print(f"Loading sd.cpp model from: {model_path}")
# Build sd.cpp constructor args from config
kwargs = {
'model_path': model_path,
'offload_params_to_cpu': False, # Use GPU by default
'keep_clip_on_cpu': False,
'keep_control_net_on_cpu': False,
'keep_vae_on_cpu': False,
}
# Add optional paths from CLI args
if global_args:
if hasattr(global_args, 'vae_path') and global_args.vae_path:
kwargs['vae_path'] = global_args.vae_path
if hasattr(global_args, 'llm_path') and global_args.llm_path:
kwargs['lora_model_dir'] = global_args.llm_path
# --no-ram mode: maximize GPU offloading for sd.cpp
# If backend is explicitly cpu, offload to CPU
backend = (model_config or {}).get('backend', 'auto') if model_config else 'auto'
if backend == 'cpu':
kwargs['offload_params_to_cpu'] = True
kwargs['keep_clip_on_cpu'] = True
kwargs['keep_vae_on_cpu'] = True
if no_ram:
# stable-diffusion-cpp-python supports n_threads and gpu-related params
# Force full GPU offload by keeping all operations on GPU
kwargs['keep_clip_on_cpu'] = False # Don't offload CLIP to CPU
kwargs['keep_control_net_cpu'] = False # Don't offload ControlNet to CPU
kwargs['keep_vae_on_cpu'] = False # Don't offload VAE to CPU
print("--no-ram mode: sd.cpp maximizing GPU usage (no CPU offload for CLIP/VAE/ControlNet)")
sd_model = StableDiffusion(**kwargs)
try:
sd_model = StableDiffusion(**kwargs)
except Exception as e:
if 'cpu' not in str(backend) and ('memory' in str(e).lower() or 'cuda' in str(e).lower() or 'out of' in str(e).lower()):
print(f"GPU load failed ({e}), retrying with CPU offload...")
kwargs['offload_params_to_cpu'] = True
kwargs['keep_clip_on_cpu'] = True
kwargs['keep_vae_on_cpu'] = True
sd_model = StableDiffusion(**kwargs)
else:
raise
return sd_model
......@@ -665,7 +680,8 @@ async def create_image_generation(request: ImageGenerationRequest, http_request:
# Only use sd.cpp if we have a local file path
if resolved_path and os.path.isfile(resolved_path):
sd_model = _load_sdcpp_model(resolved_path, global_args)
cfg = multi_model_manager.config.get(model_key) or multi_model_manager.config.get(model_name) or {}
sd_model = _load_sdcpp_model(resolved_path, global_args, model_config=cfg)
if sd_model is not None:
# Cache the loaded model in the manager
......
......@@ -123,12 +123,13 @@ def setup_default_config(config_dir: Path):
# Default auth.json with admin / admin
from pathlib import Path
import secrets
from argon2 import PasswordHasher
if hasattr(argon2, 'PasswordHasher'):
ph = argon2.PasswordHasher()
try:
from argon2 import PasswordHasher
ph = PasswordHasher()
default_admin_hash = ph.hash("admin")
else:
default_admin_hash = "argon2id$v=19$m=65536,t=3,p=4$...admin_hash_placeholder"
except ImportError:
from codai.admin.auth import hash_password
default_admin_hash = hash_password("admin")
default_auth = {
"users": [{
......
......@@ -29,12 +29,54 @@ class BackendConfig:
class ModelsConfig:
"""Models configuration."""
default_load_mode: str = "ondemand"
hf_cache_dir: Optional[str] = None
gguf_cache_dir: Optional[str] = None
@dataclass
class OffloadConfig:
    """Offload configuration.

    Built from the "offload" section of the config data via keyword
    expansion, so every key in that section must match a field name here.
    All fields are written back under "offload" when the config is saved.
    """
    directory: str = "./offload"  # presumably where offloaded data is written; confirm against the consumer
    strategy: str = "auto"  # accepted values are not visible in this file section
    max_gpu_percent: Optional[float] = None  # presumably None means no explicit cap; TODO confirm
    no_ram: bool = False
    load_in_4bit: bool = False
    load_in_8bit: bool = False
    manual_ram_gb: Optional[float] = None
    flash_attention: bool = False
@dataclass
class VulkanConfig:
    """Vulkan backend configuration.

    Built from the "vulkan" section of the config data via keyword
    expansion and written back under the same key on save.
    """
    n_gpu_layers: int = -1  # presumably -1 means "all layers on GPU"; TODO confirm
    n_ctx: int = 2048
    device_id: int = 0
    single_gpu: bool = False
@dataclass
class ImageConfig:
    """Image generation configuration.

    Built from the "image" section of the config data via keyword
    expansion and written back under the same key on save.
    """
    llm_path: Optional[str] = None
    vae_path: Optional[str] = None
    sample_method: str = "res_multistep"
    steps: int = 4
    width: int = 512
    height: int = 512
    cfg_scale: float = 1.0
    precision: str = "f32"
    cpu_offload: bool = False
    seed: Optional[int] = None  # presumably None lets the backend pick a seed; TODO confirm
    vae_tiling: bool = False
    clip_on_cpu: bool = False
@dataclass
class WhisperConfig:
    """Whisper ASR configuration.

    Built from the "whisper" section of the config data. At startup the two
    values are copied onto the global args as ``whisper_server`` and
    ``whisper_server_port``.
    """
    server_path: Optional[str] = None
    server_port: int = 8744
@dataclass
......@@ -45,6 +87,9 @@ class Config:
backend: BackendConfig = field(default_factory=BackendConfig)
models: ModelsConfig = field(default_factory=ModelsConfig)
offload: OffloadConfig = field(default_factory=OffloadConfig)
vulkan: VulkanConfig = field(default_factory=VulkanConfig)
image: ImageConfig = field(default_factory=ImageConfig)
whisper: WhisperConfig = field(default_factory=WhisperConfig)
system_prompt: Optional[str] = None
tools_closer_prompt: bool = False
grammar_guided: bool = False
......@@ -140,7 +185,8 @@ class ConfigManager:
ph = PasswordHasher()
default_admin_hash = ph.hash("admin")
except ImportError:
default_admin_hash = "argon2id$v=19$m=65536,t=3,p=4$...admin_hash_placeholder"
from codai.admin.auth import hash_password
default_admin_hash = hash_password("admin")
default_auth = {
"users": [{
......@@ -182,6 +228,9 @@ class ConfigManager:
backend=BackendConfig(**config_data.get("backend", {})),
models=ModelsConfig(**config_data.get("models", {})),
offload=OffloadConfig(**config_data.get("offload", {})),
vulkan=VulkanConfig(**config_data.get("vulkan", {})),
image=ImageConfig(**config_data.get("image", {})),
whisper=WhisperConfig(**config_data.get("whisper", {})),
system_prompt=config_data.get("system_prompt"),
tools_closer_prompt=config_data.get("tools_closer_prompt", False),
grammar_guided=config_data.get("grammar_guided", False),
......@@ -242,10 +291,43 @@ class ConfigManager:
"tts_backend": self.config.backend.tts_backend
},
"models": {
"default_load_mode": self.config.models.default_load_mode
"default_load_mode": self.config.models.default_load_mode,
"hf_cache_dir": self.config.models.hf_cache_dir,
"gguf_cache_dir": self.config.models.gguf_cache_dir,
},
"offload": {
"directory": self.config.offload.directory
"directory": self.config.offload.directory,
"strategy": self.config.offload.strategy,
"max_gpu_percent": self.config.offload.max_gpu_percent,
"no_ram": self.config.offload.no_ram,
"load_in_4bit": self.config.offload.load_in_4bit,
"load_in_8bit": self.config.offload.load_in_8bit,
"manual_ram_gb": self.config.offload.manual_ram_gb,
"flash_attention": self.config.offload.flash_attention
},
"vulkan": {
"n_gpu_layers": self.config.vulkan.n_gpu_layers,
"n_ctx": self.config.vulkan.n_ctx,
"device_id": self.config.vulkan.device_id,
"single_gpu": self.config.vulkan.single_gpu
},
"image": {
"llm_path": self.config.image.llm_path,
"vae_path": self.config.image.vae_path,
"sample_method": self.config.image.sample_method,
"steps": self.config.image.steps,
"width": self.config.image.width,
"height": self.config.image.height,
"cfg_scale": self.config.image.cfg_scale,
"precision": self.config.image.precision,
"cpu_offload": self.config.image.cpu_offload,
"seed": self.config.image.seed,
"vae_tiling": self.config.image.vae_tiling,
"clip_on_cpu": self.config.image.clip_on_cpu
},
"whisper": {
"server_path": self.config.whisper.server_path,
"server_port": self.config.whisper.server_port
},
"system_prompt": self.config.system_prompt,
"tools_closer_prompt": self.config.tools_closer_prompt,
......@@ -255,7 +337,7 @@ class ConfigManager:
"reasoning_options": self.config.reasoning_options,
"parser": self.config.parser
}
with open(self.config_path, 'w') as f:
json.dump(config_dict, f, indent=2)
......
......@@ -5,7 +5,7 @@ import os
# Import configuration from codai modules
from codai.cli import parse_args
from codai.config import ConfigManager
from codai.admin.routes import init_session_manager
from codai.admin.routes import init_session_manager, set_config_manager
def main():
......@@ -31,10 +31,18 @@ def main():
config_dir = args.config
config_mgr = ConfigManager(config_dir)
config = config_mgr.load()
# Initialize admin session manager
# Apply cache directory overrides from config before any cache module is used
if config.models.hf_cache_dir:
os.environ['HF_HOME'] = config.models.hf_cache_dir
os.environ['HUGGINGFACE_HUB_CACHE'] = config.models.hf_cache_dir
if config.models.gguf_cache_dir:
os.environ['CODERAI_CACHE_DIR'] = config.models.gguf_cache_dir
# Initialize admin session manager and expose config to admin routes
from pathlib import Path
init_session_manager(Path(config_dir))
set_config_manager(config_mgr)
# Handle early exit options (before heavy imports)
if args.list_cached_models:
......@@ -294,106 +302,128 @@ def main():
kwargs['n_gpu_layers'] = model_cfg.get('n_gpu_layers', -1)
return kwargs
# Load text models (main LLM)
# =========================================================================
# Register and optionally pre-load all configured models
# Models with load_mode == "load" are pre-loaded at startup.
# Models with load_mode == "on-request" (default) are loaded on demand.
# =========================================================================
def _model_id(m):
"""Return the model path/id from a config entry (dict or str)."""
if isinstance(m, str):
return m
return m.get("path") or m.get("id") or ""
def _model_cfg(m, mtype):
    """Build the manager config for one model entry.

    Non-dict entries (bare model ids) get an empty config. Dict entries start
    from ``build_kwargs_from_config(m, mtype)`` and additionally carry over
    the "load_mode", "used_vram_gb" and "alias" keys when the entry has them.
    """
    if not isinstance(m, dict):
        return {}
    merged = build_kwargs_from_config(m, mtype)
    for passthrough in ("load_mode", "used_vram_gb", "alias"):
        if passthrough in m:
            merged[passthrough] = m[passthrough]
    return merged
# Text models
text_models = models_config.get("text_models", [])
text_model_names = [m["id"] for m in text_models if m.get("enabled", True)]
if text_model_names:
print(f"\nMain text model(s): {text_model_names}")
for idx, model_name in enumerate(text_models):
multi_model_manager.set_default_model(
model_name["id"],
config=build_kwargs_from_config(model_name, "text"),
backend_type=model_name.get("backend", "auto")
)
# Load preload list
preload_list = models_config.get("preload", [])
loaded_list = models_config.get("loaded", [])
# Determine which models to preload at startup
# loaded: models to load into VRAM (or CPU for loadswap) immediately
# preload: models to keep in CPU RAM for fast swapping
nopreload = False # Config-based loading, no CLI preload skip
# Pre-load models at startup based on config
if not nopreload and load_mode in ("loadall", "loadswap"):
all_startup_models = loaded_list + preload_list
elif not nopreload and load_mode == "ondemand":
all_startup_models = loaded_list[:1] if loaded_list else []
else:
all_startup_models = []
# Pre-load process
# Also include legacy gguf_models entries (treated as text)
text_models = text_models + models_config.get("gguf_models", [])
text_model_names = [_model_id(m) for m in text_models if _model_id(m)]
if text_model_names:
first_text = text_models[0]["id"] if text_models else None
if not nopreload and load_mode == "ondemand" and first_text:
# Preload first model into VRAM
try:
print(f"Preloading first model into VRAM: {first_text}...")
mm = multi_model_manager._load_default_model()
if mm is not None and mm.backend is not None:
multi_model_manager.active_in_vram = multi_model_manager.default_model
print(f"Model loaded successfully: {first_text}")
else:
print(f"Warning: Model {first_text} failed to load")
except Exception as e:
print(f"Warning: Failed to preload model: {e}")
print(f"Model will load on first request")
# Load audio models (registered, load on first request)
print(f"\nText model(s): {text_model_names}")
for i, m in enumerate(text_models):
mid = _model_id(m)
if not mid:
continue
cfg = _model_cfg(m, "text")
if i == 0:
# Only the first text model becomes the default
multi_model_manager.set_default_model(
mid, config=cfg,
backend_type=m.get("backend", "auto") if isinstance(m, dict) else "auto"
)
else:
# Additional text models: register config only, no default override
multi_model_manager.config[mid] = cfg
multi_model_manager.model_backend_types[mid] = (
m.get("backend", "auto") if isinstance(m, dict) else "auto"
)
# Audio models
audio_models = models_config.get("audio_models", [])
for audio_m in audio_models:
if audio_m.get("enabled", True):
multi_model_manager.set_audio_model(
audio_m["id"],
config=build_kwargs_from_config(audio_m, "audio")
)
# Load image models
for m in audio_models:
mid = _model_id(m)
if mid:
multi_model_manager.set_audio_model(mid, config=_model_cfg(m, "audio"))
# Image models
image_models = models_config.get("image_models", [])
for img_m in image_models:
if img_m.get("enabled", True):
multi_model_manager.set_image_model(
img_m["id"],
config=build_kwargs_from_config(img_m, "image")
)
# Load vision models
for m in image_models:
mid = _model_id(m)
if mid:
multi_model_manager.set_image_model(mid, config=_model_cfg(m, "image"))
# Vision models
vision_models = models_config.get("vision_models", [])
for vis_m in vision_models:
if vis_m.get("enabled", True):
multi_model_manager.set_vision_model(
vis_m["id"],
config=build_kwargs_from_config(vis_m, "vision")
)
# Load TTS model
tts_model = models_config.get("tts_models", [])
if tts_model:
for tts_m in tts_model:
if tts_m.get("enabled", True):
multi_model_manager.set_tts_model(tts_m["id"], {})
for m in vision_models:
mid = _model_id(m)
if mid:
multi_model_manager.set_vision_model(mid, config=_model_cfg(m, "vision"))
# TTS models
tts_models = models_config.get("tts_models", [])
for m in tts_models:
mid = _model_id(m)
if mid:
multi_model_manager.set_tts_model(mid, config=_model_cfg(m, "tts") if isinstance(m, dict) else {})
# Register aliases
aliases = models_config.get("aliases", {})
for alias, model in aliases.items():
multi_model_manager.set_model_alias(alias, model)
# Pre-load models marked as load_mode == "load" across ALL types
all_model_entries = (
[("text", m) for m in text_models] +
[("audio", m) for m in audio_models] +
[("image", m) for m in image_models] +
[("vision", m) for m in vision_models] +
[("tts", m) for m in tts_models]
)
for mtype, m in all_model_entries:
mid = _model_id(m)
if not mid:
continue
per_load_mode = m.get("load_mode", "on-request") if isinstance(m, dict) else "on-request"
if per_load_mode != "load":
print(f" '{mid}' — on-request (will load when needed)")
continue
print(f" Pre-loading '{mid}' (load mode)...")
try:
if mtype == "text":
mm = multi_model_manager._load_model_by_name(mid)
if mm is not None and mm.backend is not None:
multi_model_manager.active_in_vram = mid
print(f" Loaded: {mid}")
else:
print(f" Warning: {mid} failed to load")
# image/audio/vision/tts pre-loading is handled by their respective
# API modules on first request; we just log intent here.
else:
print(f" Note: pre-loading for {mtype} models happens on first request")
except Exception as e:
print(f" Warning: failed to pre-load '{mid}': {e}")
# Print startup summary
print(f"\nBackend: {backend}")
print(f"Load mode: {load_mode}")
available_models = multi_model_manager.list_models()
print(f"\nAvailable models: {[m.id for m in available_models]}")
# Register custom aliases from config
print(f"Available models: {[m.id for m in available_models]}")
if aliases:
print(f"\nModel aliases:")
print("Model aliases:")
for alias, target in aliases.items():
print(f" {alias} -> {target}")
# Set global args for backward compatibility with existing code
class ArgsCompat:
pass
......@@ -438,10 +468,10 @@ def main():
global_args.force_reasoning = config.reasoning_options
global_args.model = text_model_names
global_args.language_model = text_model_names
global_args.image_model = [m["id"] for m in image_models if m.get("enabled")]
global_args.audio_model = [m["id"] for m in audio_models if m.get("enabled")]
global_args.vision_model = [m["id"] for m in vision_models if m.get("enabled")]
global_args.tts_model = tts_model[0]["id"] if tts_model else None
global_args.image_model = [_model_id(m) for m in image_models]
global_args.audio_model = [_model_id(m) for m in audio_models]
global_args.vision_model = [_model_id(m) for m in vision_models]
global_args.tts_model = _model_id(tts_models[0]) if tts_models else None
global_args.model_aliases = [(k, v) for k, v in aliases.items()]
global_args.whisper_server = config.whisper.server_path
global_args.whisper_server_port = config.whisper.server_port
......@@ -458,86 +488,46 @@ def main():
global_args.vulkan_list_devices = False
global_args.loadall = False
global_args.loadswap = False
global_args.nopreload = nopreload
global_args.nopreload = False
set_global_args(global_args)
set_global_args_text(global_args)
set_load_mode_app(load_mode)
# Set image module global args
from codai.api.images import set_global_args as set_images_global_args
set_images_global_args(global_args)
# Vulkan list devices
if args.vulkan_list_devices:
print("\nListing Vulkan devices...")
# Pre-load image models marked as load_mode == "load"
for m in image_models:
mid = _model_id(m)
if not mid:
continue
per_load_mode = m.get("load_mode", "on-request") if isinstance(m, dict) else "on-request"
if per_load_mode != "load":
continue
model_key = f"image:{mid}"
if model_key in multi_model_manager.models:
continue
try:
import subprocess
result = subprocess.run(['vulkaninfo', '--summary'], capture_output=True, text=True)
if result.returncode == 0:
print(result.stdout)
from codai.api.images import _load_diffusers_pipeline, _is_gguf_model, _load_sdcpp_model
print(f"Pre-loading image model '{mid}' (load mode)...")
if _is_gguf_model(mid):
resolved_path = multi_model_manager.load_model(mid)
if resolved_path and os.path.isfile(resolved_path):
sd_model = _load_sdcpp_model(resolved_path, global_args)
if sd_model:
multi_model_manager.add_model(model_key, sd_model)
print(f" Image model loaded: {mid}")
else:
print("Could not run vulkaninfo.")
pipeline = _load_diffusers_pipeline(mid, global_args)
if pipeline:
multi_model_manager.add_model(model_key, pipeline)
print(f" Image model loaded: {mid}")
except Exception as e:
print(f"Error: {e}")
sys.exit(0)
# Startup: Preload configured models (non-text) for loadall/loadswap
if not nopreload and load_mode in ("loadall", "loadswap"):
first_loaded = multi_model_manager.active_in_vram is not None
if image_models:
print(f"\n=== Pre-loading image model(s) ===")
for img_m in image_models:
if not img_m.get("enabled", True):
continue
model_key = f"image:{img_m['id']}"
if model_key in multi_model_manager.models:
continue
try:
from codai.api.images import _load_diffusers_pipeline, _is_gguf_model, _load_sdcpp_model
if load_mode == "loadall":
print(f"Preloading image model into VRAM: {img_m['id']}...")
if _is_gguf_model(img_m['id']):
resolved_path = multi_model_manager.load_model(img_m['id'])
if resolved_path and os.path.isfile(resolved_path):
sd_model = _load_sdcpp_model(resolved_path, global_args)
if sd_model:
multi_model_manager.add_model(model_key, sd_model)
print(f"Image model loaded (VRAM): {img_m['id']}")
else:
try:
pipeline = _load_diffusers_pipeline(img_m['id'], global_args)
if pipeline:
multi_model_manager.add_model(model_key, pipeline)
print(f"Image model loaded (VRAM): {img_m['id']}")
except Exception as e:
em = str(e).lower()
if any(x in em for x in ['out of memory', 'oom', 'cuda error']):
print(f"VRAM full for {img_m['id']}, will load on demand")
else:
print(f"Warning: {e}")
elif load_mode == "loadswap" and not first_loaded:
print(f"Preloading image model: {img_m['id']}...")
if _is_gguf_model(img_m['id']):
resolved_path = multi_model_manager.load_model(img_m['id'])
if resolved_path and os.path.isfile(resolved_path):
sd_model = _load_sdcpp_model(resolved_path, global_args)
if sd_model:
multi_model_manager.add_model(model_key, sd_model)
first_loaded = True
print(f"Image model loaded: {img_m['id']}")
else:
try:
pipeline = _load_diffusers_pipeline(img_m['id'], global_args)
if pipeline:
multi_model_manager.add_model(model_key, pipeline)
first_loaded = True
print(f"Image model loaded: {img_m['id']}")
except Exception as e:
print(f"Warning: {e}")
except Exception as e:
print(f"Warning: {e}")
print(f" Warning: failed to pre-load image model '{mid}': {e}")
# Start the server
import uvicorn
......
......@@ -30,9 +30,11 @@ import time
def get_model_cache_dir() -> str:
    """Return the model cache directory, creating it if it does not exist.

    Resolution order:
      1. ``CODERAI_CACHE_DIR`` if set and non-empty (``~`` is expanded).
      2. ``$XDG_CACHE_HOME/coderai/models`` if ``XDG_CACHE_HOME`` is set and
         non-empty.
      3. ``~/.cache/coderai/models``.

    Returns:
        The resolved directory path. The directory (and any missing parents)
        is created before returning.
    """
    override = os.environ.get('CODERAI_CACHE_DIR')
    if override:
        # Explicit override wins; expand "~" because values coming from
        # .env files or service units are not shell-expanded.
        cache_dir = os.path.expanduser(override)
    else:
        # An empty XDG_CACHE_HOME must be treated like an unset one,
        # otherwise the cache would land in a path relative to the CWD.
        cache_home = os.environ.get('XDG_CACHE_HOME') or os.path.expanduser('~/.cache')
        cache_dir = os.path.join(cache_home, 'coderai', 'models')
    pathlib.Path(cache_dir).mkdir(parents=True, exist_ok=True)
    return cache_dir
......@@ -43,20 +45,24 @@ def get_all_cache_dirs() -> dict:
cache_home = os.environ.get('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
# Coderai GGUF cache
coderai_cache = os.path.join(cache_home, 'coderai', 'models')
coderai_cache = get_model_cache_dir()
if os.path.exists(coderai_cache):
caches['coderai'] = coderai_cache
# HuggingFace cache (for .safetensors, PyTorch models, etc.)
# Check both the main directory and the hub subdirectory
hf_cache = os.path.join(cache_home, 'huggingface')
hf_hub_cache = os.path.join(hf_cache, 'hub')
if os.path.exists(hf_hub_cache):
caches['huggingface'] = hf_hub_cache # Use hub directory if it exists
elif os.path.exists(hf_cache):
caches['huggingface'] = hf_cache
# Local diffusers cache (often stored locally by apps)
# HuggingFace cache — respect HF_HOME override
hf_home = os.environ.get('HF_HOME') or os.environ.get('HUGGINGFACE_HUB_CACHE')
if hf_home:
hf_hub_cache = os.path.join(hf_home, 'hub') if not hf_home.endswith('hub') else hf_home
caches['huggingface'] = hf_hub_cache if os.path.exists(hf_hub_cache) else hf_home
else:
hf_cache = os.path.join(cache_home, 'huggingface')
hf_hub_cache = os.path.join(hf_cache, 'hub')
if os.path.exists(hf_hub_cache):
caches['huggingface'] = hf_hub_cache
elif os.path.exists(hf_cache):
caches['huggingface'] = hf_cache
# Local diffusers cache
local_diffusers = os.path.expanduser('~/.cache/diffusers')
if os.path.exists(local_diffusers):
caches['diffusers'] = local_diffusers
......
......@@ -384,7 +384,8 @@ class MultiModelManager:
self.tool_parser = ModelParserAdapter()
self.current_model_key: Optional[str] = None
self.load_mode: str = "ondemand"
self.active_in_vram: Optional[str] = None
self.active_in_vram: Optional[str] = None # most-recently-used model key
self.models_in_vram: set = set() # all models currently in VRAM
self.model_aliases: Dict[str, str] = {}
self.whisper_server: Optional[WhisperServerManager] = None
self.model_backend_types: Dict[str, str] = {}
......@@ -675,9 +676,7 @@ class MultiModelManager:
def get_all_allowed_identifiers(self) -> set:
"""
Return the set of all model names, aliases, and identifiers that are
valid for API requests. This includes every identifier that
``list_models()`` would return as well as the raw model paths/names
registered via the command line.
valid for API requests.
"""
allowed = set()
......@@ -719,6 +718,25 @@ class MultiModelManager:
for alias in self.model_aliases:
allowed.add(alias)
# Also include all models from config (covers configured-but-not-yet-loaded models)
try:
from codai.admin.routes import config_manager
if config_manager is not None:
md = config_manager.models_data
for cat in ("text_models", "image_models", "audio_models",
"gguf_models", "tts_models", "vision_models"):
for m in md.get(cat, []):
mid = (m if isinstance(m, str) else
m.get("alias") or m.get("path") or m.get("id") or "")
raw = (m if isinstance(m, str) else m.get("path") or m.get("id") or "")
for val in (mid, raw):
if val:
allowed.add(val)
short = val.split("/")[-1] if "/" in val else val
allowed.add(short)
except Exception:
pass
return allowed
def is_allowed_model(self, requested_or_resolved: str, model_type: str = None) -> bool:
......@@ -1112,22 +1130,76 @@ class MultiModelManager:
except Exception as e:
print(f" Warning during VRAM load of '{model_key}': {e}")
def _get_free_vram_gb(self) -> float:
"""Return estimated free VRAM in GB, or a large number if unavailable."""
try:
import torch
if torch.cuda.is_available():
free, total = torch.cuda.mem_get_info()
return free / 1e9
except Exception:
pass
return 999.0 # Unknown — assume enough
def _get_model_used_vram_gb(self, model_key: str) -> float:
"""Return the configured used_vram_gb for a model, or 0 if unknown."""
cfg = self.config.get(model_key, {})
return float(cfg.get("used_vram_gb") or 0)
def _evict_models_for_vram(self, needed_gb: float):
"""Unload loaded models (LRU first) until we have at least needed_gb free VRAM."""
if needed_gb <= 0:
return
def _evict_key(key):
model_obj = self.models.pop(key, None)
self.models_in_vram.discard(key)
if model_obj is not None:
try:
if hasattr(model_obj, 'cleanup'):
model_obj.cleanup()
elif hasattr(model_obj, 'to'):
model_obj.to('cpu')
except Exception as e:
print(f" Warning during eviction of '{key}': {e}")
gc.collect()
try:
import torch
if torch.cuda.is_available():
torch.cuda.empty_cache()
except Exception:
pass
# First pass: evict non-active models in LRU order
for key in list(self.models.keys()):
if key == self.active_in_vram:
continue
if self._get_free_vram_gb() >= needed_gb:
break
print(f"On-request VRAM eviction: unloading '{key}' to free VRAM")
_evict_key(key)
# Second pass: evict active model if still not enough
if self._get_free_vram_gb() < needed_gb and self.active_in_vram and self.active_in_vram in self.models:
print(f"On-request VRAM eviction: unloading active model '{self.active_in_vram}' to free VRAM")
_evict_key(self.active_in_vram)
self.active_in_vram = None
def request_model(self, requested_model: str, model_type: str = None) -> Dict[str, Any]:
"""
Central method for API modules to request a model.
Handles three load modes:
Handles per-model load modes:
**loadall**: All models are pre-loaded at startup. Just return the
already-loaded model. No VRAM management needed.
**load**: Model is pre-loaded at startup and stays in VRAM.
**loadswap**: All models stay loaded (in CPU RAM or VRAM). When a
different model is requested, the current VRAM model is moved to CPU
RAM and the requested model is moved from CPU RAM to VRAM.
**on-request**: Model is loaded when first needed. Before loading,
checks free VRAM against the model's used_vram_gb config. If not
enough VRAM, evicts other loaded models until there is enough, then
loads the model.
**ondemand** (default when no flag specified): Only one model in memory
at a time. When a different model is requested, the current model is
fully unloaded (deleted) and the new one is loaded from scratch.
Legacy global modes (ondemand/loadall/loadswap) are still supported
for backward compatibility.
Args:
requested_model: The model name/alias from the API request
......@@ -1226,7 +1298,63 @@ class MultiModelManager:
# Step 3: Check if already loaded in self.models
existing_model = self.models.get(model_key)
# =====================================================================
# PER-MODEL LOAD MODE: Check per-model config first.
# Per-model "load" = pre-loaded (treat as loadall for this model).
# Per-model "on-request" = load when needed with VRAM management.
# =====================================================================
per_model_cfg = self.config.get(model_key, {})
per_model_load_mode = per_model_cfg.get("load_mode") # "load" | "on-request" | None
if per_model_load_mode == "on-request":
if existing_model is not None:
# Already loaded — just return it
self.current_model_key = model_key
self.active_in_vram = model_key
self.models_in_vram.add(model_key)
return {
'model_key': model_key,
'model_name': resolved_name,
'model_object': existing_model,
'config': per_model_cfg,
'already_loaded': True,
}
# Not loaded — check VRAM and evict if needed
needed_gb = self._get_model_used_vram_gb(model_key)
if needed_gb > 0:
free_gb = self._get_free_vram_gb()
if free_gb < needed_gb:
print(f"On-request: need {needed_gb:.1f} GB VRAM, have {free_gb:.1f} GB free — evicting models")
self._evict_models_for_vram(needed_gb)
return {
'model_key': model_key,
'model_name': resolved_name,
'model_object': None,
'config': per_model_cfg,
'already_loaded': False,
}
if per_model_load_mode == "load":
# Pre-loaded model — just return it (or signal caller to load it)
if existing_model is not None:
self.current_model_key = model_key
self.active_in_vram = model_key
return {
'model_key': model_key,
'model_name': resolved_name,
'model_object': existing_model,
'config': per_model_cfg,
'already_loaded': True,
}
return {
'model_key': model_key,
'model_name': resolved_name,
'model_object': None,
'config': per_model_cfg,
'already_loaded': False,
}
# =====================================================================
# LOADALL MODE: All models should be pre-loaded. Just return it.
# =====================================================================
......@@ -1443,6 +1571,7 @@ class MultiModelManager:
# Reset tracking state
self.current_model_key = None
self.active_in_vram = None
self.models_in_vram = set()
# Force garbage collection
for _ in range(3):
......@@ -1466,6 +1595,7 @@ class MultiModelManager:
"""Add a model (ModelManager, diffusers pipeline, sd.cpp model, etc.) for a specific key."""
self.models[key] = manager
self.active_in_vram = key
self.models_in_vram.add(key)
def get_model(self, key: str) -> Optional[ModelManager]:
"""Get a model manager by key."""
......@@ -1480,43 +1610,77 @@ class MultiModelManager:
return None
def list_models(self) -> List[ModelInfo]:
"""List all available models."""
"""List all available models (configured + runtime aliases)."""
models = []
# Add default model(s)
if self.default_model:
seen_ids: set = set()
def _add(model_id: str):
if model_id not in seen_ids:
seen_ids.add(model_id)
models.append(ModelInfo(id=model_id))
# --- Models from config (the authoritative source) ---
try:
from codai.admin.routes import config_manager
if config_manager is not None:
md = config_manager.models_data
for cat in ("text_models", "vision_models", "image_models",
"audio_models", "tts_models", "gguf_models"):
for m in md.get(cat, []):
if isinstance(m, str):
mid = m
else:
mid = m.get("alias") or m.get("path") or m.get("id") or ""
# Also expose the raw path/id
raw = m.get("path") or m.get("id") or ""
if raw and raw != mid:
_add(raw)
# Short name
short = raw.split("/")[-1] if "/" in raw else raw
if short != raw:
_add(short)
if mid:
_add(mid)
short = mid.split("/")[-1] if "/" in mid else mid
if short != mid:
_add(short)
except Exception:
pass
# --- Fallback: runtime default_model (if config_manager unavailable) ---
if not models and self.default_model:
model_id = self.default_model
if not (model_id.startswith("http://") or model_id.startswith("https://")):
short_name = self.default_model.split("/")[-1] if "/" in self.default_model else self.default_model
if short_name != self.default_model:
models.append(ModelInfo(id=short_name))
models.append(ModelInfo(id=model_id))
models.append(ModelInfo(id="default"))
# Add aliases for first/default models
short_name = model_id.split("/")[-1] if "/" in model_id else model_id
if short_name != model_id:
_add(short_name)
_add(model_id)
_add("default")
# --- Runtime-registered non-text models (image, audio, tts, vision) ---
if self.audio_models:
models.append(ModelInfo(id="audio"))
_add("audio")
for audio_id in self.audio_models:
models.append(ModelInfo(id=f"audio:{audio_id}"))
_add(f"audio:{audio_id}")
if self.tts_model:
models.append(ModelInfo(id="tts"))
models.append(ModelInfo(id=f"tts:{self.tts_model}"))
_add("tts")
_add(f"tts:{self.tts_model}")
if self.image_models:
models.append(ModelInfo(id="image"))
_add("image")
for image_id in self.image_models:
models.append(ModelInfo(id=f"image:{image_id}"))
_add(f"image:{image_id}")
if self.vision_models:
models.append(ModelInfo(id="vision"))
_add("vision")
for vision_id in self.vision_models:
models.append(ModelInfo(id=f"vision:{vision_id}"))
# Add any custom aliases
_add(f"vision:{vision_id}")
# --- Custom aliases ---
for alias in self.model_aliases:
models.append(ModelInfo(id=alias))
_add(alias)
return models
......
......@@ -31,3 +31,8 @@ llama-cpp-python>=0.2.0
# Requires specific CUDA versions and may need manual installation
# Install with: pip install flash-attn --no-build-isolation
# flash-attn>=2.5.0
# Optional: fast path for linear attention models (RWKV, Mamba, etc.)
causal-conv1d
# flash-linear-attention requires CUDA and must be installed from source:
# pip install git+https://github.com/fla-org/flash-linear-attention --no-build-isolation
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment