Rewrite done

7a49911c · Stefy Lanza (nextime / spora ) · 80b1c060 · 7a49911c · 80b1c060 · 7a49911c
Commit 7a49911c authored May 05, 2026 by Stefy Lanza (nextime / spora )
23 changed files
--- a/build.sh
+++ b/build.sh
@@ -209,17 +209,20 @@ elif [ "$BACKEND" = "vulkan" ]; then
        echo -e "${GREEN}✓ Found Vulkan shader compiler: $GLSLC_CMD${NC}"
    fi
    
-    # Build with Vulkan support
+    # Build with Vulkan support (add CUDA too if available)
    echo -e "${YELLOW}Building llama-cpp-python with Vulkan support...${NC}"
-    CMAKE_ARGS="-DGGML_VULKAN=ON" pip install --upgrade llama-cpp-python --no-cache-dir || {
+    _LLAMA_CMAKE="-DGGML_VULKAN=ON"
+    if command -v nvcc &> /dev/null || [ -d "/usr/local/cuda" ]; then
+        _LLAMA_CMAKE="$_LLAMA_CMAKE -DGGML_CUDA=ON"
+        echo -e "${GREEN}  ✓ Also enabling CUDA support (NVIDIA detected)${NC}"
+    fi
+    CMAKE_ARGS="$_LLAMA_CMAKE" pip install --upgrade llama-cpp-python --no-cache-dir || {
        echo -e "${RED}Build failed!${NC}"
        exit 1
    }
    
    echo -e "${YELLOW}Installing Vulkan-specific requirements...${NC}"
    pip install -r requirements-vulkan.txt
-    
-    # Build whispercpp Python package with Vulkan support for GPU-accelerated audio transcription
    echo -e "${YELLOW}Building whispercpp with Vulkan support for GPU-accelerated transcription...${NC}"
    
    # First, uninstall any existing whispercpp (pip version doesn't have Vulkan)
@@ -318,11 +321,16 @@ elif [ "$BACKEND" = "vulkan-nvidia" ]; then
        echo -e "${GREEN}✓ Found Vulkan shader compiler: $GLSLC_CMD${NC}"
    fi
    
-    # Build with Vulkan support
+    # Build with Vulkan support (add CUDA too if available)
    # Note: llama.cpp doesn't have a compile-time option to disable specific GPUs
    # The device selection happens at runtime via environment variables
    echo -e "${YELLOW}Building llama-cpp-python with Vulkan support...${NC}"
-    CMAKE_ARGS="-DGGML_VULKAN=ON" pip install --upgrade llama-cpp-python --no-cache-dir || {
+    _LLAMA_CMAKE="-DGGML_VULKAN=ON"
+    if command -v nvcc &> /dev/null || [ -d "/usr/local/cuda" ]; then
+        _LLAMA_CMAKE="$_LLAMA_CMAKE -DGGML_CUDA=ON"
+        echo -e "${GREEN}  ✓ Also enabling CUDA support (NVIDIA detected)${NC}"
+    fi
+    CMAKE_ARGS="$_LLAMA_CMAKE" pip install --upgrade llama-cpp-python --no-cache-dir || {
        echo -e "${RED}Build failed!${NC}"
        exit 1
    }
@@ -378,10 +386,15 @@ elif [ "$BACKEND" = "cuda" ]; then
        echo -e "${GREEN}✓ Found CUDA at /usr/local/cuda${NC}"
    fi
    
-    # Build llama-cpp-python with CUDA support
+    # Build llama-cpp-python with CUDA support (add Vulkan too if available)
    echo -e "${YELLOW}Building llama-cpp-python with CUDA support...${NC}"
    echo -e "${YELLOW}This may take several minutes...${NC}"
-    CMAKE_ARGS="-DGGML_CUDA=ON" pip install --upgrade llama-cpp-python --no-cache-dir || {
+    _LLAMA_CMAKE="-DGGML_CUDA=ON"
+    if pkg-config --exists vulkan 2>/dev/null; then
+        _LLAMA_CMAKE="$_LLAMA_CMAKE -DGGML_VULKAN=ON"
+        echo -e "${GREEN}  ✓ Also enabling Vulkan support (Vulkan detected)${NC}"
+    fi
+    CMAKE_ARGS="$_LLAMA_CMAKE" pip install --upgrade llama-cpp-python --no-cache-dir || {
        echo ""
        echo -e "${RED}Build failed!${NC}"
        echo -e "${YELLOW}Make sure CUDA toolkit is installed:${NC}"

--- a/build.sh~
+++ b/build.sh~
-#!/bin/bash
-# Build script for CoderAI - Supports NVIDIA (CUDA), Vulkan, OpenCL, and CPU backends
-# Usage: ./build.sh [nvidia|vulkan|vulkan-nvidia|cuda|opencl|all] [--flash] [--venv <venv>]
-# Default: all (installs all backends)
-# --flash: Enable and install Flash Attention 2 (for NVIDIA GPUs)
-# --venv <venv>: Specify custom virtual environment name
-
-set -e
-
-# Colors for output
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m' # No Color
-
-# Determine backend and flags
-BACKEND="${1:-all}"
-FLASH=false
-CUSTOM_VENV=""
-
-# Parse arguments
-i=1
-for arg in "$@"; do
-    case $arg in
-        --flash) 
-            FLASH=true
-            ;;
-        --venv)
-            i=$((i + 1))
-            eval "CUSTOM_VENV=\${$i}"
-            ;;
-    esac
-    i=$((i + 1))
-done
-
-BACKEND=$(echo "$BACKEND" | tr '[:upper:]' '[:lower:]')
-
-if [[ "$BACKEND" != "nvidia" && "$BACKEND" != "vulkan" && "$BACKEND" != "vulkan-nvidia" && "$BACKEND" != "cuda" && "$BACKEND" != "opencl" && "$BACKEND" != "all" ]]; then
-    echo -e "${RED}Error: Invalid backend '$BACKEND'${NC}"
-    echo "Usage: ./build.sh [nvidia|vulkan|vulkan-nvidia|cuda|opencl|all] [--flash]"
-    echo "  nvidia       - Use PyTorch with CUDA for NVIDIA GPUs"
-    echo "  vulkan      - Use llama-cpp-python with Vulkan for AMD GPUs"
-    echo "  vulkan-nvidia - Use llama-cpp-python with Vulkan for NVIDIA GPU only"
-    echo "  cuda        - Use llama-cpp-python with CUDA for NVIDIA GPUs"
-    echo "  opencl      - Use stable-diffusion-cpp-python with OpenCL"
-    echo "  all         - Install all backends (nvidia, cuda, vulkan, opencl, cpu) - DEFAULT"
-    echo ""
-    echo "Options:"
-    echo "  --flash     - Install Flash Attention 2 for faster inference (NVIDIA only)"
-    exit 1
-fi
-
-echo -e "${BLUE}========================================${NC}"
-echo -e "${BLUE}  CoderAI Build Script${NC}"
-echo -e "${BLUE}  Backend: ${GREEN}$BACKEND${NC}"
-if [ "$FLASH" = true ]; then
-    echo -e "${BLUE}  Flash Attention 2: ${GREEN}ENABLED${NC}"
-fi
-echo -e "${BLUE}========================================${NC}"
-echo ""
-
-# Check Python version
-PYTHON_VERSION=$(python3 --version 2>&1 | grep -oP '\d+\.\d+' | head -1)
-REQUIRED_VERSION="3.8"
-
-if [ "$(printf '%s\n' "$REQUIRED_VERSION" "$PYTHON_VERSION" | sort -V | head -n1)" != "$REQUIRED_VERSION" ]; then
-    echo -e "${RED}Error: Python 3.8+ required, found $PYTHON_VERSION${NC}"
-    exit 1
-fi
-
-echo -e "${GREEN}✓ Python version: $PYTHON_VERSION${NC}"
-
-# Determine cmake args for stable-diffusion-cpp-python.
-# The pip release is missing the libwebm/build/ cmake submodule files.
-# If libwebm-dev is installed system-wide we can link against it; otherwise disable WebM.
-if ldconfig -p 2>/dev/null | grep -q "libwebm" || pkg-config --exists libwebm 2>/dev/null; then
-    SD_CMAKE_ARGS="-DSD_USE_SYSTEM_WEBM=ON"
-    echo -e "${GREEN}✓ Found system libwebm — stable-diffusion-cpp-python will use it${NC}"
-else
-    SD_CMAKE_ARGS="-DSD_WEBM=OFF"
-    echo -e "${YELLOW}Note: libwebm-dev not found — WebM video output disabled for stable-diffusion-cpp-python${NC}"
-    echo -e "${YELLOW}      Install libwebm-dev to enable WebM support${NC}"
-fi
-
-# Determine venv directory based on backend
-if [ -n "$CUSTOM_VENV" ]; then
-    VENV_DIR="$CUSTOM_VENV"
-    echo -e "${BLUE}Using custom virtual environment: $VENV_DIR${NC}"
-elif [ "$BACKEND" = "nvidia" ]; then
-    VENV_DIR="venv_nvidia"
-elif [ "$BACKEND" = "vulkan" ]; then
-    VENV_DIR="venv_vulkan"
-elif [ "$BACKEND" = "vulkan-nvidia" ]; then
-    VENV_DIR="venv_vulkan_nvidia"
-elif [ "$BACKEND" = "cuda" ]; then
-    VENV_DIR="venv_cuda"
-elif [ "$BACKEND" = "opencl" ]; then
-    VENV_DIR="venv_opencl"
-elif [ "$BACKEND" = "all" ]; then
-    VENV_DIR="venv_all"
-fi
-
-# Create virtual environment if it doesn't exist
-if [ -n "$CUSTOM_VENV" ]; then
-    echo -e "${YELLOW}Creating custom virtual environment: $VENV_DIR${NC}"
-else
-    echo -e "${YELLOW}Creating virtual environment: $VENV_DIR${NC}"
-fi
-if [ ! -d "$VENV_DIR" ]; then
-    python3 -m venv "$VENV_DIR"
-    echo -e "${GREEN}✓ Created virtual environment: $VENV_DIR${NC}"
-else
-    echo -e "${YELLOW}Using existing virtual environment: $VENV_DIR${NC}"
-fi
-
-# Activate virtual environment
-echo -e "${YELLOW}Activating virtual environment...${NC}"
-source "$VENV_DIR/bin/activate"
-
-# Force pip to use this venv and install packages
-export PIP_NO_INPUT=1
-export PIP_REQUIRE_VIRTUALENV=1
-
-# Upgrade pip
-echo -e "${YELLOW}Upgrading pip...${NC}"
-pip install --upgrade pip
-
-echo ""
-echo -e "${BLUE}Installing dependencies for $BACKEND backend...${NC}"
-echo ""
-
-if [ "$BACKEND" = "nvidia" ]; then
-    # NVIDIA/CUDA backend
-    echo -e "${YELLOW}Installing PyTorch with CUDA support...${NC}"
-    pip install "torch>=2.0.0" "torchvision>=0.15.0" "torchaudio>=2.0.0"
-    
-    echo -e "${YELLOW}Installing NVIDIA-specific requirements...${NC}"
-    pip install -r requirements-nvidia.txt || {
-        echo -e "${YELLOW}Warning: Some NVIDIA packages failed to install${NC}"
-    }
-    
-    # Install Flash Attention 2 if requested
-    if [ "$FLASH" = true ]; then
-        echo ""
-        echo -e "${YELLOW}Installing Flash Attention 2...${NC}"
-        echo -e "${YELLOW}This may take several minutes and requires CUDA 11.6+${NC}"
-        MAX_JOBS=5 NVCC_THREADS=2  pip install flash-attn --no-build-isolation || {
-            echo -e "${RED}Warning: Flash Attention 2 installation failed${NC}"
-            echo -e "${YELLOW}Requirements: CUDA 11.6+, Linux, Ampere/Ada/Hopper GPU${NC}"
-            echo -e "${YELLOW}Continuing without Flash Attention...${NC}"
-        }
-    fi
-    
-    echo ""
-    echo -e "${GREEN}========================================${NC}"
-    echo -e "${GREEN}  NVIDIA/CUDA build complete!${NC}"
-    echo -e "${GREEN}========================================${NC}"
-    echo ""
-    echo "Usage:"
-    echo "  source $VENV_DIR/bin/activate"
-    echo "  python coderai --model <huggingface-model-name>"
-    if [ "$FLASH" = true ]; then
-        echo ""
-        echo "Flash Attention 2 enabled - use --flash-attn flag when running"
-    fi
-    echo ""
-    echo "Example:"
-    echo "  python coderai --model microsoft/DialoGPT-medium"
-    echo ""
-    echo -e "${GREEN}========================================${NC}"
-    echo -e "${GREEN}  NVIDIA/CUDA build complete!${NC}"
-    echo -e "${GREEN}========================================${NC}"
-    echo ""
-    echo "Usage:"
-    echo "  source $VENV_DIR/bin/activate"
-    echo "  python coderai --model <huggingface-model-name>"
-    echo ""
-    echo "Example:"
-    echo "  python coderai --model microsoft/DialoGPT-medium"
-    echo ""
-    
-elif [ "$BACKEND" = "vulkan" ]; then
-    # Vulkan backend (all GPUs)
-    echo -e "${YELLOW}Installing llama-cpp-python with Vulkan support (all GPUs)...${NC}"
-    
-    # Check for required Vulkan development libraries
-    if ! pkg-config --exists vulkan 2>/dev/null; then
-        echo -e "${YELLOW}Warning: Vulkan development libraries not found via pkg-config${NC}"
-        echo -e "${YELLOW}You may need to install Vulkan drivers and SDK:${NC}"
-        echo "  Debian/Ubuntu: sudo apt install libvulkan-dev vulkan-tools"
-        echo "  Fedora: sudo dnf install vulkan-loader-devel vulkan-tools"
-        echo "  Arch: sudo pacman -S vulkan-headers vulkan-icd-loader"
-        echo ""
-        echo -e "${YELLOW}Attempting installation anyway...${NC}"
-    fi
-    
-    # Check for glslc (Vulkan shader compiler)
-    GLSLC_CMD=""
-    if command -v glslc &> /dev/null; then
-        GLSLC_CMD="glslc"
-    elif command -v glslangValidator &> /dev/null; then
-        GLSLC_CMD="glslangValidator"
-    fi
-    
-    if [ -z "$GLSLC_CMD" ]; then
-        echo -e "${YELLOW}Warning: glslc/glslangValidator not found in PATH${NC}"
-    else
-        echo -e "${GREEN}✓ Found Vulkan shader compiler: $GLSLC_CMD${NC}"
-    fi
-    
-    # Build with Vulkan support
-    echo -e "${YELLOW}Building llama-cpp-python with Vulkan support...${NC}"
-    CMAKE_ARGS="-DGGML_VULKAN=ON" pip install --upgrade llama-cpp-python --no-cache-dir || {
-        echo -e "${RED}Build failed!${NC}"
-        exit 1
-    }
-    
-    echo -e "${YELLOW}Installing Vulkan-specific requirements...${NC}"
-    pip install -r requirements-vulkan.txt
-    
-    # Build whispercpp Python package with Vulkan support for GPU-accelerated audio transcription
-    echo -e "${YELLOW}Building whispercpp with Vulkan support for GPU-accelerated transcription...${NC}"
-    
-    # First, uninstall any existing whispercpp (pip version doesn't have Vulkan)
-    pip uninstall -y whispercpp 2>/dev/null || true
-    
-    # Clone and build whisper.cpp with Vulkan for Python bindings
-    WHISPERCPP_DIR="$HOME/whisper.cpp"
-    if [ ! -d "$WHISPERCPP_DIR" ]; then
-        echo "Cloning whisper.cpp..."
-        git clone --depth 1 https://github.com/ggerganov/whisper.cpp "$WHISPERCPP_DIR" 2>/dev/null || {
-            echo -e "${YELLOW}Warning: Could not clone whisper.cpp${NC}"
-        }
-    fi
-    
-    if [ -d "$WHISPERCPP_DIR/bindings/python" ]; then
-        cd "$WHISPERCPP_DIR/bindings/python"
-        
-        # Build with Vulkan support
-        # Set CMAKE_ARGS to enable Vulkan for ggml (whisper uses ggml library internally)
-        CMAKE_ARGS="-DWHISPER_VULKAN=ON -DGGML_VULKAN=ON" pip install . --no-cache-dir --force-reinstall 2>/dev/null || {
-            # If Vulkan build fails, try without (will fall back to CPU)
-            echo -e "${YELLOW}Warning: whispercpp Vulkan build failed, will use CPU${NC}"
-            pip install . --no-cache-dir --force-reinstall 2>/dev/null || {
-                echo -e "${YELLOW}Warning: Could not install whispercpp at all${NC}"
-            }
-        }
-        cd "$OLDPWD"
-        echo -e "${GREEN}✓ whispercpp with Vulkan support installed!${NC}"
-    else
-        echo -e "${YELLOW}Warning: whisper.cpp Python bindings not found${NC}"
-    fi
-    
-    # Also build the main whisper.cpp C++ with Vulkan for standalone usage
-    echo -e "${YELLOW}Building whisper.cpp C++ with Vulkan support (optional)...${NC}"
-    WHISPER_DIR="$HOME/whisper.cpp"
-    if [ -d "$WHISPER_DIR" ]; then
-        echo "Using existing whisper.cpp installation"
-    else
-        echo "Cloning whisper.cpp..."
-        git clone https://github.com/ggerganov/whisper.cpp "$WHISPER_DIR" 2>/dev/null || {
-            echo -e "${YELLOW}Warning: Could not clone whisper.cpp. Audio transcription will use CPU.${NC}"
-        }
-    fi
-    
-    if [ -d "$WHISPER_DIR" ]; then
-        cd "$WHISPER_DIR"
-        mkdir -p build 2>/dev/null
-        cd build
-        cmake -DGGML_VULKAN=ON .. >/dev/null 2>&1 || {
-            echo -e "${YELLOW}Warning: Vulkan build failed, building with OpenBLAS${NC}"
-            cmake -DBUILD_SHARED_LIBS=ON .. >/dev/null 2>&1
-        }
-        make -j$(nproc) >/dev/null 2>&1 || {
-            echo -e "${YELLOW}Warning: Build failed. Audio transcription will use CPU.${NC}"
-        }
-        cd "$OLDPWD"
-        
-        if [ ! -f "$WHISPER_DIR/models/ggml-base.bin" ]; then
-            echo "Downloading Whisper base model..."
-            bash "$WHISPER_DIR/models/download-ggml-model.sh" base 2>/dev/null || {
-                echo -e "${YELLOW}Warning: Could not download Whisper model.${NC}"
-            }
-        fi
-        echo -e "${GREEN}✓ whisper.cpp ready for audio transcription!${NC}"
-    fi
-    
-    echo ""
-    echo -e "${GREEN}========================================${NC}"
-    echo -e "${GREEN}  Vulkan build complete!${NC}"
-    echo -e "${GREEN}========================================${NC}"
-    echo ""
-    echo "Usage:"
-    echo "  python coderai --model <gguf-model> --backend vulkan"
-    echo ""
-    
-elif [ "$BACKEND" = "vulkan-nvidia" ]; then
-    # Vulkan backend (NVIDIA only)
-    echo -e "${YELLOW}Installing llama-cpp-python with Vulkan support (NVIDIA-only)...${NC}"
-    
-    # Check for required Vulkan development libraries
-    if ! pkg-config --exists vulkan 2>/dev/null; then
-        echo -e "${YELLOW}Warning: Vulkan development libraries not found via pkg-config${NC}"
-    fi
-    
-    # Check for glslc (Vulkan shader compiler)
-    GLSLC_CMD=""
-    if command -v glslc &> /dev/null; then
-        GLSLC_CMD="glslc"
-    elif command -v glslangValidator &> /dev/null; then
-        GLSLC_CMD="glslangValidator"
-    fi
-    
-    if [ -z "$GLSLC_CMD" ]; then
-        echo -e "${YELLOW}Warning: glslc/glslangValidator not found in PATH${NC}"
-    else
-        echo -e "${GREEN}✓ Found Vulkan shader compiler: $GLSLC_CMD${NC}"
-    fi
-    
-    # Build with Vulkan support
-    # Note: llama.cpp doesn't have a compile-time option to disable specific GPUs
-    # The device selection happens at runtime via environment variables
-    echo -e "${YELLOW}Building llama-cpp-python with Vulkan support...${NC}"
-    CMAKE_ARGS="-DGGML_VULKAN=ON" pip install --upgrade llama-cpp-python --no-cache-dir || {
-        echo -e "${RED}Build failed!${NC}"
-        exit 1
-    }
-    
-    echo -e "${YELLOW}Installing Vulkan-specific requirements...${NC}"
-    pip install -r requirements-vulkan.txt
-    
-    # Build whispercpp Python package with Vulkan support for GPU-accelerated audio transcription
-    echo -e "${YELLOW}Building whispercpp with Vulkan support for GPU-accelerated transcription...${NC}"
-    pip uninstall -y whispercpp 2>/dev/null || true
-    WHISPERCPP_DIR="$HOME/whisper.cpp"
-    if [ ! -d "$WHISPERCPP_DIR" ]; then
-        git clone --depth 1 https://github.com/ggerganov/whisper.cpp "$WHISPERCPP_DIR" 2>/dev/null || true
-    fi
-    if [ -d "$WHISPERCPP_DIR/bindings/python" ]; then
-        cd "$WHISPERCPP_DIR/bindings/python"
-        CMAKE_ARGS="-DWHISPER_VULKAN=ON -DGGML_VULKAN=ON" pip install . --no-cache-dir --force-reinstall 2>/dev/null || {
-            pip install . --no-cache-dir --force-reinstall 2>/dev/null || true
-        }
-        cd "$OLDPWD"
-        echo -e "${GREEN}✓ whispercpp with Vulkan support installed!${NC}"
-    fi
-    
-    echo ""
-    echo -e "${GREEN}========================================${NC}"
-    echo -e "${GREEN}  Vulkan (NVIDIA-only) build complete!${NC}"
-    echo -e "${GREEN}========================================${NC}"
-    echo ""
-    echo "Usage:"
-    echo "  VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/nvidia_icd.json \\"
-    echo "  python coderai --model <gguf-model> --backend vulkan"
-    echo ""
-    echo "Note: This build includes both AMD and NVIDIA Vulkan support."
-    echo "      At runtime, use VK_ICD_FILENAMES to select only NVIDIA."
-    echo ""
- 
-elif [ "$BACKEND" = "cuda" ]; then
-    # llama-cpp-python with CUDA backend (NVIDIA only)
-    echo -e "${YELLOW}Installing llama-cpp-python with CUDA support...${NC}"
-    
-    # Check for CUDA toolkit
-    if ! command -v nvcc &> /dev/null; then
-        echo -e "${YELLOW}Warning: CUDA toolkit (nvcc) not found in PATH${NC}"
-        echo -e "${YELLOW}You may need to install CUDA toolkit:${NC}"
-        echo "  Download from: https://developer.nvidia.com/cuda-downloads"
-    else
-        CUDA_VERSION=$(nvcc --version | grep "release" | sed -n 's/.*release \([0-9.]*\),.*/\1/p')
-        echo -e "${GREEN}✓ Found CUDA $CUDA_VERSION${NC}"
-    fi
-    
-    # Check for CUDA libraries
-    if [ -d "/usr/local/cuda" ]; then
-        echo -e "${GREEN}✓ Found CUDA at /usr/local/cuda${NC}"
-    fi
-    
-    # Build llama-cpp-python with CUDA support
-    echo -e "${YELLOW}Building llama-cpp-python with CUDA support...${NC}"
-    echo -e "${YELLOW}This may take several minutes...${NC}"
-    CMAKE_ARGS="-DGGML_CUDA=ON" pip install --upgrade llama-cpp-python --no-cache-dir || {
-        echo ""
-        echo -e "${RED}Build failed!${NC}"
-        echo -e "${YELLOW}Make sure CUDA toolkit is installed:${NC}"
-        echo "  sudo apt install cuda-toolkit-12"
-        echo "  or"
-        echo "  Download from: https://developer.nvidia.com/cuda-downloads"
-        exit 1
-    }
-    
-    echo -e "${YELLOW}Installing Vulkan-specific requirements...${NC}"
-    pip install -r requirements-vulkan.txt
-    
-    echo ""
-    echo -e "${GREEN}========================================${NC}"
-    echo -e "${GREEN}  llama-cpp-python CUDA build complete!${NC}"
-    echo -e "${GREEN}========================================${NC}"
-    echo ""
-    echo "Usage:"
-    echo "  source $VENV_DIR/bin/activate"
-    echo "  python coderai --model <gguf-model> --backend vulkan --vulkan-device 0"
-    echo ""
-    echo "Note: With CUDA backend, llama-cpp-python will only use NVIDIA GPUs."
-    echo ""
-elif [ "$BACKEND" = "opencl" ]; then
-    # stable-diffusion-cpp-python with OpenCL backend
-    echo -e "${YELLOW}Installing stable-diffusion-cpp-python with OpenCL support...${NC}"
-    
-    # Check for OpenCL
-    if ! command -v clinfo &> /dev/null && ! ls /usr/lib/*/libOpenCL* &> /dev/null; then
-        echo -e "${YELLOW}Warning: OpenCL not found in system${NC}"
-        echo -e "${YELLOW}You may need to install OpenCL runtime:${NC}"
-        echo "  Debian/Ubuntu: sudo apt install ocl-icd-opencl-dev"
-        echo "  Fedora: sudo dnf install ocl-icd-devel"
-    else
-        echo -e "${GREEN}✓ Found OpenCL${NC}"
-    fi
-    
-    # Install base requirements
-    echo -e "${YELLOW}Installing base requirements...${NC}"
-    pip install -r requirements.txt
-    
-    # Install stable-diffusion-cpp-python with OpenCL (disable WebM to avoid libwebm cmake issue)
-    echo -e "${YELLOW}Installing stable-diffusion-cpp-python with OpenCL...${NC}"
-    CMAKE_ARGS="$SD_CMAKE_ARGS" pip install stable-diffusion-cpp-python || {
-        echo ""
-        echo -e "${YELLOW}Note: If stable-diffusion-cpp-python is not available with pip,${NC}"
-        echo -e "${YELLOW}you may need to build from source.${NC}"
-    }
-    
-    # Install additional requirements for OpenCL
-    echo -e "${YELLOW}Installing additional requirements for OpenCL...${NC}"
-    pip install numpy pillow
-    
-    echo ""
-    echo -e "${GREEN}========================================${NC}"
-    echo -e "${GREEN}  OpenCL build complete!${NC}"
-    echo -e "${GREEN}========================================${NC}"
-    echo ""
-    echo "Usage:"
-    echo "  source $VENV_DIR/bin/activate"
-    echo "  python coderai --model <model> --image-backend opencl"
-    echo ""
-    echo "Note: With OpenCL backend, stable-diffusion-cpp-python can use various GPUs."
-    echo ""
-
-elif [ "$BACKEND" = "all" ]; then
-    # Install ALL backends: nvidia (CUDA), vulkan, opencl, and cpu
-    echo -e "${BLUE}========================================${NC}"
-    echo -e "${BLUE}  Installing ALL backends${NC}"
-    echo -e "${BLUE}  (NVIDIA/CUDA, Vulkan, OpenCL, CPU)${NC}"
-    echo -e "${BLUE}========================================${NC}"
-    echo ""
-    
-    # Install base requirements
-    echo -e "${YELLOW}Installing base requirements...${NC}"
-    pip install --upgrade pip
-    
-    # Install requirements with error handling for problematic packages
-    echo -e "${YELLOW}Installing core dependencies...${NC}"
-    pip install -r requirements.txt || {
-        echo -e "${YELLOW}Some packages failed to install, trying individually...${NC}"
-        
-        # Install core packages that should always work
-        pip install fastapi uvicorn pydantic requests python-multipart psutil || {
-            echo -e "${RED}Failed to install core dependencies${NC}"
-            exit 1
-        }
-        
-        # Try optional packages individually
-        echo -e "${YELLOW}Installing optional packages...${NC}"
-        pip install transformers accelerate diffusers safetensors || echo -e "${YELLOW}Warning: Some ML packages failed${NC}"
-        pip install faster-whisper || echo -e "${YELLOW}Warning: faster-whisper failed${NC}"
-        pip install whispercpp || echo -e "${YELLOW}Warning: whispercpp failed${NC}"
-        pip install litellm || echo -e "${YELLOW}Warning: litellm failed${NC}"
-        
-        pip install setproctitle || echo -e "${YELLOW}Warning: setproctitle failed (optional)${NC}"
-
-        # Try stable-diffusion-cpp-python (disable WebM to avoid missing libwebm cmake submodule)
-        CMAKE_ARGS="$SD_CMAKE_ARGS" pip install stable-diffusion-cpp-python || echo -e "${YELLOW}Warning: stable-diffusion-cpp-python failed (optional)${NC}"
-    }
-    
-    # Install PyTorch with CUDA support (for nvidia backend)
-    echo -e "${YELLOW}Installing PyTorch with CUDA support (NVIDIA backend)...${NC}"
-    pip install torch torchvision torchaudio || {
-        echo -e "${YELLOW}Warning: PyTorch installation failed, will try CPU version${NC}"
-        pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu || {
-            echo -e "${RED}Failed to install PyTorch${NC}"
-            exit 1
-        }
-    }
-    
-    echo -e "${YELLOW}Installing NVIDIA-specific requirements...${NC}"
-    pip install -r requirements-nvidia.txt || {
-        echo -e "${YELLOW}Warning: Some NVIDIA packages failed to install${NC}"
-    }
-    
-    # Check for Vulkan development libraries
-    VULKAN_AVAILABLE=false
-    if pkg-config --exists vulkan 2>/dev/null; then
-        VULKAN_AVAILABLE=true
-        echo -e "${GREEN}✓ Found Vulkan development libraries${NC}"
-    else
-        echo -e "${YELLOW}Warning: Vulkan development libraries not found${NC}"
-        echo -e "${YELLOW}  Vulkan support will be limited${NC}"
-    fi
-    
-    # Check for CUDA
-    CUDA_AVAILABLE=false
-    if command -v nvcc &> /dev/null || [ -d "/usr/local/cuda" ]; then
-        CUDA_AVAILABLE=true
-        echo -e "${GREEN}✓ Found CUDA toolkit${NC}"
-    else
-        echo -e "${YELLOW}Warning: CUDA toolkit not found${NC}"
-        echo -e "${YELLOW}  CUDA support will be limited${NC}"
-    fi
-    
-    # Check for OpenCL
-    OPENCL_AVAILABLE=false
-    if command -v clinfo &> /dev/null || ls /usr/lib/*/libOpenCL* &> /dev/null 2>&1; then
-        OPENCL_AVAILABLE=true
-        echo -e "${GREEN}✓ Found OpenCL${NC}"
-    else
-        echo -e "${YELLOW}Warning: OpenCL not found${NC}"
-        echo -e "${YELLOW}  OpenCL support will be limited${NC}"
-    fi
-    
-    # Build llama-cpp-python with both CUDA and Vulkan support
-    echo -e "${YELLOW}Building llama-cpp-python with CUDA and Vulkan support...${NC}"
-    echo -e "${YELLOW}This may take several minutes...${NC}"
-    
-    # Determine CMAKE_ARGS based on available hardware
-    CMAKE_ARGS=""
-    if [ "$CUDA_AVAILABLE" = true ]; then
-        CMAKE_ARGS="-DGGML_CUDA=ON"
-        echo -e "${GREEN}  ✓ Enabling CUDA support${NC}"
-    fi
-    
-    if [ "$VULKAN_AVAILABLE" = true ]; then
-        if [ -n "$CMAKE_ARGS" ]; then
-            CMAKE_ARGS="$CMAKE_ARGS -DGGML_VULKAN=ON"
-        else
-            CMAKE_ARGS="-DGGML_VULKAN=ON"
-        fi
-        echo -e "${GREEN}  ✓ Enabling Vulkan support${NC}"
-    fi
-    
-    if [ -n "$CMAKE_ARGS" ]; then
-        echo -e "${YELLOW}  Building with: $CMAKE_ARGS${NC}"
-        CMAKE_ARGS="$CMAKE_ARGS" pip install --upgrade llama-cpp-python --no-cache-dir || {
-            echo -e "${YELLOW}Warning: llama-cpp-python build failed, installing from pip${NC}"
-            pip install llama-cpp-python
-        }
-    else
-        echo -e "${YELLOW}Warning: No GPU backends available, installing CPU version${NC}"
-        pip install llama-cpp-python
-    fi
-    
-    # Install Vulkan-specific requirements
-    echo -e "${YELLOW}Installing Vulkan-specific requirements...${NC}"
-    pip install -r requirements-vulkan.txt || {
-        echo -e "${YELLOW}Warning: Some Vulkan packages failed to install${NC}"
-    }
-    
-    # Try to install stable-diffusion-cpp-python with OpenCL
-    if [ "$OPENCL_AVAILABLE" = true ]; then
-        echo -e "${YELLOW}Installing stable-diffusion-cpp-python with OpenCL support...${NC}"
-        CMAKE_ARGS="$SD_CMAKE_ARGS" pip install stable-diffusion-cpp-python || {
-            echo -e "${YELLOW}Warning: stable-diffusion-cpp-python not available (requires CMake and build tools)${NC}"
-        }
-    else
-        echo -e "${YELLOW}Skipping OpenCL (stable-diffusion-cpp-python) - OpenCL not available${NC}"
-    fi
-
-    # Install additional requirements
-    echo -e "${YELLOW}Installing additional requirements...${NC}"
-    pip install numpy pillow || {
-        echo -e "${YELLOW}Warning: Some additional packages failed${NC}"
-    }
-
-    # Install setproctitle for process naming (Python 3.13 compatible)
-    echo -e "${YELLOW}Installing setproctitle...${NC}"
-    pip install setproctitle || {
-        echo -e "${YELLOW}Note: setproctitle failed to install (optional package, not critical)${NC}"
-    }
-    
-    # Install Flash Attention 2 if requested and CUDA is available
-    if [ "$FLASH" = true ] && [ "$CUDA_AVAILABLE" = true ]; then
-        echo ""
-        echo -e "${YELLOW}Installing Flash Attention 2...${NC}"
-        echo -e "${YELLOW}This may take several minutes and requires CUDA 11.6+${NC}"
-        MAX_JOBS=6 pip install flash-attn --no-build-isolation || {
-            echo -e "${RED}Warning: Flash Attention 2 installation failed${NC}"
-            echo -e "${YELLOW}Requirements: CUDA 11.6+, Linux, Ampere/Ada/Hopper GPU${NC}"
-            echo -e "${YELLOW}Continuing without Flash Attention...${NC}"
-        }
-    elif [ "$FLASH" = true ]; then
-        echo -e "${YELLOW}Warning: Flash Attention 2 requires CUDA backend${NC}"
-        echo -e "${YELLOW}Skipping Flash Attention installation${NC}"
-    fi
-    
-    echo ""
-    echo -e "${GREEN}========================================${NC}"
-    echo -e "${GREEN}  ALL backends build complete!${NC}"
-    echo -e "${GREEN}========================================${NC}"
-    echo ""
-    echo "Available backends:"
-    [ "$CUDA_AVAILABLE" = true ] && echo "  ✓ NVIDIA/CUDA (PyTorch)"
-    [ "$CUDA_AVAILABLE" = true ] && echo "  ✓ CUDA (llama-cpp-python)"
-    [ "$VULKAN_AVAILABLE" = true ] && echo "  ✓ Vulkan (llama-cpp-python)"
-    [ "$OPENCL_AVAILABLE" = true ] && echo "  ✓ OpenCL (stable-diffusion-cpp-python)"
-    echo "  ✓ CPU (fallback for all)"
-    if [ "$FLASH" = true ] && [ "$CUDA_AVAILABLE" = true ]; then
-        echo ""
-        echo "  ✓ Flash Attention 2 (NVIDIA)"
-    fi
-    echo ""
-    echo "Usage:"
-    echo "  source $VENV_DIR/bin/activate"
-    echo ""
-    echo "  # For text models with NVIDIA:"
-    echo "  python coderai --model <model> --backend nvidia"
-    if [ "$FLASH" = true ]; then
-        echo "  python coderai --model <model> --backend nvidia --flash-attn"
-    fi
-    echo ""
-    echo "  # For GGUF models with CUDA:"
-    echo "  python coderai --model <gguf-model> --backend vulkan"
-    echo ""
-    echo "  # For GGUF models with Vulkan:"
-    echo "  python coderai --model <gguf-model> --backend vulkan"
-    echo ""
-    echo "  # For image generation with OpenCL:"
-    echo "  python coderai --model <model> --image-backend opencl"
-    echo ""
-fi
-
-# Create .backend file to track which backend was used
-echo "$BACKEND" > .backend
-
-echo -e "${GREEN}Build completed successfully!${NC}"
-echo ""
-echo "To activate the environment in the future, run:"
-echo "  source $VENV_DIR/bin/activate"
--- a/codai/admin/auth.py
+++ b/codai/admin/auth.py
@@ -65,23 +65,37 @@ class SessionManager:
        self.config_dir = config_dir
        self.secret = get_or_create_secret(config_dir)
        self.session_timeout = timedelta(minutes=session_timeout_minutes)
+        self._lock = __import__('threading').Lock()
    
    def _load_auth_data(self) -> Dict[str, Any]:
        """Load auth.json data."""
        auth_path = self.config_dir / "auth.json"
        if auth_path.exists():
-            with open(auth_path, 'r') as f:
-                return json.load(f)
+            try:
+                with open(auth_path, 'r') as f:
+                    content = f.read()
+                if content.strip():
+                    return json.loads(content)
+            except (json.JSONDecodeError, OSError):
+                pass
        return {"users": [], "tokens": [], "sessions": {}}
    
    def _save_auth_data(self, auth_data: Dict[str, Any]):
-        """Save auth.json data."""
+        """Save auth.json data atomically."""
        auth_path = self.config_dir / "auth.json"
-        # Atomic write
-        temp_path = auth_path.with_suffix('.tmp')
-        with open(temp_path, 'w') as f:
-            json.dump(auth_data, f, indent=2)
-        temp_path.replace(auth_path)
+        with self._lock:
+            import os, tempfile
+            fd, tmp = tempfile.mkstemp(dir=str(self.config_dir), suffix='.tmp')
+            try:
+                with os.fdopen(fd, 'w') as f:
+                    json.dump(auth_data, f, indent=2)
+                os.replace(tmp, str(auth_path))
+            except Exception:
+                try:
+                    os.unlink(tmp)
+                except OSError:
+                    pass
+                raise
    
    def create_session(self, username: str) -> str:
        """Create a new session for a user.

--- a/codai/admin/routes.py
+++ b/codai/admin/routes.py
@@ -3,10 +3,14 @@ from pathlib import Path
 from typing import Optional

 from fastapi import APIRouter, Request, Response, Form, HTTPException, Depends
-from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
+from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse, StreamingResponse
 from fastapi.templating import Jinja2Templates

 from codai.admin.auth import SessionManager
+import queue as _q
+import threading as _t
+import uuid as _uuid
+import json as _j


 router = APIRouter()
@@ -17,6 +21,9 @@ templates = Jinja2Templates(directory=str(templates_dir))

 # Session manager (will be initialized in main.py)
 session_manager: Optional[SessionManager] = None
+config_manager = None  # set via set_config_manager()
+_download_sessions: dict = {}
+_download_status: dict = {}   # session_id → latest progress state (survives SSE disconnect)


 def init_session_manager(config_dir: Path):
@@ -25,6 +32,12 @@ def init_session_manager(config_dir: Path):
    session_manager = SessionManager(config_dir)


+def set_config_manager(mgr):
+    """Register ``mgr`` as this module's shared ``config_manager``.
+
+    Code in this module that consults ``config_manager`` looks the name up
+    at call time, so the new value is seen by every later call.
+    """
+    globals()["config_manager"] = mgr
+
+
 def get_current_user(request: Request) -> Optional[str]:
    """Get the current logged-in user from session cookie."""
    if session_manager is None:
@@ -65,10 +78,7 @@ async def login_page(request: Request):
    if username:
        return RedirectResponse(url="/admin", status_code=302)
    
-    return templates.TemplateResponse("login.html", {
-        "request": request,
-        "error": None
-    })
+    return templates.TemplateResponse(request, "login.html", {"error": None})


 @router.post("/login")
@@ -80,14 +90,11 @@ async def login(
    """Handle login form submission."""
    if session_manager is None:
        raise HTTPException(status_code=500, detail="Session manager not initialized")
-    
+
    session_cookie = session_manager.authenticate(username, password)
-    
+
    if not session_cookie:
-        return templates.TemplateResponse("login.html", {
-            "request": request,
-            "error": "Invalid username or password"
-        })
+        return templates.TemplateResponse(request, "login.html", {"error": "Invalid username or password"})
    
    # Check if must change password
    must_change = session_cookie.endswith(".MUST_CHANGE")
@@ -123,110 +130,82 @@ async def logout(request: Request):

 @router.get("/admin/change-password", response_class=HTMLResponse)
 async def change_password_page(request: Request, username: str = Depends(require_auth)):
-    """Display password change page."""
    user = session_manager.get_user(username)
    must_change = user.get("must_change_password", False) if user else False
-    
-    return templates.TemplateResponse("change_password.html", {
-        "request": request,
+    return templates.TemplateResponse(request, "change_password.html", {
        "username": username,
        "must_change": must_change,
-        "error": None
+        "is_admin": session_manager.is_admin(username),
+        "error": None,
    })


 @router.post("/admin/change-password")
 async def change_password(
    request: Request,
-    old_password: str = Form(...),
+    old_password: Optional[str] = Form(None),
    new_password: str = Form(...),
    confirm_password: str = Form(...),
    username: str = Depends(require_auth)
 ):
-    """Handle password change."""
-    if new_password != confirm_password:
-        return templates.TemplateResponse("change_password.html", {
-            "request": request,
-            "username": username,
-            "must_change": False,
-            "error": "Passwords do not match"
+    user = session_manager.get_user(username)
+    is_admin = session_manager.is_admin(username)
+    must_change = user.get("must_change_password", False) if user else False
+
+    def render_error(msg: str):
+        return templates.TemplateResponse(request, "change_password.html", {
+            "username": username, "must_change": must_change,
+            "is_admin": is_admin, "error": msg,
        })
-    
+
+    if new_password != confirm_password:
+        return render_error("Passwords do not match")
    if len(new_password) < 8:
-        return templates.TemplateResponse("change_password.html", {
-            "request": request,
-            "username": username,
-            "must_change": False,
-            "error": "Password must be at least 8 characters"
-        })
-    
-    # Check if this is a forced change (first login)
-    user = session_manager.get_user(username)
-    if user and user.get("must_change_password"):
-        # Force change without verifying old password
+        return render_error("Password must be at least 8 characters")
+
+    if must_change:
        success = session_manager.force_password_change(username, new_password)
    else:
+        if not old_password:
+            return render_error("Current password is required")
        success = session_manager.change_password(username, old_password, new_password)
-    
+
    if not success:
-        return templates.TemplateResponse("change_password.html", {
-            "request": request,
-            "username": username,
-            "must_change": False,
-            "error": "Current password is incorrect"
-        })
-    
+        return render_error("Current password is incorrect")
+
    return RedirectResponse(url="/admin", status_code=302)


 @router.get("/admin", response_class=HTMLResponse)
 async def admin_dashboard(request: Request, username: str = Depends(require_auth)):
-    """Display admin dashboard."""
    is_admin = session_manager.is_admin(username)
-    
-    return templates.TemplateResponse("dashboard.html", {
-        "request": request,
-        "username": username,
-        "is_admin": is_admin
+    return templates.TemplateResponse(request, "dashboard.html", {
+        "username": username, "is_admin": is_admin,
    })


 @router.get("/admin/models", response_class=HTMLResponse)
 async def models_page(request: Request, username: str = Depends(require_admin)):
-    """Display models management page."""
-    return templates.TemplateResponse("models.html", {
-        "request": request,
-        "username": username
-    })
+    return templates.TemplateResponse(request, "models.html", {"username": username, "is_admin": True})


 @router.get("/admin/tokens", response_class=HTMLResponse)
 async def tokens_page(request: Request, username: str = Depends(require_admin)):
-    """Display API tokens management page."""
-    return templates.TemplateResponse("tokens.html", {
-        "request": request,
-        "username": username
-    })
+    return templates.TemplateResponse(request, "tokens.html", {"username": username, "is_admin": True})


 @router.get("/admin/users", response_class=HTMLResponse)
 async def users_page(request: Request, username: str = Depends(require_admin)):
-    """Display users management page."""
    users = session_manager.list_users()
-    
-    return templates.TemplateResponse("users.html", {
-        "request": request,
-        "username": username,
-        "users": users
+    return templates.TemplateResponse(request, "users.html", {
+        "username": username, "is_admin": True, "users": users,
    })


 @router.get("/chat", response_class=HTMLResponse)
 async def chat_page(request: Request, username: str = Depends(require_auth)):
-    """Display chat interface."""
-    return templates.TemplateResponse("chat.html", {
-        "request": request,
-        "username": username
+    return templates.TemplateResponse(request, "chat.html", {
+        "username": username, "is_admin": session_manager.is_admin(username),
    })


@@ -234,12 +213,62 @@ async def chat_page(request: Request, username: str = Depends(require_auth)):
 @router.get("/admin/api/status")
 async def api_status(username: str = Depends(require_auth)):
    """Get system status."""
-    # TODO: Implement actual status gathering
+    from codai.models.manager import multi_model_manager
+    from codai.api.state import get_load_mode
+
+    loaded_keys = list(multi_model_manager.models.keys())
+
+    # VRAM info
+    vram = None
+    try:
+        import torch
+        if torch.cuda.is_available():
+            free, total = torch.cuda.mem_get_info()
+            used = total - free
+            vram = {"used": round(used / 1e9, 2), "total": round(total / 1e9, 2)}
+    except Exception:
+        pass
+
+    # Request stats from queue manager
+    req_total = 0
+    req_active = 0
+    try:
+        from codai.queue.manager import queue_manager
+        req_active = 1 if queue_manager._processing else 0
+    except Exception:
+        pass
+
+    # Backend info
+    backend = "—"
+    try:
+        from codai.models.manager import model_manager
+        if model_manager.backend_type:
+            backend = model_manager.backend_type
+    except Exception:
+        pass
+
+    # Enabled (configured) models
+    enabled_models = []
+    try:
+        if config_manager:
+            md = config_manager.models_data
+            for cat in ("text_models", "image_models", "audio_models", "vision_models", "tts_models"):
+                for m in md.get(cat, []):
+                    mid = (m.get("path") or m.get("id") or m) if isinstance(m, dict) else m
+                    if mid and mid not in enabled_models:
+                        enabled_models.append(mid)
+    except Exception:
+        pass
+
    return {
        "status": "ok",
-        "backend": "auto",
-        "models_loaded": 0,
-        "uptime": "0h 0m"
+        "backend": backend,
+        "load_mode": get_load_mode(),
+        "models_loaded": len(loaded_keys),
+        "loaded_models": loaded_keys,
+        "enabled_models": enabled_models,
+        "vram": vram,
+        "requests": {"total": req_total, "active": req_active},
    }


@@ -359,10 +388,194 @@ async def api_delete_token(token_id: int, username: str = Depends(require_admin)
 async def api_list_models(username: str = Depends(require_admin)):
    """List all configured models with details."""
    models_data = session_manager._load_auth_data()  # TODO: move to ModelManager
-    # For now, load from models file directly
-    models_path = Path.cwd() / "codai" / "admin" / "templates"  # hack
-    # Actually use config_mgr
-    pass
+    from codai.models.manager import multi_model_manager
+    try:
+        return multi_model_manager.list_models()
+    except Exception:
+        return []
+
+
+def _make_tqdm_class(pq, status=None):
+    """Return a tqdm-compatible class that forwards progress events to pq and optionally updates a status dict.
+
+    Args:
+        pq: object with a ``put`` method; receives "start", "progress" and
+            "info" event dicts.
+        status: optional dict mutated in place with the latest progress
+            fields each time a "start" or "progress" event is emitted.
+
+    Returns:
+        The ``_PQTqdm`` class itself (not an instance); the caller hands it to
+        code that expects a tqdm class.
+    """
+    import time as _time
+
+    class _PQTqdm:
+        # Constructor mirrors the subset of tqdm keyword arguments this class
+        # uses; every other keyword is accepted and ignored.
+        def __init__(self, iterable=None, desc=None, total=None, initial=0, **kwargs):
+            self.iterable = iterable
+            self.desc = str(desc or 'downloading')
+            self.total = int(total) if total else 0
+            self.n = int(initial) if initial else 0
+            self._start = _time.time()
+            # A "start" event is only emitted when a total is known.
+            if self.total:
+                pq.put({"type": "start", "filename": self.desc, "total": self.total})
+                if status is not None:
+                    status.update({"status": "downloading", "filename": self.desc,
+                                   "total": self.total, "downloaded": self.n, "percent": 0})
+
+        def update(self, n=1):
+            """Advance the counter by ``n`` and emit a "progress" event."""
+            self.n += n
+            # Fall back to 1 ms so the rate division cannot hit zero elapsed time.
+            elapsed = (_time.time() - self._start) or 0.001
+            rate = self.n / elapsed
+            # ETA and percent need a known total; otherwise they are None / 0.
+            eta = (self.total - self.n) / rate if rate and self.total else None
+            pct = round(self.n / self.total * 100, 1) if self.total else 0
+            evt = {
+                "type": "progress",
+                "filename": self.desc,
+                "downloaded": self.n,
+                "total": self.total,
+                "percent": pct,
+                "rate": round(rate),
+                "eta": round(eta) if eta is not None else None,
+            }
+            pq.put(evt)
+            if status is not None:
+                status.update({"status": "downloading", "filename": self.desc,
+                               "percent": pct, "rate": round(rate), "eta": evt["eta"],
+                               "downloaded": self.n, "total": self.total})
+
+        # The methods below are no-ops, presumably present only so callers
+        # written against tqdm's interface do not fail - verify against the
+        # tqdm version in use.
+        def close(self): pass
+        def refresh(self, nolock=False, lock_args=None): pass
+        def clear(self, nolock=False): pass
+        def display(self, msg=None, pos=None): pass
+        def unpause(self): pass
+        def moveto(self, n): pass
+        def set_postfix(self, *a, **kw): pass
+        def set_description(self, desc=None, **kw):
+            if desc: self.desc = str(desc)
+        def set_postfix_str(self, *a, **kw): pass
+        def reset(self, total=None):
+            # Restart counting and timing; keep the old total unless a new one is given.
+            self.n = 0
+            self._start = _time.time()
+            if total is not None: self.total = int(total)
+        def __enter__(self): return self
+        def __exit__(self, *a): self.close()
+        def __iter__(self):
+            # Plain pass-through: iterating does not call update().
+            for obj in (self.iterable or []):
+                yield obj
+        def write(self, s, **kw):
+            # Text written through the bar is forwarded as an "info" event.
+            pq.put({"type": "info", "message": str(s)})
+
+        # Class-level attributes; presumably read by tqdm helper code - TODO confirm.
+        monitor_interval = 0
+        monitor = None
+        _lock = None
+
+        @classmethod
+        def get_lock(cls):
+            # Lazily create one re-entrant lock stored on the class.
+            import threading
+            if cls._lock is None:
+                cls._lock = threading.RLock()
+            return cls._lock
+
+        @classmethod
+        def set_lock(cls, lock):
+            cls._lock = lock
+
+    return _PQTqdm
+
+
+def _run_download_thread(session_id: str, model_id: str, file_pattern: str, pq):
+    """Background thread: download a model and stream progress events to ``pq``.
+
+    HuggingFace model IDs are fetched with ``snapshot_download`` (optionally
+    limited by ``file_pattern``); anything else is treated as a direct URL and
+    streamed into the model cache dir. Every event pushed here is put on ``pq``
+    and mirrored into ``_download_status[session_id]``. A ``done`` event is
+    pushed on success and an ``error`` event when an ``Exception`` is raised;
+    300 s after the thread finishes, the session's queue and status entries
+    are dropped.
+
+    Args:
+        session_id: key under which the queue and status are registered.
+        model_id: HuggingFace repo ID or direct download URL.
+        file_pattern: optional suffix / sub-path filter for HF downloads
+            (empty string downloads the full repository).
+        pq: queue-like object with ``put``; receives the event dicts.
+    """
+    import time
+    import os
+
+    status = {"session_id": session_id, "model_id": model_id, "status": "starting",
+              "percent": 0, "filename": "", "rate": 0, "eta": None}
+    _download_status[session_id] = status
+
+    def push(evt):
+        # Queue first, then mirror into the status dict: a terminal status
+        # therefore implies the terminal event has already been queued.
+        pq.put(evt)
+        t = evt.get("type")
+        if t == "start":
+            status.update({"status": "downloading", "filename": evt.get("filename", ""),
+                           "total": evt.get("total", 0), "downloaded": 0, "percent": 0})
+        elif t == "progress":
+            status.update({"status": "downloading",
+                           "filename": evt.get("filename", status.get("filename", "")),
+                           "percent": evt.get("percent", 0), "rate": evt.get("rate", 0),
+                           "eta": evt.get("eta"), "downloaded": evt.get("downloaded", 0),
+                           "total": evt.get("total", 0)})
+        elif t == "done":
+            status.update({"status": "done", "percent": 100, "path": evt.get("path", "")})
+        elif t == "error":
+            status.update({"status": "error", "error": evt.get("message", "")})
+        elif t == "info":
+            status["last_info"] = evt.get("message", "")
+
+    try:
+        from codai.models.cache import is_huggingface_model_id
+        from huggingface_hub import snapshot_download
+
+        tqdm_cls = _make_tqdm_class(pq, status=status)
+
+        if is_huggingface_model_id(model_id):
+            if file_pattern:
+                # Convert suffix/quant pattern to fnmatch glob for allow_patterns
+                if file_pattern.startswith('.'):
+                    allow = [f"*{file_pattern}"]          # ".gguf" → "*.gguf"
+                elif '/' in file_pattern:
+                    allow = [file_pattern]                  # exact subpath
+                else:
+                    allow = [f"*{file_pattern}"]          # "Q4_K_M.gguf" → "*Q4_K_M.gguf"
+                push({"type": "info", "message": f"Downloading {allow[0]} from {model_id}…"})
+                path = snapshot_download(model_id, allow_patterns=allow, tqdm_class=tqdm_cls)
+            else:
+                push({"type": "info", "message": f"Downloading full repository {model_id}…"})
+                path = snapshot_download(model_id, tqdm_class=tqdm_cls)
+
+        else:
+            # Direct URL download (non-HF source)
+            import requests as _req
+            import hashlib
+            from codai.models.cache import get_model_cache_dir
+
+            cache_dir = get_model_cache_dir()
+            url_path = model_id.split('?')[0]
+            filename = os.path.basename(url_path) or "model.bin"
+            url_hash = hashlib.sha256(model_id.encode()).hexdigest()
+            dest = os.path.join(cache_dir, f"{url_hash}_{filename}")
+
+            if os.path.exists(dest):
+                push({"type": "done", "path": dest})
+                return
+
+            # Stream into a sibling ".part" file and rename it only after the
+            # last byte was written, so an interrupted transfer can never be
+            # mistaken for a finished download by the exists() check above.
+            part = dest + ".part"
+            try:
+                # The with-block closes the HTTP response on every exit path.
+                with _req.get(model_id, stream=True, timeout=60, allow_redirects=True) as resp:
+                    resp.raise_for_status()
+                    total = int(resp.headers.get('content-length', 0))
+                    push({"type": "start", "filename": filename, "total": total})
+
+                    downloaded = 0
+                    start_t = time.time()
+                    last_evt = 0.0
+                    with open(part, 'wb') as f:
+                        for chunk in resp.iter_content(chunk_size=524288):
+                            if chunk:
+                                f.write(chunk)
+                                downloaded += len(chunk)
+                                now = time.time()
+                                # Throttle progress events to one per 250 ms.
+                                if now - last_evt >= 0.25:
+                                    last_evt = now
+                                    elapsed = (now - start_t) or 0.001
+                                    rate = downloaded / elapsed
+                                    eta = (total - downloaded) / rate if rate and total else None
+                                    push({
+                                        "type": "progress", "filename": filename,
+                                        "downloaded": downloaded, "total": total,
+                                        "percent": round(downloaded / total * 100, 1) if total else 0,
+                                        "rate": round(rate),
+                                        "eta": round(eta) if eta is not None else None,
+                                    })
+                os.replace(part, dest)
+            except Exception:
+                # Remove the partial file (best effort) and let the outer
+                # handler report the failure.
+                try:
+                    os.unlink(part)
+                except OSError:
+                    pass
+                raise
+            path = dest
+
+        push({"type": "done", "path": str(path)})
+
+    except Exception as exc:
+        push({"type": "error", "message": str(exc)})
+    finally:
+        # Keep queue and status around for 5 minutes so late or reconnecting
+        # clients can still read the outcome, then drop both.
+        def _gc():
+            time.sleep(300)
+            _download_sessions.pop(session_id, None)
+            _download_status.pop(session_id, None)
+        _t.Thread(target=_gc, daemon=True).start()


 @router.post("/admin/api/model-download")
@@ -370,35 +583,60 @@ async def api_download_model(
    request: Request,
    username: str = Depends(require_admin)
 ):
-    """Download a model from HuggingFace."""
+    """Start a background download; returns session_id for SSE progress streaming."""
    data = await request.json()
    model_id = data.get("model_id")
-    file_pattern = data.get("file_pattern")
-    
+    file_pattern = (data.get("file_pattern") or "").strip()
+
    if not model_id:
        raise HTTPException(status_code=400, detail="Model ID required")
-    
-    from codai.models.cache import download_model, is_huggingface_model_id
-    
-    try:
-        if is_huggingface_model_id(model_id):
-            if file_pattern:
-                cached = download_model(model_id, file_pattern=file_pattern)
-            else:
-                cached = download_model(model_id, file_pattern='.gguf')
-                if not cached:
-                    # Download full repo
-                    from huggingface_hub import snapshot_download
-                    cached = snapshot_download(model_id)
-        else:
-            cached = download_model(model_id, file_pattern=file_pattern or '.gguf')
-        
-        if cached:
-            return {"success": True, "path": cached}
-        else:
-            raise HTTPException(status_code=500, detail="Download failed")
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
+
+    session_id = str(_uuid.uuid4())
+    pq = _q.Queue()
+    _download_sessions[session_id] = pq
+
+    _t.Thread(
+        target=_run_download_thread,
+        args=(session_id, model_id, file_pattern, pq),
+        daemon=True,
+    ).start()
+
+    return {"session_id": session_id}
+
+
+@router.get("/admin/api/download-stream/{session_id}")
+async def api_download_stream(
+    session_id: str,
+    request: Request,
+    username: str = Depends(require_admin),
+):
+    """Server-Sent Events stream for download progress.
+
+    Events are taken from the session's queue and sent as ``data:`` frames
+    until a ``done`` or ``error`` event is delivered. While the queue is idle
+    a keepalive frame is sent roughly every 2 s. The stream also ends when
+    the client has disconnected, when the recorded status is already
+    terminal (the terminal event was consumed by an earlier connection), or
+    when the session has been garbage-collected.
+
+    Raises:
+        HTTPException(404): no queue is registered for ``session_id``.
+    """
+    import asyncio
+
+    pq = _download_sessions.get(session_id)
+    if pq is None:
+        raise HTTPException(status_code=404, detail="Download session not found")
+
+    async def _generate():
+        # get_running_loop() is the supported call inside a coroutine.
+        loop = asyncio.get_running_loop()
+        while True:
+            try:
+                # Blocking queue read runs in the default executor so the event loop stays free.
+                evt = await loop.run_in_executor(None, lambda: pq.get(timeout=2))
+            except _q.Empty:
+                if await request.is_disconnected():
+                    break
+                state = _download_status.get(session_id) or {}
+                outcome = state.get("status")
+                # The worker queues an event before mirroring it into the status
+                # dict, so a terminal status with an empty queue means another
+                # connection already consumed the final event: replay it.
+                if outcome == "done":
+                    final = {"type": "done", "path": state.get("path", "")}
+                    yield f"data: {_j.dumps(final)}\n\n"
+                    break
+                if outcome == "error":
+                    final = {"type": "error", "message": state.get("error", "")}
+                    yield f"data: {_j.dumps(final)}\n\n"
+                    break
+                if session_id not in _download_sessions:
+                    # Session expired; nothing more will ever arrive.
+                    break
+                yield 'data: {"type":"keepalive"}\n\n'
+                continue
+            yield f"data: {_j.dumps(evt)}\n\n"
+            if evt.get("type") in ("done", "error"):
+                break
+
+    return StreamingResponse(
+        _generate(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "X-Accel-Buffering": "no",
+            "Connection": "keep-alive",
+        },
+    )


 @router.delete("/admin/api/models/{model_identifier}")
@@ -418,6 +656,548 @@ async def api_delete_model(
        raise HTTPException(status_code=500, detail=str(e))


+# --- Download status / cache management ---
+
+@router.get("/admin/api/downloads")
+async def api_list_downloads(username: str = Depends(require_admin)):
+    """List the status dict of every download session currently tracked.
+
+    Entries come from ``_download_status``; running sessions and finished
+    ones that have not been removed yet are both included.
+    """
+    # list() copies the values view in one call before the response is built.
+    tracked = list(_download_status.values())
+    return tracked
+
+
+@router.post("/admin/api/model-upload")
+async def api_model_upload(request: Request, username: str = Depends(require_admin)):
+    """Upload a GGUF model file in chunks.
+
+    Reads the multipart form fields ``chunk`` (file part), ``filename``,
+    ``chunk_index``, ``total_chunks`` and optional ``upload_id``. Chunks are
+    written in arrival order to ``upload_<upload_id>.part`` in the system temp
+    dir; chunk 0 starts the file from scratch. When the last chunk arrives the
+    file is moved into the model cache dir under ``filename``.
+
+    Returns:
+        ``{"success": True, "complete": True, "path": ...}`` for the final
+        chunk, otherwise ``{"success": True, "complete": False, "chunk_index": ...}``.
+
+    Raises:
+        HTTPException(400): missing file chunk or non-integer chunk counters.
+    """
+    import os
+    import shutil
+    import tempfile
+    from codai.models.cache import get_model_cache_dir
+
+    def _leaf(value, default):
+        # Reduce a client-supplied name to its last path component so it
+        # cannot point outside the temp / cache directories.
+        name = os.path.basename(str(value or "").replace("\\", "/"))
+        return default if name in ("", ".", "..") else name
+
+    form = await request.form()
+    chunk = form.get("chunk")
+    filename = _leaf(form.get("filename"), "model.gguf")
+    try:
+        chunk_index = int(form.get("chunk_index", 0))
+        total_chunks = int(form.get("total_chunks", 1))
+    except (TypeError, ValueError):
+        raise HTTPException(status_code=400, detail="chunk_index and total_chunks must be integers")
+
+    if not chunk or not hasattr(chunk, "read"):
+        raise HTTPException(status_code=400, detail="No file chunk provided")
+
+    cache_dir = get_model_cache_dir()
+    temp_dir = tempfile.gettempdir()
+    upload_id = _leaf(form.get("upload_id"), filename)
+    temp_path = os.path.join(temp_dir, f"upload_{upload_id}.part")
+
+    # First chunk truncates any stale part file left by an aborted upload;
+    # later chunks append.
+    chunk_data = await chunk.read()
+    with open(temp_path, "wb" if chunk_index == 0 else "ab") as f:
+        f.write(chunk_data)
+
+    # If last chunk, move to final location
+    if chunk_index == total_chunks - 1:
+        final_path = os.path.join(cache_dir, filename)
+        # shutil.move also works when temp dir and cache dir are on different filesystems.
+        shutil.move(temp_path, final_path)
+        return {"success": True, "complete": True, "path": final_path}
+
+    return {"success": True, "complete": False, "chunk_index": chunk_index}
+
+
+# ── cache scan helpers (run in thread pool) ──────────────────────────────────
+
+def _scan_caches() -> dict:
+    """Collect the models found in the HuggingFace and GGUF caches.
+
+    Returns:
+        dict with two lists, ``"hf"`` (one entry per cached HF repo) and
+        ``"gguf"`` (one entry per GGUF file or configured GGUF model), plus
+        ``"hf_error"`` when scanning the HF cache raised. Each entry carries
+        size information, whether it appears in the configuration
+        (``in_config``), a ``model_type`` and the configured ``settings``.
+    """
+    import os
+    result: dict = {"hf": [], "gguf": []}
+
+    from codai.models.cache import get_all_cache_dirs, get_model_cache_dir
+    caches = get_all_cache_dirs()
+
+    # Collect configured models: key (path/id) → (settings_dict, model_type)
+    configured_settings: dict = {}
+    if config_manager:
+        md = config_manager.models_data
+        for cat in ("text_models", "image_models", "audio_models",
+                    "gguf_models", "tts_models", "vision_models"):
+            for m in md.get(cat, []):
+                # Entries may be bare strings (path/id only) or dict-like settings.
+                if isinstance(m, str):
+                    p = m
+                    configured_settings[p] = ({}, cat)
+                else:
+                    p = m.get("path") or m.get("id") or ""
+                    if p:
+                        configured_settings[p] = (m, cat)
+
+    # HuggingFace cache
+    hf_dir = caches.get("huggingface")
+    if hf_dir:
+        try:
+            from huggingface_hub import scan_cache_dir
+            info = scan_cache_dir(hf_dir)
+            for repo in sorted(info.repos, key=lambda r: r.repo_id):
+                # Revisions are ordered by commit hash; the last one after
+                # sorting is used as the representative file listing.
+                revs = sorted(repo.revisions, key=lambda r: r.commit_hash)
+                size_bytes = sum(r.size_on_disk for r in repo.revisions)
+                files = sorted(f.file_name for f in revs[-1].files) if revs else []
+
+                # If ALL model files are .gguf, treat as GGUF entries not HF entries
+                model_files = [f for f in files if not f.endswith(('.json', '.txt', '.md', '.py', '.gitattributes'))]
+                if model_files and all(f.endswith('.gguf') for f in model_files):
+                    for rev in revs[-1:]:
+                        for hf_file in rev.files:
+                            if not hf_file.file_name.endswith('.gguf'):
+                                continue
+                            fpath = str(hf_file.file_path)
+                            fname = hf_file.file_name
+                            fsize = hf_file.size_on_disk
+                            # Configuration may reference the file by full path or by bare name.
+                            cfg = (configured_settings.get(fpath)
+                                   or configured_settings.get(fname)
+                                   or ({}, None))
+                            result["gguf"].append({
+                                "filename": fname,
+                                "path": fpath,
+                                "size_gb": round(fsize / 1e9, 2),
+                                "size_bytes": fsize,
+                                "in_config": fpath in configured_settings or fname in configured_settings,
+                                # "gguf_models" and unknown categories are reported as "text_models".
+                                "model_type": cfg[1] if cfg[1] and cfg[1] != "gguf_models" else "text_models",
+                                "settings": cfg[0] if isinstance(cfg[0], dict) else {},
+                            })
+                    continue  # skip adding to hf list
+
+                cfg = configured_settings.get(repo.repo_id, ({}, None))
+                result["hf"].append({
+                    "id": repo.repo_id,
+                    "size_gb": round(size_bytes / 1e9, 2),
+                    "size_bytes": size_bytes,
+                    "revision_count": len(list(repo.revisions)),
+                    # Only the first 30 file names are returned; file_count has the full number.
+                    "files": files[:30],
+                    "file_count": len(files),
+                    "in_config": repo.repo_id in configured_settings,
+                    "model_type": cfg[1] if cfg[1] and cfg[1] != "gguf_models" else "text_models",
+                    "settings": cfg[0] if isinstance(cfg[0], dict) else {},
+                })
+        except Exception as e:
+            # Scan failures are reported in the result instead of raised;
+            # entries appended before the failure are kept.
+            result["hf_error"] = str(e)
+
+    # GGUF cache (coderai-specific)
+    gguf_dir = caches.get("coderai") or get_model_cache_dir()
+    if gguf_dir and os.path.exists(gguf_dir):
+        # Every regular file directly inside the directory is listed, whatever its extension.
+        for fname in sorted(os.listdir(gguf_dir)):
+            fpath = os.path.join(gguf_dir, fname)
+            if os.path.isfile(fpath):
+                size = os.path.getsize(fpath)
+                cfg = (configured_settings.get(fpath)
+                       or configured_settings.get(fname)
+                       or ({}, None))
+                result["gguf"].append({
+                    "filename": fname,
+                    "path": fpath,
+                    "size_gb": round(size / 1e9, 2),
+                    "size_bytes": size,
+                    "in_config": fpath in configured_settings or fname in configured_settings,
+                    "model_type": cfg[1] if cfg[1] and cfg[1] != "gguf_models" else "text_models",
+                    "settings": cfg[0] if isinstance(cfg[0], dict) else {},
+                })
+
+    # Add configured GGUF models not yet in the list (e.g., HF repo IDs or external paths)
+    existing_paths = {m["path"] for m in result["gguf"]}
+    for path, (settings, mtype) in configured_settings.items():
+        if path in existing_paths:
+            continue
+        # Check if it's a GGUF model (ends with .gguf or is in a GGUF repo)
+        # The substring test is a heuristic: any path containing "gguf" matches.
+        is_gguf = path.endswith('.gguf') or 'gguf' in path.lower() or mtype == "gguf_models"
+        if is_gguf:
+            # Try to get size if it's a local file
+            size_bytes = 0
+            if os.path.isfile(path):
+                size_bytes = os.path.getsize(path)
+            result["gguf"].append({
+                "filename": os.path.basename(path) if '/' in path else path,
+                "path": path,
+                "size_gb": round(size_bytes / 1e9, 2) if size_bytes else 0,
+                "size_bytes": size_bytes,
+                "in_config": True,
+                "model_type": mtype if mtype and mtype != "gguf_models" else "text_models",
+                "settings": settings if isinstance(settings, dict) else {},
+            })
+
+    return result
+
+
+def _get_cache_stats() -> dict:
+    """Summarise size and disk-space figures for the HF and GGUF caches.
+
+    HF repos whose model files are all ``.gguf`` are counted under the
+    ``gguf_*`` totals, every other repo under ``hf_*``. The HF cache is
+    scanned once; scan errors are swallowed and leave the totals gathered so
+    far. Disk figures stay ``None`` when they cannot be determined.
+
+    Returns:
+        dict with ``hf_bytes``, ``hf_models``, ``gguf_bytes``, ``gguf_files``
+        and ``{hf,gguf}_disk_{free,total}_bytes``.
+    """
+    import os, shutil
+    from codai.models.cache import get_all_cache_dirs, get_model_cache_dir
+
+    stats = {"hf_bytes": 0, "hf_models": 0, "gguf_bytes": 0, "gguf_files": 0,
+             "hf_disk_free_bytes": None, "hf_disk_total_bytes": None,
+             "gguf_disk_free_bytes": None, "gguf_disk_total_bytes": None}
+    caches = get_all_cache_dirs()
+    # Suffixes of repo files that are not model weights.
+    non_model_suffixes = ('.json', '.txt', '.md', '.py', '.gitattributes')
+
+    def _record_disk_usage(path, prefix):
+        # shutil.disk_usage is available on POSIX and Windows alike.
+        try:
+            usage = shutil.disk_usage(path)
+        except Exception:
+            return
+        stats[f"{prefix}_disk_free_bytes"] = usage.free
+        stats[f"{prefix}_disk_total_bytes"] = usage.total
+
+    hf_dir = caches.get("huggingface")
+    if hf_dir:
+        try:
+            from huggingface_hub import scan_cache_dir
+            info = scan_cache_dir(hf_dir)
+            for repo in info.repos:
+                # Sort by commit hash so the revision used for classification
+                # is deterministic and matches the one _scan_caches picks.
+                revs = sorted(repo.revisions, key=lambda r: r.commit_hash)
+                if not revs:
+                    continue
+                files = [f.file_name for f in revs[-1].files]
+                model_files = [f for f in files if not f.endswith(non_model_suffixes)]
+                if model_files and all(f.endswith('.gguf') for f in model_files):
+                    # GGUF-only repo: count its .gguf files with the GGUF totals.
+                    for rev in revs:
+                        for hf_file in rev.files:
+                            if hf_file.file_name.endswith('.gguf'):
+                                stats["gguf_bytes"] += hf_file.size_on_disk
+                                stats["gguf_files"] += 1
+                else:
+                    stats["hf_bytes"] += sum(r.size_on_disk for r in revs)
+                    stats["hf_models"] += 1
+        except Exception:
+            pass
+        _record_disk_usage(hf_dir, "hf")
+
+    gguf_dir = caches.get("coderai") or get_model_cache_dir()
+    if gguf_dir and os.path.exists(gguf_dir):
+        files = [f for f in os.listdir(gguf_dir)
+                 if os.path.isfile(os.path.join(gguf_dir, f))]
+        # Added on top of whatever GGUF files were found in the HF cache.
+        stats["gguf_files"] += len(files)
+        stats["gguf_bytes"] += sum(os.path.getsize(os.path.join(gguf_dir, f)) for f in files)
+        _record_disk_usage(gguf_dir, "gguf")
+
+    return stats
+
+
+def _do_clear_cache(cache_type: str) -> dict:
+    """Bulk-delete cached models and report how many bytes were freed.
+
+    Args:
+        cache_type: "all", "hf" or "gguf". Any other value deletes nothing.
+
+    Returns:
+        ``{"success": True, "freed_bytes": <int>}``. Individual files that
+        cannot be removed are skipped and not counted.
+    """
+    import os, shutil
+    from codai.models.cache import get_all_cache_dirs, get_model_cache_dir
+    caches = get_all_cache_dirs()
+    freed = 0
+
+    def _tree_size(root: str) -> int:
+        """Sum the sizes of all files below ``root`` without following symlinks."""
+        total = 0
+        for dirpath, _dirnames, filenames in os.walk(root):
+            for name in filenames:
+                try:
+                    total += os.lstat(os.path.join(dirpath, name)).st_size
+                except OSError:
+                    pass
+        return total
+
+    if cache_type in ("all", "hf"):
+        hf_dir = caches.get("huggingface")
+        if hf_dir and os.path.exists(hf_dir):
+            try:
+                from huggingface_hub import scan_cache_dir
+                info = scan_cache_dir(hf_dir)
+                hashes = [r.commit_hash for repo in info.repos for r in repo.revisions]
+                if hashes:
+                    strategy = info.delete_revisions(*hashes)
+                    expected = strategy.expected_freed_size
+                    strategy.execute()
+                    # Count only after a successful delete; otherwise the
+                    # fallback below would add the same bytes a second time.
+                    freed += expected
+            except Exception:
+                # Fallback: remove everything inside the cache directory by hand.
+                for item in os.listdir(hf_dir):
+                    p = os.path.join(hf_dir, item)
+                    try:
+                        if os.path.isdir(p):
+                            size = _tree_size(p)
+                            shutil.rmtree(p)
+                        else:
+                            size = os.path.getsize(p)
+                            os.remove(p)
+                        freed += size
+                    except Exception:
+                        pass
+
+    if cache_type in ("all", "gguf"):
+        gguf_dir = caches.get("coderai") or get_model_cache_dir()
+        if gguf_dir and os.path.exists(gguf_dir):
+            # Only top-level regular files are removed; subdirectories are kept.
+            for f in os.listdir(gguf_dir):
+                fp = os.path.join(gguf_dir, f)
+                if os.path.isfile(fp):
+                    try:
+                        size = os.path.getsize(fp)
+                        os.remove(fp)
+                        freed += size
+                    except Exception:
+                        pass
+
+    return {"success": True, "freed_bytes": freed}
+
+
+def _do_delete_model(model_id: str, cache_type: str) -> dict:
+    """Delete one cached model.
+
+    Args:
+        model_id: HF repo ID when ``cache_type`` is "hf", or a file name
+            relative to the GGUF cache directory when it is "gguf".
+        cache_type: "hf" or "gguf".
+
+    Returns:
+        ``{"success": True}`` on deletion, otherwise
+        ``{"success": False, "detail": <reason>}``.
+    """
+    import os, shutil
+    from codai.models.cache import get_all_cache_dirs, get_model_cache_dir
+    caches = get_all_cache_dirs()
+
+    if cache_type == "hf":
+        hf_dir = caches.get("huggingface")
+        if hf_dir:
+            try:
+                from huggingface_hub import scan_cache_dir
+                info = scan_cache_dir(hf_dir)
+                repo = next((r for r in info.repos if r.repo_id == model_id), None)
+                if repo:
+                    hashes = [r.commit_hash for r in repo.revisions]
+                    info.delete_revisions(*hashes).execute()
+                    return {"success": True}
+            except Exception:
+                pass
+            # Fallback: remove directory directly
+            safe = model_id.replace("/", "--")
+            d = os.path.join(hf_dir, f"models--{safe}")
+            if os.path.exists(d):
+                shutil.rmtree(d, ignore_errors=True)
+                return {"success": True}
+        return {"success": False, "detail": "Model not found in HF cache"}
+
+    if cache_type == "gguf":
+        # Same directory resolution as the stats and bulk-clear helpers.
+        gguf_dir = caches.get("coderai") or get_model_cache_dir()
+        if not gguf_dir:
+            return {"success": False, "detail": "File not found"}
+        root = os.path.abspath(gguf_dir)
+        fp = os.path.abspath(os.path.join(root, model_id))
+        # model_id arrives from the URL path: refuse "../" sequences or
+        # absolute paths that would resolve outside the cache directory.
+        if not fp.startswith(os.path.join(root, "")):
+            return {"success": False, "detail": "File not found"}
+        if os.path.isfile(fp):
+            try:
+                os.remove(fp)
+            except OSError as e:
+                return {"success": False, "detail": str(e)}
+            return {"success": True}
+        return {"success": False, "detail": "File not found"}
+
+    return {"success": False, "detail": "Unknown cache_type"}
+
+
+@router.get("/admin/api/cached-models")
+async def api_cached_models(username: str = Depends(require_admin)):
+    """List the locally stored models found by ``_scan_caches``.
+
+    The scan is run in a worker thread via ``asyncio.to_thread``.
+    """
+    from asyncio import to_thread
+    found = await to_thread(_scan_caches)
+    return found
+
+
+@router.get("/admin/api/cache-stats")
+async def api_cache_stats(username: str = Depends(require_admin)):
+    """Report per-cache disk usage as computed by ``_get_cache_stats``.
+
+    The computation is run in a worker thread via ``asyncio.to_thread``.
+    """
+    from asyncio import to_thread
+    usage = await to_thread(_get_cache_stats)
+    return usage
+
+
+@router.delete("/admin/api/cache")
+async def api_clear_cache(cache_type: str = "all", username: str = Depends(require_admin)):
+    """Bulk-delete cached models; ``cache_type`` is one of all | hf | gguf.
+
+    Delegates to ``_do_clear_cache`` in a worker thread and returns its result.
+    """
+    from asyncio import to_thread
+    outcome = await to_thread(_do_clear_cache, cache_type)
+    return outcome
+
+
+@router.delete("/admin/api/cached-models/{model_id:path}")
+async def api_delete_cached_model(
+    model_id: str,
+    cache_type: str = "hf",
+    username: str = Depends(require_admin),
+):
+    """Remove a single cached model.
+
+    ``model_id`` is an HF repo ID or a GGUF filename, depending on
+    ``cache_type``. Delegates to ``_do_delete_model`` in a worker thread.
+    """
+    from asyncio import to_thread
+    outcome = await to_thread(_do_delete_model, model_id, cache_type)
+    return outcome
+
+
+@router.post("/admin/api/model-enable")
+async def api_model_enable(request: Request, username: str = Depends(require_admin)):
+    """Register a cached model in models.json so CoderAI can use it.
+
+    JSON body: ``path`` (or ``model_id``) and optional ``model_type``
+    (defaults to "text_models").
+
+    Raises:
+        HTTPException 503: config manager not initialized.
+        HTTPException 400: missing path or unknown model_type.
+    """
+    if config_manager is None:
+        raise HTTPException(status_code=503, detail="Config manager not initialized")
+    data = await request.json()
+    path = data.get("path") or data.get("model_id", "")
+    model_type = data.get("model_type", "text_models")
+    valid = {"text_models", "image_models", "audio_models", "gguf_models", "tts_models", "vision_models"}
+    if not path:
+        # Never write an empty entry into models.json.
+        raise HTTPException(status_code=400, detail="path is required")
+    if model_type not in valid:
+        raise HTTPException(status_code=400, detail=f"model_type must be one of {valid}")
+    lst = config_manager.models_data.setdefault(model_type, [])
+
+    def _entry_id(m):
+        """Return the identifier of a models.json entry (plain string or dict)."""
+        if isinstance(m, str):
+            return m
+        if isinstance(m, dict):
+            return m.get("path", m.get("id", ""))
+        return None
+
+    # Entries written by the configure endpoint are dicts, so compare by
+    # identifier rather than by raw list membership to avoid duplicates.
+    if not any(_entry_id(m) == path for m in lst):
+        lst.append(path)
+        config_manager.save_models()
+    return {"success": True}
+
+
+@router.post("/admin/api/model-disable")
+async def api_model_disable(request: Request, username: str = Depends(require_admin)):
+    """Drop a model from every category of models.json.
+
+    The cached files are not touched. models.json is saved only when at
+    least one entry was actually removed.
+    """
+    if config_manager is None:
+        raise HTTPException(status_code=503, detail="Config manager not initialized")
+    payload = await request.json()
+    path = payload.get("path") or payload.get("model_id", "")
+
+    def _entry_id(entry):
+        # Entries are either plain strings or dicts keyed by "path"/"id".
+        if isinstance(entry, str):
+            return entry
+        return entry.get("path", entry.get("id", ""))
+
+    categories = ("text_models", "image_models", "audio_models",
+                  "gguf_models", "tts_models", "vision_models")
+    dirty = False
+    for category in categories:
+        current = config_manager.models_data.get(category, [])
+        kept = [entry for entry in current if _entry_id(entry) != path]
+        if len(kept) == len(current):
+            continue
+        config_manager.models_data[category] = kept
+        dirty = True
+    if dirty:
+        config_manager.save_models()
+    return {"success": True}
+
+
+@router.get("/admin/api/model-loaded-status")
+async def api_model_loaded_status(username: str = Depends(require_admin)):
+    """Report the keys of ``multi_model_manager.models`` as a list under "loaded"."""
+    from codai.models.manager import multi_model_manager
+    loaded_keys = [key for key in multi_model_manager.models.keys()]
+    return {"loaded": loaded_keys}
+
+
+@router.post("/admin/api/model-load")
+async def api_model_load(request: Request, username: str = Depends(require_admin)):
+    """Load a configured model into VRAM (same VRAM checks as a real request).
+
+    Expects a JSON body with a non-empty ``path``; responds 400 otherwise.
+    Returns ``{"success": True, "already_loaded": bool}``. Any exception raised
+    while loading is converted into a 500 carrying the exception text.
+    """
+    from codai.models.manager import multi_model_manager
+    data = await request.json()
+    path = data.get("path", "")
+    if not path:
+        raise HTTPException(status_code=400, detail="path required")
+
+    # Find the model config entry to determine its type
+    # Only the non-text categories are searched; a path found in none of them
+    # keeps the default "text".
+    model_type = "text"
+    if config_manager:
+        md = config_manager.models_data
+        for cat, mtype in (("image_models", "image"), ("audio_models", "audio"),
+                           ("vision_models", "vision"), ("tts_models", "tts")):
+            for m in md.get(cat, []):
+                mid = m if isinstance(m, str) else m.get("path") or m.get("id") or ""
+                if mid == path:
+                    model_type = mtype
+                    # Leaves the inner loop only; the remaining categories are
+                    # still scanned, so the last matching category wins.
+                    break
+
+    # Text models are requested with a type of None.
+    result = multi_model_manager.request_model(path, model_type if model_type != "text" else None)
+    if result.get("already_loaded"):
+        return {"success": True, "already_loaded": True}
+
+    # Not loaded yet — trigger actual load
+    # NOTE(review): only "text" and "image" have a load branch below; "audio",
+    # "vision" and "tts" fall through and still report success — confirm intended.
+    # NOTE(review): the loaders are called directly inside this coroutine;
+    # presumably they block the event loop while loading — confirm.
+    try:
+        if model_type == "text":
+            mm = multi_model_manager._load_model_by_name(result["model_name"] or path)
+            if mm is None:
+                raise RuntimeError("Model failed to load")
+            multi_model_manager.models[result["model_key"] or path] = mm
+            multi_model_manager.active_in_vram = result["model_key"] or path
+        elif model_type == "image":
+            from codai.api.images import _load_diffusers_pipeline, _is_gguf_model, _load_sdcpp_model
+            from codai.api.state import get_global_args
+            global_args = get_global_args()
+            # Image models are registered under an "image:"-prefixed key.
+            model_key = f"image:{path}"
+            if _is_gguf_model(path):
+                resolved = multi_model_manager.load_model(path)
+                import os as _os
+                # If the resolved path is not a file, or the loader returns a
+                # falsy value, nothing is registered but success is still returned.
+                if resolved and _os.path.isfile(resolved):
+                    sd_model = _load_sdcpp_model(resolved, global_args)
+                    if sd_model:
+                        multi_model_manager.add_model(model_key, sd_model)
+            else:
+                pipeline = _load_diffusers_pipeline(path, global_args)
+                if pipeline:
+                    multi_model_manager.add_model(model_key, pipeline)
+        return {"success": True, "already_loaded": False}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/admin/api/model-unload")
+async def api_model_unload(request: Request, username: str = Depends(require_admin)):
+    """Unload a model from VRAM (keeps it available for on-request reload).
+
+    JSON body: ``path`` (required, 400 if missing).
+
+    Returns:
+        ``{"success": True, "was_loaded": bool}``; ``was_loaded`` is False when
+        no loaded key matches ``path``.
+    """
+    import gc
+    from codai.models.manager import multi_model_manager
+    data = await request.json()
+    path = data.get("path", "")
+    if not path:
+        raise HTTPException(status_code=400, detail="path required")
+
+    # Find the key in loaded models, most specific rule first: exact key,
+    # then "<prefix>:<path>", then basename suffix.
+    loaded_keys = list(multi_model_manager.models.keys())
+    matchers = [
+        lambda k: k == path,
+        lambda k: k.endswith(f":{path}"),
+    ]
+    basename = path.split("/")[-1]
+    if basename:
+        # A path ending in "/" has an empty basename, and endswith("") is true
+        # for every key — skip the rule instead of unloading an arbitrary model.
+        matchers.append(lambda k: k.endswith(basename))
+    key = None
+    for matches in matchers:
+        key = next((k for k in loaded_keys if matches(k)), None)
+        if key is not None:
+            break
+    if key is None:
+        return {"success": True, "was_loaded": False}
+
+    model_obj = multi_model_manager.models.pop(key, None)
+    if model_obj is not None:
+        # Best-effort release: prefer the object's own cleanup(), otherwise
+        # call .to("cpu") when available. Failures are ignored.
+        try:
+            if hasattr(model_obj, "cleanup"):
+                model_obj.cleanup()
+            elif hasattr(model_obj, "to"):
+                model_obj.to("cpu")
+        except Exception:
+            pass
+    if multi_model_manager.active_in_vram == key:
+        multi_model_manager.active_in_vram = None
+    if multi_model_manager.current_model_key == key:
+        multi_model_manager.current_model_key = None
+
+    gc.collect()
+    # torch is optional here; skip the CUDA cache flush if it is unavailable.
+    try:
+        import torch
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+    except Exception:
+        pass
+
+    return {"success": True, "was_loaded": True}
+
+
+@router.post("/admin/api/model-configure")
+async def api_model_configure(request: Request, username: str = Depends(require_admin)):
+    """Store a model's settings as a dict entry in models.json.
+
+    Any existing entry for the same path is first removed from every
+    category, then a fresh entry is appended to the requested category and
+    models.json is saved.
+    """
+    if config_manager is None:
+        raise HTTPException(status_code=503, detail="Config manager not initialized")
+    payload = await request.json()
+    path = payload.get("path") or payload.get("model_id", "")
+    requested_type = payload.get("model_type", "text_models")
+    # Legacy "gguf_models" is folded into "text_models" (GGUF is a format, not a type).
+    model_type = "text_models" if requested_type == "gguf_models" else requested_type
+    valid = {"text_models", "image_models", "audio_models", "tts_models", "vision_models"}
+    if not path:
+        raise HTTPException(status_code=400, detail="path is required")
+    if model_type not in valid:
+        raise HTTPException(status_code=400, detail=f"model_type must be one of {valid}")
+
+    def _entry_id(entry):
+        # Entries are either plain strings or dicts keyed by "path"/"id".
+        if isinstance(entry, str):
+            return entry
+        return entry.get("path", entry.get("id", ""))
+
+    # Purge the path from all categories so a type change does not leave a stale copy.
+    for category in valid | {"gguf_models"}:
+        existing = config_manager.models_data.get(category, [])
+        config_manager.models_data[category] = [
+            entry for entry in existing if _entry_id(entry) != path
+        ]
+
+    # When the caller gives no VRAM figure, estimate one from the file size.
+    used_vram_gb = payload.get("used_vram_gb")
+    if used_vram_gb is None:
+        import os
+        if os.path.isfile(path):
+            # GGUF: ~1.1x file size; HF safetensors: ~1.2x
+            factor = 1.1 if path.endswith(".gguf") else 1.2
+            used_vram_gb = round(os.path.getsize(path) / 1e9 * factor, 2)
+
+    # Assemble the entry; optional keys are copied only when present in the payload.
+    entry: dict = {"path": path, "model_type": model_type}
+    if used_vram_gb is not None:
+        entry["used_vram_gb"] = used_vram_gb
+    optional_keys = (
+        "alias", "backend", "load_mode", "n_gpu_layers", "n_ctx",
+        "max_gpu_percent", "manual_ram_gb", "load_in_4bit", "load_in_8bit",
+        "flash_attention", "no_ram", "offload_strategy", "offload_dir",
+        "system_prompt", "parser", "tools_closer_prompt", "grammar_guided",
+    )
+    entry.update({name: payload[name] for name in optional_keys if name in payload})
+
+    config_manager.models_data.setdefault(model_type, []).append(entry)
+    config_manager.save_models()
+    return {"success": True}
+
+
 # --- System endpoints ---

 @router.post("/admin/api/system/reload")
@@ -442,3 +1222,354 @@ async def api_reload_config(username: str = Depends(require_admin)):


 from datetime import datetime
+
+
+# --- Settings page ---
+
+@router.get("/admin/settings", response_class=HTMLResponse)
+async def settings_page(request: Request, username: str = Depends(require_admin)):
+    """Render the settings.html template for the authenticated admin."""
+    context = {"username": username, "is_admin": True}
+    return templates.TemplateResponse(request, "settings.html", context)
+
+
+@router.get("/admin/api/settings")
+async def api_get_settings(username: str = Depends(require_admin)):
+    """Serialize the in-memory configuration into a nested JSON-ready dict.
+
+    Responds 503 when the config manager or its config is not available.
+    """
+    if config_manager is None or config_manager.config is None:
+        raise HTTPException(status_code=503, detail="Config manager not initialized")
+    cfg = config_manager.config
+
+    server_section = {
+        "host": cfg.server.host,
+        "port": cfg.server.port,
+        "https": cfg.server.https,
+        "https_key_path": cfg.server.https_key_path,
+        "https_cert_path": cfg.server.https_cert_path,
+    }
+    backend_section = {
+        "type": cfg.backend.type,
+        "image_backend": cfg.backend.image_backend,
+        "audio_backend": cfg.backend.audio_backend,
+        "tts_backend": cfg.backend.tts_backend,
+    }
+    models_section = {
+        "default_load_mode": cfg.models.default_load_mode,
+        "hf_cache_dir": cfg.models.hf_cache_dir,
+        "gguf_cache_dir": cfg.models.gguf_cache_dir,
+    }
+    offload_section = {
+        "directory": cfg.offload.directory,
+        "strategy": cfg.offload.strategy,
+        "max_gpu_percent": cfg.offload.max_gpu_percent,
+        "no_ram": cfg.offload.no_ram,
+        "load_in_4bit": cfg.offload.load_in_4bit,
+        "load_in_8bit": cfg.offload.load_in_8bit,
+        "manual_ram_gb": cfg.offload.manual_ram_gb,
+        "flash_attention": cfg.offload.flash_attention,
+    }
+    vulkan_section = {
+        "n_gpu_layers": cfg.vulkan.n_gpu_layers,
+        "n_ctx": cfg.vulkan.n_ctx,
+        "device_id": cfg.vulkan.device_id,
+        "single_gpu": cfg.vulkan.single_gpu,
+    }
+    whisper_section = {
+        "server_path": cfg.whisper.server_path,
+        "server_port": cfg.whisper.server_port,
+    }
+
+    response = {
+        "server": server_section,
+        "backend": backend_section,
+        "models": models_section,
+        "offload": offload_section,
+        "vulkan": vulkan_section,
+        "whisper": whisper_section,
+    }
+    # Top-level scalar options follow the sections, in the same order as before.
+    response["system_prompt"] = cfg.system_prompt
+    response["tools_closer_prompt"] = cfg.tools_closer_prompt
+    response["grammar_guided"] = cfg.grammar_guided
+    response["parser"] = cfg.parser
+    return response
+
+
+@router.post("/admin/api/settings")
+async def api_save_settings(request: Request, username: str = Depends(require_admin)):
+    """Update and persist config.json from submitted JSON.
+
+    Only sections present in the payload are updated, and within a section
+    only the keys that are present; absent keys keep their current value.
+
+    Raises:
+        HTTPException 503: config manager not initialized.
+        HTTPException 400: a numeric field could not be converted. Nothing is
+            persisted in that case, although sections processed before the
+            failing one have already been applied in memory.
+    """
+    if config_manager is None or config_manager.config is None:
+        raise HTTPException(status_code=503, detail="Config manager not initialized")
+
+    data = await request.json()
+    c = config_manager.config
+
+    try:
+        if "server" in data:
+            srv = data["server"]
+            c.server.host = srv.get("host", c.server.host)
+            c.server.port = int(srv.get("port", c.server.port))
+            c.server.https = bool(srv.get("https", c.server.https))
+            # Clear the TLS paths only when the key is sent (empty -> None);
+            # an absent key must not wipe the stored value.
+            if "https_key_path" in srv:
+                c.server.https_key_path = srv["https_key_path"] or None
+            if "https_cert_path" in srv:
+                c.server.https_cert_path = srv["https_cert_path"] or None
+
+        if "backend" in data:
+            bk = data["backend"]
+            c.backend.type = bk.get("type", c.backend.type)
+            c.backend.image_backend = bk.get("image_backend", c.backend.image_backend)
+            c.backend.audio_backend = bk.get("audio_backend", c.backend.audio_backend)
+            c.backend.tts_backend = bk.get("tts_backend", c.backend.tts_backend)
+
+        if "models" in data:
+            mdl = data["models"]
+            c.models.default_load_mode = mdl.get("default_load_mode", c.models.default_load_mode)
+            if "hf_cache_dir" in mdl:
+                c.models.hf_cache_dir = mdl["hf_cache_dir"] or None
+            if "gguf_cache_dir" in mdl:
+                c.models.gguf_cache_dir = mdl["gguf_cache_dir"] or None
+
+        if "offload" in data:
+            off = data["offload"]
+            c.offload.directory = off.get("directory", c.offload.directory)
+            c.offload.strategy = off.get("strategy", c.offload.strategy)
+            if "max_gpu_percent" in off:
+                c.offload.max_gpu_percent = off["max_gpu_percent"] or None
+            c.offload.no_ram = bool(off.get("no_ram", c.offload.no_ram))
+            c.offload.load_in_4bit = bool(off.get("load_in_4bit", c.offload.load_in_4bit))
+            c.offload.load_in_8bit = bool(off.get("load_in_8bit", c.offload.load_in_8bit))
+            if "manual_ram_gb" in off:
+                c.offload.manual_ram_gb = off["manual_ram_gb"] or None
+            c.offload.flash_attention = bool(off.get("flash_attention", c.offload.flash_attention))
+
+        if "vulkan" in data:
+            vk = data["vulkan"]
+            c.vulkan.n_gpu_layers = int(vk.get("n_gpu_layers", c.vulkan.n_gpu_layers))
+            c.vulkan.n_ctx = int(vk.get("n_ctx", c.vulkan.n_ctx))
+            c.vulkan.device_id = int(vk.get("device_id", c.vulkan.device_id))
+            c.vulkan.single_gpu = bool(vk.get("single_gpu", c.vulkan.single_gpu))
+
+        if "whisper" in data:
+            wh = data["whisper"]
+            # Same rule as the TLS paths: absent key keeps the stored value.
+            if "server_path" in wh:
+                c.whisper.server_path = wh["server_path"] or None
+            c.whisper.server_port = int(wh.get("server_port", c.whisper.server_port))
+    except (TypeError, ValueError) as exc:
+        # int() on malformed input is a client error, not a server fault.
+        raise HTTPException(status_code=400, detail=f"Invalid settings value: {exc}")
+
+    if "system_prompt" in data:
+        c.system_prompt = data["system_prompt"] or None
+    if "tools_closer_prompt" in data:
+        c.tools_closer_prompt = bool(data["tools_closer_prompt"])
+    if "grammar_guided" in data:
+        c.grammar_guided = bool(data["grammar_guided"])
+    if "parser" in data:
+        c.parser = data["parser"]
+
+    config_manager.save_config()
+    return {"success": True}
+
+
+# --- HuggingFace model search proxy ---
+
+import re as _re
+# Extracts the quantization tag (group 1) from a GGUF file name.
+# Alphanumeric lookarounds are used instead of \b because "_" is a word
+# character: with \b, names such as "model_Q4_K_M.gguf" never matched.
+# The IQ alternative also covers XS / NL / S / M variants, and K-quants
+# accept the "_XL" suffix.
+_QUANT_RE = _re.compile(
+    r'(?<![A-Za-z0-9])'
+    r'(IQ[1-4]_(?:XX[SML]?|XS|NL|[SM])'
+    r'|Q[2-8]_K_(?:XL|[MSLX])|Q[2-8]_K|Q[2-8]_[0-9]|F16|F32|BF16)'
+    r'(?![A-Za-z0-9])',
+    _re.IGNORECASE,
+)
+
+
+def _hf_file_size(sibling: dict) -> int:
+    """Pick the byte size of an HF ``siblings`` entry.
+
+    The LFS size wins when present and non-zero; otherwise the entry's own
+    ``size`` is used, and 0 is returned when neither is available.
+    """
+    lfs_meta = sibling.get("lfs")
+    if lfs_meta:
+        lfs_size = lfs_meta.get("size")
+        if lfs_size:
+            return lfs_size
+    plain_size = sibling.get("size")
+    if plain_size:
+        return plain_size
+    return 0
+
+
+@router.get("/admin/api/hf-search")
+async def api_hf_search(
+    q: str = "",
+    gguf_mode: str = "gguf",   # "gguf" | "all" | "no-gguf"
+    pipeline_tag: str = "",
+    sort: str = "downloads",
+    sizes: str = "",            # comma-separated e.g. "7b,70b"
+    arch: str = "",
+    username: str = Depends(require_admin),
+):
+    """Proxy HuggingFace model search; supports multiple sizes via parallel requests.
+
+    One request is sent per entry of ``sizes`` (at most six), or a single
+    request when ``sizes`` is empty. Results are de-duplicated by model ID and
+    at most 20 entries are returned, each reduced to ``id``, ``downloads``,
+    ``likes`` and ``pipeline_tag``.
+
+    Raises:
+        HTTPException 502: an exception escaped the search/merge step.
+    """
+    import asyncio
+    import urllib.request
+    import urllib.parse
+    import json as _json
+
+    # Unknown sort keys silently fall back to "downloads".
+    if sort not in ("downloads", "likes", "lastModified", "createdAt"):
+        sort = "downloads"
+
+    # Filter tags shared across all requests
+    filter_pairs: list = []
+    if gguf_mode == "gguf":
+        filter_pairs.append(("filter", "gguf"))
+    if pipeline_tag:
+        filter_pairs.append(("filter", pipeline_tag))
+    if arch == "lora":
+        filter_pairs.append(("filter", "lora"))
+
+    # Base search keywords
+    base_parts = [q.strip()] if q.strip() else []
+    if arch == "moe":
+        base_parts.append("moe")
+
+    # Blank entries are dropped and the list is capped at six sizes.
+    size_list = [s.strip() for s in sizes.split(",") if s.strip()][:6]
+
+    async def _one(extra_kw: str = "") -> list:
+        """Run one search request, optionally appending ``extra_kw`` to the query."""
+        parts = base_parts + ([extra_kw] if extra_kw else [])
+        effective_q = " ".join(parts)
+        # Smaller per-request limit when several size queries are merged.
+        limit = "12" if size_list else "20"
+        pairs = []
+        if effective_q:
+            pairs.append(("search", effective_q))
+        pairs.extend(filter_pairs)
+        pairs += [("sort", sort), ("direction", "-1"), ("limit", limit), ("full", "false")]
+        url = "https://huggingface.co/api/models?" + urllib.parse.urlencode(pairs)
+        rq = urllib.request.Request(url, headers={"User-Agent": "coderai-admin/1.0"})
+        def _fetch():
+            with urllib.request.urlopen(rq, timeout=15) as resp:
+                return _json.loads(resp.read())
+        # urlopen blocks, so it is run in a worker thread.
+        return await asyncio.to_thread(_fetch)
+
+    try:
+        if size_list:
+            batches = await asyncio.gather(*[_one(sz) for sz in size_list], return_exceptions=True)
+        else:
+            batches = [await _one()]
+
+        seen: set = set()
+        merged: list = []
+        for batch in batches:
+            # NOTE(review): failed size queries are skipped silently, so if every
+            # one fails the endpoint returns [] instead of a 502 — confirm intended.
+            if isinstance(batch, Exception):
+                continue
+            for m in batch:
+                mid = m.get("modelId") or m.get("id", "")
+                if mid and mid not in seen:
+                    seen.add(mid)
+                    merged.append(m)
+
+        # Re-sort after merging; for other sort keys the merged order is kept.
+        if sort == "downloads":
+            merged.sort(key=lambda m: m.get("downloads", 0), reverse=True)
+        elif sort == "likes":
+            merged.sort(key=lambda m: m.get("likes", 0), reverse=True)
+
+        # "no-gguf" filters on the model ID text only.
+        if gguf_mode == "no-gguf":
+            merged = [m for m in merged if "gguf" not in (m.get("modelId") or m.get("id", "")).lower()]
+
+        return [
+            {
+                "id": m.get("modelId") or m.get("id", ""),
+                "downloads": m.get("downloads", 0),
+                "likes": m.get("likes", 0),
+                "pipeline_tag": m.get("pipeline_tag", ""),
+            }
+            for m in merged[:20]
+        ]
+    except Exception as e:
+        raise HTTPException(status_code=502, detail=f"HuggingFace API error: {e}")
+
+
+@router.get("/admin/api/hf-model-files")
+async def api_hf_model_files(model_id: str, username: str = Depends(require_admin)):
+    """Return GGUF files (name, size, VRAM estimate, quant type) for an HF model repo.
+
+    Returns:
+        A list of ``{"name", "size_gb", "vram_gb", "quant"}`` dicts sorted by
+        ascending size; unknown sizes sort first.
+
+    Raises:
+        HTTPException 502: the HuggingFace request failed or did not return a
+            JSON object.
+    """
+    import asyncio
+    import urllib.request
+    import urllib.parse
+    import json as _json
+
+    safe_id = urllib.parse.quote(model_id, safe="/")
+    url = f"https://huggingface.co/api/models/{safe_id}"
+
+    def _fetch():
+        req = urllib.request.Request(url, headers={"User-Agent": "coderai-admin/1.0"})
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            return _json.loads(resp.read())
+
+    try:
+        # urlopen blocks; run it in a worker thread like the search endpoint
+        # does, so the event loop stays responsive.
+        data = await asyncio.to_thread(_fetch)
+    except Exception as e:
+        raise HTTPException(status_code=502, detail=f"HuggingFace API error: {e}")
+    if not isinstance(data, dict):
+        # e.g. an empty model_id hits the listing endpoint, which returns a list.
+        raise HTTPException(status_code=502, detail="HuggingFace API error: unexpected response")
+
+    files = []
+    for sib in data.get("siblings", []):
+        name = sib.get("rfilename", "")
+        if not name.lower().endswith(".gguf"):
+            continue
+        size_bytes = _hf_file_size(sib)
+        size_gb = round(size_bytes / 1024 ** 3, 2) if size_bytes else None
+        # VRAM estimate: 1.1x the file size.
+        vram_gb = round(size_gb * 1.1, 1) if size_gb else None
+        m = _QUANT_RE.search(name)
+        quant = m.group(1).upper() if m else None
+        files.append({
+            "name": name,
+            "size_gb": size_gb,
+            "vram_gb": vram_gb,
+            "quant": quant,
+        })
+
+    files.sort(key=lambda f: f.get("size_gb") or 0)
+    return files
+
+
+@router.get("/admin/api/hf-model-info")
+async def api_hf_model_info(model_id: str, username: str = Depends(require_admin)):
+    """Full metadata for a single HuggingFace model repo.
+
+    Returns:
+        A dict with repo identity and popularity fields, filtered tags,
+        card-derived license/language/base model, a human-readable parameter
+        count (``params_label``), the repo's GGUF files and the total file count.
+
+    Raises:
+        HTTPException 502: the HuggingFace request failed or did not return a
+            JSON object.
+    """
+    import asyncio
+    import urllib.request
+    import urllib.parse
+    import json as _json
+
+    safe_id = urllib.parse.quote(model_id, safe="/")
+    url = f"https://huggingface.co/api/models/{safe_id}"
+
+    def _fetch():
+        req = urllib.request.Request(url, headers={"User-Agent": "coderai-admin/1.0"})
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            return _json.loads(resp.read())
+
+    try:
+        # urlopen blocks; run it in a worker thread like the search endpoint
+        # does, so the event loop stays responsive.
+        data = await asyncio.to_thread(_fetch)
+    except Exception as e:
+        raise HTTPException(status_code=502, detail=f"HuggingFace API error: {e}")
+    if not isinstance(data, dict):
+        # e.g. an empty model_id hits the listing endpoint, which returns a list.
+        raise HTTPException(status_code=502, detail="HuggingFace API error: unexpected response")
+
+    card = data.get("cardData") or {}
+
+    # Parameter count from safetensors metadata
+    params_label = None
+    sf = data.get("safetensors") or {}
+    total = sf.get("total")
+    if total:
+        if total >= 1e12:
+            params_label = f"{total/1e12:.1f}T"
+        elif total >= 1e9:
+            params_label = f"{total/1e9:.1f}B"
+        elif total >= 1e6:
+            params_label = f"{total/1e6:.0f}M"
+        else:
+            params_label = str(total)
+
+    # GGUF files with quant/size info
+    gguf_files = []
+    for sib in data.get("siblings", []):
+        name = sib.get("rfilename", "")
+        if not name.lower().endswith(".gguf"):
+            continue
+        size_bytes = _hf_file_size(sib)
+        size_gb = round(size_bytes / 1024 ** 3, 2) if size_bytes else None
+        # VRAM estimate: 1.1x the file size.
+        vram_gb = round(size_gb * 1.1, 1) if size_gb else None
+        m = _QUANT_RE.search(name)
+        gguf_files.append({
+            "name": name,
+            "size_gb": size_gb,
+            "vram_gb": vram_gb,
+            "quant": m.group(1).upper() if m else None,
+        })
+    gguf_files.sort(key=lambda f: f.get("size_gb") or 0)
+
+    # All repo files (for total count)
+    all_files = [sib.get("rfilename", "") for sib in data.get("siblings", [])]
+
+    # Relevant tags (strip common noisy ones)
+    _noise = {"transformers", "safetensors", "gguf", "endpoints_compatible",
+              "has_space", "region:us", "license:other"}
+    tags = [t for t in data.get("tags", []) if t not in _noise]
+
+    # base_model may be a single string or a list of repo IDs.
+    base_model = card.get("base_model") or ""
+    if isinstance(base_model, list):
+        base_model = ", ".join(base_model)
+
+    return {
+        "id": data.get("modelId") or data.get("id", ""),
+        "author": data.get("author", ""),
+        "pipeline_tag": data.get("pipeline_tag", ""),
+        "downloads": data.get("downloads", 0),
+        "likes": data.get("likes", 0),
+        "last_modified": data.get("lastModified", ""),
+        "private": data.get("private", False),
+        "gated": data.get("gated", False),
+        "tags": tags,
+        "license": card.get("license", ""),
+        "language": card.get("language") or [],
+        "base_model": base_model,
+        "params_label": params_label,
+        "gguf_files": gguf_files,
+        "file_count": len(all_files),
+    }
--- a/codai/admin/static/style.css
+++ b/codai/admin/static/style.css
-/* CoderAI Admin Dashboard - Dark Theme */
+@import url('https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');

 :root {
-    --bg-primary: #0d1117;
-    --bg-secondary: #161b22;
-    --bg-tertiary: #21262d;
-    --border-color: #30363d;
-    --text-primary: #c9d1d9;
-    --text-secondary: #8b949e;
-    --text-muted: #6e7681;
-    --accent-blue: #58a6ff;
-    --accent-green: #3fb950;
-    --accent-red: #f85149;
-    --accent-yellow: #d29922;
-    --accent-purple: #bc8cff;
-}
-
-* {
-    margin: 0;
-    padding: 0;
-    box-sizing: border-box;
-}
-
-body {
-    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Noto Sans', Helvetica, Arial, sans-serif;
-    background-color: var(--bg-primary);
-    color: var(--text-primary);
-    line-height: 1.6;
-}
-
-/* Layout */
-.layout {
-    display: flex;
-    min-height: 100vh;
-}
-
-.sidebar {
-    width: 260px;
-    background-color: var(--bg-secondary);
-    border-right: 1px solid var(--border-color);
-    display: flex;
-    flex-direction: column;
-    position: fixed;
-    height: 100vh;
-    overflow-y: auto;
-}
-
-.main-content {
-    flex: 1;
-    margin-left: 260px;
-    padding: 2rem;
-    max-width: 100%;
-}
-
-.content-wrapper {
-    max-width: 1400px;
-    margin: 0 auto;
-}
-
-/* Logo */
-.logo {
-    padding: 1.5rem;
-    border-bottom: 1px solid var(--border-color);
-}
-
-.logo h1 {
-    font-size: 1.5rem;
-    color: var(--accent-blue);
-    font-weight: 600;
-}
-
-/* Navigation */
-.nav {
-    flex: 1;
-    padding: 1rem 0;
-}
-
-.nav-item {
-    display: flex;
-    align-items: center;
-    padding: 0.75rem 1.5rem;
-    color: var(--text-secondary);
-    text-decoration: none;
-    transition: all 0.2s;
-    border-left: 3px solid transparent;
-}
-
-.nav-item:hover {
-    background-color: var(--bg-tertiary);
-    color: var(--text-primary);
-}
-
-.nav-item.active {
-    background-color: var(--bg-tertiary);
-    color: var(--accent-blue);
-    border-left-color: var(--accent-blue);
-}
-
-.nav-item .icon {
-    margin-right: 0.75rem;
-    font-size: 1.2rem;
-}
-
-/* Sidebar Footer */
-.sidebar-footer {
-    padding: 1rem 1.5rem;
-    border-top: 1px solid var(--border-color);
-}
-
-.user-info {
-    display: flex;
-    align-items: center;
-    margin-bottom: 0.75rem;
-    color: var(--text-secondary);
-    font-size: 0.9rem;
-}
-
-.user-info .icon {
-    margin-right: 0.5rem;
-}
-
-.logout-btn {
-    display: block;
-    width: 100%;
-    padding: 0.5rem;
-    background-color: var(--bg-tertiary);
-    color: var(--text-primary);
-    text-align: center;
-    text-decoration: none;
-    border-radius: 6px;
-    border: 1px solid var(--border-color);
-    transition: all 0.2s;
-}
-
-.logout-btn:hover {
-    background-color: var(--accent-red);
-    border-color: var(--accent-red);
-}
-
-/* Page Header */
-.page-header {
-    display: flex;
-    justify-content: space-between;
-    align-items: center;
-    margin-bottom: 2rem;
-}
-
-.page-header h1 {
-    font-size: 2rem;
-    font-weight: 600;
-}
-
-.header-actions {
-    display: flex;
-    gap: 0.75rem;
-}
-
-/* Cards */
-.card {
-    background-color: var(--bg-secondary);
-    border: 1px solid var(--border-color);
-    border-radius: 8px;
-    padding: 1.5rem;
-    margin-bottom: 1.5rem;
-}
-
-.card h3 {
-    font-size: 1.25rem;
-    margin-bottom: 1rem;
-    color: var(--text-primary);
-}
-
-/* Dashboard Grid */
-.dashboard-grid {
-    display: grid;
-    grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
-    gap: 1.5rem;
-    margin-bottom: 2rem;
-}
-
-/* Status Grid */
-.status-grid {
-    display: grid;
-    gap: 1rem;
-}
-
-.status-item {
-    display: flex;
-    justify-content: space-between;
-    padding: 0.5rem 0;
-}
-
-.status-item .label {
-    color: var(--text-secondary);
-}
-
-.status-item .value {
-    font-weight: 600;
-}
-
-.status-ok {
-    color: var(--accent-green);
-}
-
-/* Stats Grid */
-.stats-grid {
-    display: grid;
-    grid-template-columns: repeat(3, 1fr);
-    gap: 1rem;
-}
-
-.stat-item {
-    text-align: center;
-}
-
-.stat-value {
-    font-size: 2rem;
-    font-weight: 700;
-    color: var(--accent-blue);
-}
-
-.stat-label {
-    font-size: 0.875rem;
-    color: var(--text-secondary);
-    margin-top: 0.25rem;
-}
-
-/* Progress Bar */
-.progress-bar {
-    width: 100%;
-    height: 24px;
-    background-color: var(--bg-tertiary);
-    border-radius: 12px;
-    overflow: hidden;
-    margin: 1rem 0;
-}
-
-.progress-fill {
-    height: 100%;
-    background: linear-gradient(90deg, var(--accent-blue), var(--accent-purple));
-    transition: width 0.3s ease;
-}
-
-/* Buttons */
-.btn {
-    padding: 0.5rem 1rem;
-    border: none;
-    border-radius: 6px;
-    font-size: 0.875rem;
-    font-weight: 500;
-    cursor: pointer;
-    transition: all 0.2s;
-    text-decoration: none;
-    display: inline-block;
-}
-
-.btn-primary {
-    background-color: var(--accent-blue);
-    color: #fff;
-}
-
-.btn-primary:hover {
-    background-color: #4a8fd8;
-}
-
-.btn-secondary {
-    background-color: var(--bg-tertiary);
-    color: var(--text-primary);
-    border: 1px solid var(--border-color);
-}
-
-.btn-secondary:hover {
-    background-color: var(--border-color);
-}
-
-.btn-danger {
-    background-color: var(--accent-red);
-    color: #fff;
-}
-
-.btn-danger:hover {
-    background-color: #d63939;
-}
-
-.btn-sm {
-    padding: 0.375rem 0.75rem;
-    font-size: 0.8125rem;
-}
-
-.btn-block {
-    width: 100%;
-    display: block;
-}
-
-/* Forms */
-.form {
-    max-width: 600px;
-}
-
-.form-group {
-    margin-bottom: 1.5rem;
-}
-
-.form-group label {
-    display: block;
-    margin-bottom: 0.5rem;
-    color: var(--text-primary);
-    font-weight: 500;
-}
-
-.form-control {
-    width: 100%;
-    padding: 0.625rem;
-    background-color: var(--bg-tertiary);
-    border: 1px solid var(--border-color);
-    border-radius: 6px;
-    color: var(--text-primary);
-    font-size: 0.875rem;
-}
-
-.form-control:focus {
-    outline: none;
-    border-color: var(--accent-blue);
-}
-
-.form-text {
-    display: block;
-    margin-top: 0.25rem;
-    font-size: 0.8125rem;
-    color: var(--text-secondary);
-}
-
-.form-actions {
-    display: flex;
-    gap: 0.75rem;
-    margin-top: 1.5rem;
-}
-
-/* Tables */
-.table-responsive {
-    overflow-x: auto;
-}
-
-.table {
-    width: 100%;
-    border-collapse: collapse;
-}
-
-.table th,
-.table td {
-    padding: 0.75rem;
-    text-align: left;
-    border-bottom: 1px solid var(--border-color);
-}
-
-.table th {
-    color: var(--text-secondary);
-    font-weight: 600;
-    font-size: 0.875rem;
-    text-transform: uppercase;
-}
-
-.table tbody tr:hover {
-    background-color: var(--bg-tertiary);
-}
-
-/* Badges */
-.badge {
-    display: inline-block;
-    padding: 0.25rem 0.5rem;
-    font-size: 0.75rem;
-    font-weight: 600;
-    border-radius: 4px;
-}
-
-.badge-primary {
-    background-color: var(--accent-blue);
-    color: #fff;
-}
-
-.badge-secondary {
-    background-color: var(--bg-tertiary);
-    color: var(--text-secondary);
-    border: 1px solid var(--border-color);
-}
-
-/* Alerts */
-.alert {
-    padding: 1rem;
-    border-radius: 6px;
-    margin-bottom: 1rem;
-}
-
-.alert-error {
-    background-color: rgba(248, 81, 73, 0.1);
-    border: 1px solid var(--accent-red);
-    color: var(--accent-red);
-}
-
-.alert-warning {
-    background-color: rgba(210, 153, 34, 0.1);
-    border: 1px solid var(--accent-yellow);
-    color: var(--accent-yellow);
-}
-
-/* Login Page */
-.login-container {
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    min-height: 100vh;
-    padding: 2rem;
-}
-
-.login-box {
-    width: 100%;
-    max-width: 400px;
-    background-color: var(--bg-secondary);
-    border: 1px solid var(--border-color);
-    border-radius: 8px;
-    padding: 2rem;
-}
-
-.login-header {
-    text-align: center;
-    margin-bottom: 2rem;
-}
-
-.login-header h1 {
-    font-size: 2rem;
-    color: var(--accent-blue);
-    margin-bottom: 0.5rem;
-}
-
-.login-header p {
-    color: var(--text-secondary);
-}
-
-.login-form {
-    margin-bottom: 1.5rem;
-}
-
-.login-footer {
-    text-align: center;
-    padding-top: 1rem;
-    border-top: 1px solid var(--border-color);
-}
-
-/* Tabs */
-.tabs {
-    display: flex;
-    gap: 0.5rem;
-    margin-bottom: 1.5rem;
-    border-bottom: 1px solid var(--border-color);
-}
-
-.tab-btn {
-    padding: 0.75rem 1.5rem;
-    background: none;
-    border: none;
-    color: var(--text-secondary);
-    cursor: pointer;
-    border-bottom: 2px solid transparent;
-    transition: all 0.2s;
-}
-
-.tab-btn:hover {
-    color: var(--text-primary);
-}
-
-.tab-btn.active {
-    color: var(--accent-blue);
-    border-bottom-color: var(--accent-blue);
-}
-
-.tab-content {
-    display: none;
-}
-
-.tab-content.active {
-    display: block;
-}
-
-/* Modal */
-.modal {
-    display: none;
-    position: fixed;
-    top: 0;
-    left: 0;
-    width: 100%;
-    height: 100%;
-    background-color: rgba(0, 0, 0, 0.7);
-    z-index: 1000;
-    align-items: center;
-    justify-content: center;
-}
-
-.modal-content {
-    background-color: var(--bg-secondary);
-    border: 1px solid var(--border-color);
-    border-radius: 8px;
-    width: 90%;
-    max-width: 600px;
-    max-height: 90vh;
-    overflow-y: auto;
-}
-
-.modal-header {
-    display: flex;
-    justify-content: space-between;
-    align-items: center;
-    padding: 1.5rem;
-    border-bottom: 1px solid var(--border-color);
-}
-
-.modal-header h2 {
-    font-size: 1.5rem;
-}
-
-.modal-close {
-    background: none;
-    border: none;
-    color: var(--text-secondary);
-    font-size: 1.5rem;
-    cursor: pointer;
-    padding: 0;
-    width: 32px;
-    height: 32px;
-    display: flex;
-    align-items: center;
-    justify-content: center;
-}
-
-.modal-close:hover {
-    color: var(--text-primary);
-}
-
-.modal-body {
-    padding: 1.5rem;
-}
-
-/* Chat Interface */
-.chat-container {
-    display: flex;
-    flex-direction: column;
-    height: calc(100vh - 4rem);
-    background-color: var(--bg-secondary);
-    border: 1px solid var(--border-color);
-    border-radius: 8px;
-}
-
-.chat-header {
-    display: flex;
-    justify-content: space-between;
-    align-items: center;
-    padding: 1rem 1.5rem;
-    border-bottom: 1px solid var(--border-color);
-}
-
-.chat-controls {
-    display: flex;
-    gap: 0.75rem;
-    align-items: center;
-}
-
-.chat-controls select {
-    min-width: 200px;
-}
-
-.chat-messages {
-    flex: 1;
-    overflow-y: auto;
-    padding: 1.5rem;
-}
-
-.welcome-message {
-    text-align: center;
-    padding: 3rem 1rem;
-    color: var(--text-secondary);
-}
-
-.message {
-    display: flex;
-    gap: 1rem;
-    margin-bottom: 1.5rem;
-}
-
-.message-avatar {
-    width: 36px;
-    height: 36px;
-    border-radius: 50%;
-    background-color: var(--bg-tertiary);
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    flex-shrink: 0;
-}
-
-.message-content {
-    flex: 1;
-    padding: 0.75rem 1rem;
-    background-color: var(--bg-tertiary);
-    border-radius: 8px;
-    line-height: 1.6;
-}
-
-.message-user .message-content {
-    background-color: rgba(88, 166, 255, 0.1);
-}
-
-.chat-input-container {
-    padding: 1rem 1.5rem;
-    border-top: 1px solid var(--border-color);
-}
-
-.chat-input-form {
-    display: flex;
-    gap: 0.75rem;
-}
-
-.chat-input {
-    flex: 1;
-    padding: 0.75rem;
-    background-color: var(--bg-tertiary);
-    border: 1px solid var(--border-color);
-    border-radius: 6px;
-    color: var(--text-primary);
-    resize: none;
-    font-family: inherit;
-}
-
-.chat-input:focus {
-    outline: none;
-    border-color: var(--accent-blue);
-}
-
-/* Utility Classes */
-.text-muted {
-    color: var(--text-muted);
-}
-
-.text-center {
-    text-align: center;
-}
-
-.text-warning {
-    color: var(--accent-yellow);
-}
-
-/* Token Display */
-.token-display {
-    display: flex;
-    gap: 0.75rem;
-    align-items: center;
-    padding: 1rem;
-    background-color: var(--bg-tertiary);
-    border-radius: 6px;
-    margin: 1rem 0;
-}
-
-.token-display code {
-    flex: 1;
-    font-family: 'Courier New', monospace;
-    font-size: 0.875rem;
-    word-break: break-all;
-}
-
-/* Responsive */
-@media (max-width: 768px) {
-    .sidebar {
-        width: 100%;
-        position: relative;
-        height: auto;
-    }
-    
-    .main-content {
-        margin-left: 0;
-    }
-    
-    .dashboard-grid {
-        grid-template-columns: 1fr;
-    }
-    
-    .stats-grid {
-        grid-template-columns: 1fr;
-    }
+  --bg:       #08090D;
+  --nav:      #0C0D13;
+  --card:     #111218;
+  --raised:   #161820;
+  --border:   #1A1D28;
+  --border-2: #252836;
+  --text:     #DDE1F0;
+  --text-2:   #636880;
+  --text-3:   #2E3145;
+  --accent:   #6366F1;
+  --accent-s: rgba(99,102,241,.12);
+  --green:    #34D399;
+  --amber:    #F59E0B;
+  --red:      #F87171;
+  --font:     'Plus Jakarta Sans', system-ui, sans-serif;
+  --mono:     'JetBrains Mono', monospace;
+}
+
+/* Global reset: drop default margins/padding and size every box by its border. */
+*,
+*::before,
+*::after {
+  margin: 0;
+  padding: 0;
+  box-sizing: border-box;
+}
+
+html { scroll-behavior: smooth; }
+
+body {
+  font-family: var(--font);
+  font-size: 14px;
+  background: var(--bg);
+  color: var(--text);
+  line-height: 1.5;
+  -webkit-font-smoothing: antialiased;
+}
+
+a {
+  color: inherit;
+  text-decoration: none;
+}
+
+/* Form controls do not pick up the page font unless told to. */
+button,
+input,
+select,
+textarea { font-family: inherit; }
+
+/* ── Topnav ── sticky 44px bar across the top of the page ── */
+.topnav {
+  position: sticky;
+  top: 0;
+  z-index: 200;
+  height: 44px;
+  background: var(--nav);
+  border-bottom: 1px solid var(--border);
+}
+.topnav-inner {
+  max-width: 1200px;
+  margin: 0 auto;
+  padding: 0 1.5rem;
+  height: 100%;
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 1.5rem;
+}
+.topnav-left {
+  display: flex;
+  align-items: center;
+  gap: 1.75rem;
+}
+.topnav-right {
+  display: flex;
+  align-items: center;
+  gap: 0.625rem;
+}
+
+/* Logo: accent-colored square mark followed by the product name. */
+.nav-logo {
+  display: flex;
+  align-items: center;
+  gap: 0.5rem;
+  flex-shrink: 0;
+}
+.nav-logo-mark {
+  width: 22px;
+  height: 22px;
+  background: var(--accent);
+  border-radius: 5px;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  font-size: 9px;
+  font-weight: 700;
+  color: #fff;
+  letter-spacing: -0.01em;
+}
+.nav-logo-name {
+  font-size: 13px;
+  font-weight: 700;
+  letter-spacing: -0.01em;
+}
+
+/* Navigation links; the .active modifier highlights one of them. */
+.nav-links {
+  display: flex;
+  align-items: center;
+  gap: 1px;
+}
+.nav-link {
+  padding: 0.3125rem 0.625rem;
+  font-size: 13px;
+  font-weight: 500;
+  color: var(--text-2);
+  border-radius: 5px;
+  transition: color 0.1s, background 0.1s;
+  white-space: nowrap;
+}
+.nav-link:hover {
+  color: var(--text);
+  background: rgba(255, 255, 255, 0.04);
+}
+.nav-link.active {
+  color: var(--text);
+  background: var(--accent-s);
+}
+
+/* User name, thin vertical separator and the sign-out control. */
+.nav-username {
+  font-size: 12.5px;
+  color: var(--text-2);
+}
+.nav-sep {
+  width: 1px;
+  height: 14px;
+  background: var(--border-2);
+}
+.nav-logout {
+  font-size: 12.5px;
+  color: var(--text-3);
+  padding: 0.25rem 0.5rem;
+  border: 1px solid var(--border);
+  border-radius: 4px;
+  transition: all 0.1s;
+  cursor: pointer;
+  background: transparent;
+}
+.nav-logout:hover {
+  color: var(--text-2);
+  border-color: var(--border-2);
+}
+
+/* ── Main ── fills the viewport height left under the 44px top bar ── */
+.main { min-height: calc(100vh - 44px); }
+.container {
+  max-width: 1100px;
+  margin: 0 auto;
+  padding: 2rem 1.5rem;
+}
+
+/* ── Page header ── title block on the left, action buttons on the right ── */
+.page-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: flex-start;
+  margin-bottom: 1.5rem;
+  gap: 1rem;
+}
+.page-header h1 {
+  font-size: 1.125rem;
+  font-weight: 700;
+  letter-spacing: -0.01em;
+}
+.page-header p {
+  font-size: 12.5px;
+  color: var(--text-2);
+  margin-top: 0.2rem;
+}
+.header-actions {
+  display: flex;
+  gap: 0.5rem;
+  flex-shrink: 0;
+  align-items: center;
+}
+
+/* ── Cards ── bordered panel with a small uppercase title ── */
+.card {
+  background: var(--card);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  padding: 1.25rem;
+  margin-bottom: 1rem;
+}
+.card-title {
+  font-size: 11px;
+  font-weight: 700;
+  text-transform: uppercase;
+  letter-spacing: 0.07em;
+  color: var(--text-2);
+  margin-bottom: 1rem;
+}
+
+/* ── Stat grid ── auto-fitting columns, each at least 180px wide ── */
+.stat-grid {
+  display: grid;
+  grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
+  gap: 0.75rem;
+  margin-bottom: 1rem;
+}
+.stat {
+  background: var(--card);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  padding: 1.125rem;
+}
+.stat-label {
+  font-size: 11px;
+  font-weight: 600;
+  text-transform: uppercase;
+  letter-spacing: 0.07em;
+  color: var(--text-3);
+  margin-bottom: 0.5rem;
+}
+.stat-value {
+  font-size: 1.625rem;
+  font-weight: 700;
+  letter-spacing: -0.02em;
+  line-height: 1;
+}
+.stat-sub {
+  font-size: 11.5px;
+  color: var(--text-3);
+  margin-top: 0.375rem;
+  font-family: var(--mono);
+}
+
+/* Live indicator: green label preceded by a small glowing dot that blinks. */
+.live {
+  display: inline-flex;
+  align-items: center;
+  gap: .375rem;
+  font-size: 11.5px;
+  font-weight: 600;
+  color: var(--green);
+  font-family: var(--mono);
+}
+.live::before {
+  content: '';
+  width: 5px;
+  height: 5px;
+  border-radius: 50%;
+  background: var(--green);
+  box-shadow: 0 0 4px var(--green);
+  animation: blink 2s ease infinite;
+}
+@keyframes blink {
+  0%, 100% { opacity: 1; }
+  50% { opacity: .25; }
+}
+/* The blink never stops on its own, so honor the OS-level "reduce motion"
+   preference and show a steady dot instead. */
+@media (prefers-reduced-motion: reduce) {
+  .live::before { animation: none; }
+}
+
+/* Progress: 3px track; the fill's width animates over half a second. */
+.progress {
+  height: 3px;
+  background: var(--raised);
+  border-radius: 2px;
+  margin-top: 0.75rem;
+  overflow: hidden;
+}
+.progress-fill {
+  height: 100%;
+  background: var(--accent);
+  transition: width 0.5s;
+}
+.progress-labels {
+  display: flex;
+  justify-content: space-between;
+  font-size: 11px;
+  color: var(--text-3);
+  margin-top: 0.3rem;
+  font-family: var(--mono);
+}
+
+/* ── Buttons ── base .btn plus color variants, a small size and a disabled state ── */
+.btn {
+  display: inline-flex;
+  align-items: center;
+  gap: 0.375rem;
+  padding: 0.375rem 0.875rem;
+  border: none;
+  border-radius: 6px;
+  font-size: 13px;
+  font-weight: 600;
+  cursor: pointer;
+  transition: all 0.1s;
+  white-space: nowrap;
+  line-height: 1.4;
+}
+/* Inline icons inside a button. */
+.btn svg {
+  width: 13px;
+  height: 13px;
+  stroke-width: 2;
+  flex-shrink: 0;
+}
+
+.btn-primary {
+  background: var(--accent);
+  color: #fff;
+}
+.btn-primary:hover { background: #7577F3; }
+
+.btn-secondary {
+  background: var(--raised);
+  color: var(--text);
+  border: 1px solid var(--border);
+}
+.btn-secondary:hover {
+  background: var(--border);
+  border-color: var(--border-2);
+}
+
+.btn-ghost {
+  background: transparent;
+  color: var(--text-2);
+  border: 1px solid var(--border);
+}
+.btn-ghost:hover {
+  color: var(--text);
+  border-color: var(--border-2);
+}
+
+.btn-danger {
+  background: rgba(248, 113, 113, 0.08);
+  color: var(--red);
+  border: 1px solid rgba(248, 113, 113, 0.2);
+}
+.btn-danger:hover {
+  background: rgba(248, 113, 113, 0.15);
+  border-color: rgba(248, 113, 113, 0.4);
+}
+
+.btn-sm {
+  padding: 0.25rem 0.625rem;
+  font-size: 12px;
+}
+.btn-sm svg {
+  width: 11px;
+  height: 11px;
+}
+
+.btn:disabled {
+  opacity: 0.4;
+  cursor: not-allowed;
+}
+
+/* ── Forms ── row, label, input, hint and action-row styles ── */
+.form-row { margin-bottom: 1rem; }
+.form-label {
+  display: block;
+  font-size: 11px;
+  font-weight: 700;
+  text-transform: uppercase;
+  letter-spacing: 0.06em;
+  color: var(--text-2);
+  margin-bottom: 0.35rem;
+}
+.form-input {
+  width: 100%;
+  padding: 0.5rem 0.75rem;
+  background: var(--raised);
+  border: 1px solid var(--border);
+  border-radius: 6px;
+  color: var(--text);
+  font-size: 13.5px;
+  transition: border-color 0.1s, box-shadow 0.1s;
+}
+/* The default outline is replaced by an accent border plus a soft ring. */
+.form-input:focus {
+  outline: none;
+  border-color: var(--accent);
+  box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.12);
+}
+.form-input::placeholder { color: var(--text-3); }
+/* Selects lose the native arrow and get an inline SVG caret on the right. */
+select.form-input {
+  cursor: pointer;
+  appearance: none;
+  background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='10' height='5' viewBox='0 0 10 5'%3E%3Cpath d='M0 0l5 5 5-5z' fill='%23363A4D'/%3E%3C/svg%3E");
+  background-repeat: no-repeat;
+  background-position: right 0.75rem center;
+  background-size: 8px;
+  padding-right: 2rem;
+}
+.form-hint {
+  font-size: 11.5px;
+  color: var(--text-3);
+  margin-top: 0.25rem;
+}
+.form-actions {
+  display: flex;
+  gap: 0.5rem;
+  margin-top: 1.25rem;
+  align-items: center;
+}
+
+/* ── Alerts ── tinted message boxes: error (red), warning (amber), info (accent) ── */
+.alert {
+  display: flex;
+  align-items: flex-start;
+  gap: 0.5rem;
+  padding: 0.625rem 0.875rem;
+  border-radius: 6px;
+  font-size: 13px;
+  margin-bottom: 1rem;
+}
+.alert-error {
+  background: rgba(248, 113, 113, 0.07);
+  border: 1px solid rgba(248, 113, 113, 0.2);
+  color: var(--red);
+}
+.alert-warning {
+  background: rgba(245, 158, 11, 0.07);
+  border: 1px solid rgba(245, 158, 11, 0.2);
+  color: var(--amber);
+}
+.alert-info {
+  background: var(--accent-s);
+  border: 1px solid rgba(99, 102, 241, 0.25);
+  color: #A5B4FC;
+}
+
+/* ── Tables ── bordered, rounded wrapper around a full-width table ── */
+/* The wrapper scrolls horizontally instead of clipping: header cells are
+   nowrap, so on narrow screens the table can be wider than its wrapper.
+   Vertical overflow stays hidden, which keeps the rounded corners clipped. */
+.table-wrap {
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  overflow-x: auto;
+  overflow-y: hidden;
+}
+table {
+  width: 100%;
+  border-collapse: collapse;
+}
+thead { background: var(--raised); }
+th {
+  padding: .5rem 1rem;
+  text-align: left;
+  font-size: 11px;
+  font-weight: 700;
+  text-transform: uppercase;
+  letter-spacing: .07em;
+  color: var(--text-3);
+  border-bottom: 1px solid var(--border);
+  white-space: nowrap;
+}
+td {
+  padding: .625rem 1rem;
+  font-size: 13px;
+  color: var(--text-2);
+  border-bottom: 1px solid var(--border);
+}
+/* The wrapper already draws the bottom edge. */
+tbody tr:last-child td { border-bottom: none; }
+tbody tr:hover td {
+  background: rgba(255, 255, 255, .015);
+  color: var(--text);
+}
+/* !important keeps these colors when the row-hover rule above also matches. */
+.td-name {
+  font-weight: 600;
+  color: var(--text) !important;
+}
+td code {
+  font-family: var(--mono);
+  font-size: 11.5px;
+  background: var(--raised);
+  padding: .1rem .35rem;
+  border-radius: 3px;
+}
+.empty-row td {
+  text-align: center;
+  padding: 2.5rem;
+  color: var(--text-3) !important;
+}
+
+/* ── Badges ── small uppercase pills: admin, user and ok variants ── */
+.badge {
+  display: inline-flex;
+  align-items: center;
+  padding: 0.15rem 0.45rem;
+  font-size: 11px;
+  font-weight: 700;
+  border-radius: 4px;
+  text-transform: uppercase;
+  letter-spacing: 0.04em;
+}
+.badge-admin {
+  background: var(--accent-s);
+  color: #A5B4FC;
+  border: 1px solid rgba(99, 102, 241, 0.2);
+}
+.badge-user {
+  background: var(--raised);
+  color: var(--text-3);
+  border: 1px solid var(--border);
+}
+.badge-ok {
+  background: rgba(52, 211, 153, 0.08);
+  color: var(--green);
+  border: 1px solid rgba(52, 211, 153, 0.2);
+}
+
+/* ── Modals ── full-viewport dimmed overlay; hidden until .show is added ── */
+.modal {
+  display: none;
+  position: fixed;
+  inset: 0;
+  background: rgba(0, 0, 0, .6);
+  /* Prefixed form first: Safari releases before 18 only understand -webkit-. */
+  -webkit-backdrop-filter: blur(2px);
+  backdrop-filter: blur(2px);
+  z-index: 500;
+  align-items: center;
+  justify-content: center;
+}
+.modal.show { display: flex; }
+.modal-box {
+  background: var(--card);
+  border: 1px solid var(--border-2);
+  border-radius: 10px;
+  width: 90%;
+  max-width: 440px;
+  max-height: 90vh;
+  overflow-y: auto;
+  animation: pop .12s ease;
+}
+/* Entrance: fade in while scaling up from 97% and sliding down 4px. */
+@keyframes pop {
+  from { opacity: 0; transform: scale(.97) translateY(-4px); }
+  to { opacity: 1; transform: none; }
+}
+.modal-head {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  padding: .875rem 1.125rem;
+  border-bottom: 1px solid var(--border);
+}
+.modal-title {
+  font-size: 14px;
+  font-weight: 700;
+}
+.modal-close {
+  background: none;
+  border: none;
+  color: var(--text-3);
+  cursor: pointer;
+  font-size: 1.125rem;
+  line-height: 1;
+  padding: .125rem;
+  border-radius: 3px;
+  transition: color .1s;
+}
+.modal-close:hover { color: var(--text); }
+.modal-body { padding: 1.125rem; }
+
+/* ── Tabs ── underlined tab strip; only the .active panel is displayed ── */
+.tabs {
+  display: flex;
+  gap: 1px;
+  border-bottom: 1px solid var(--border);
+  margin-bottom: 1.25rem;
+}
+.tab {
+  padding: 0.5rem 0.875rem;
+  font-size: 13px;
+  font-weight: 500;
+  color: var(--text-2);
+  background: none;
+  border: none;
+  cursor: pointer;
+  /* Declared after `border: none` so the 2px bottom border survives; the
+     -1px margin lays it over the strip's own bottom border. */
+  border-bottom: 2px solid transparent;
+  margin-bottom: -1px;
+  transition: color 0.1s;
+}
+.tab:hover { color: var(--text); }
+.tab.active {
+  color: var(--text);
+  border-bottom-color: var(--accent);
+}
+.tab-panel { display: none; }
+.tab-panel.active { display: block; }
+
+/* ── Token box ── monospace token text that may break at any character ── */
+.token-box {
+  display: flex;
+  align-items: center;
+  gap: 0.625rem;
+  padding: 0.625rem 0.875rem;
+  background: var(--raised);
+  border: 1px solid var(--border);
+  border-radius: 6px;
+  margin: 0.875rem 0;
+}
+.token-box code {
+  flex: 1;
+  font-family: var(--mono);
+  font-size: 11.5px;
+  color: var(--accent);
+  word-break: break-all;
+}
+
+/* ── Chat ── column layout: header bar, scrolling message list, input footer ── */
+.chat-wrap {
+  display: flex;
+  flex-direction: column;
+  /* Viewport height minus the 44px top bar and 2rem of breathing room. */
+  height: calc(100vh - 44px - 2rem);
+  background: var(--card);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  overflow: hidden;
+}
+.chat-bar {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  padding: .625rem 1rem;
+  border-bottom: 1px solid var(--border);
+  flex-shrink: 0;
+  gap: .75rem;
+}
+.chat-bar h2 {
+  font-size: 13.5px;
+  font-weight: 700;
+}
+.chat-controls {
+  display: flex;
+  gap: .5rem;
+  align-items: center;
+}
+/* The only flexible row: takes the remaining height and scrolls. */
+.chat-messages {
+  flex: 1;
+  overflow-y: auto;
+  padding: 1.125rem;
+  scroll-behavior: smooth;
+}
+.chat-empty {
+  text-align: center;
+  padding: 4rem 1rem;
+  color: var(--text-3);
+}
+.chat-empty h3 {
+  font-size: 1rem;
+  font-weight: 600;
+  color: var(--text-2);
+  margin-bottom: .35rem;
+}
+
+/* One message: avatar on the left, meta line and text bubble on the right. */
+.msg {
+  display: flex;
+  gap: .625rem;
+  margin-bottom: 1rem;
+}
+.msg-av {
+  width: 24px;
+  height: 24px;
+  border-radius: 5px;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  font-size: 9px;
+  font-weight: 700;
+  flex-shrink: 0;
+  margin-top: 1px;
+  font-family: var(--mono);
+}
+.msg-av.user {
+  background: rgba(99, 102, 241, .15);
+  color: var(--accent);
+  border: 1px solid rgba(99, 102, 241, .2);
+}
+.msg-av.ai {
+  background: var(--raised);
+  color: var(--text-2);
+  border: 1px solid var(--border);
+}
+.msg-body {
+  flex: 1;
+  /* Flex items default to min-width:auto, which stops them shrinking below
+     their longest unbreakable run (a URL, a long token). Zero lets the body
+     shrink so .msg-text can wrap the text instead of overflowing the pane. */
+  min-width: 0;
+}
+.msg-meta {
+  font-size: 11px;
+  color: var(--text-3);
+  margin-bottom: .25rem;
+  font-family: var(--mono);
+}
+.msg-text {
+  background: var(--raised);
+  border: 1px solid var(--border);
+  border-radius: 6px;
+  padding: .5rem .75rem;
+  font-size: 13.5px;
+  line-height: 1.6;
+  color: var(--text);
+  word-wrap: break-word;
+}
+.msg.user .msg-text {
+  background: rgba(99, 102, 241, .06);
+  border-color: rgba(99, 102, 241, .15);
+}
+
+/* Footer: textarea plus send button, with a hint line underneath. */
+.chat-foot {
+  padding: .75rem 1rem;
+  border-top: 1px solid var(--border);
+  flex-shrink: 0;
+}
+.chat-input-row {
+  display: flex;
+  gap: .5rem;
+  align-items: flex-end;
+}
+.chat-textarea {
+  flex: 1;
+  padding: .5rem .75rem;
+  background: var(--raised);
+  border: 1px solid var(--border);
+  border-radius: 6px;
+  color: var(--text);
+  font-size: 13.5px;
+  resize: none;
+  min-height: 38px;
+  max-height: 140px;
+  line-height: 1.5;
+  transition: border-color .1s;
+}
+.chat-textarea:focus {
+  outline: none;
+  border-color: var(--accent);
+}
+.chat-hint {
+  font-size: 11px;
+  color: var(--text-3);
+  margin-top: .375rem;
+}
+
+/* ── Login ── card centered in the full viewport ── */
+.login-wrap {
+  min-height: 100vh;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  background: var(--bg);
+  padding: 1.5rem;
+}
+.login-card {
+  width: 100%;
+  max-width: 360px;
+  background: var(--card);
+  border: 1px solid var(--border-2);
+  border-radius: 10px;
+  padding: 2rem;
+}
+.login-logo {
+  display: flex;
+  align-items: center;
+  gap: 0.625rem;
+  margin-bottom: 1.75rem;
+}
+.login-mark {
+  width: 30px;
+  height: 30px;
+  background: var(--accent);
+  border-radius: 7px;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  font-size: 11px;
+  font-weight: 700;
+  color: #fff;
+}
+.login-logo-text h1 {
+  font-size: 1.0625rem;
+  font-weight: 700;
+  letter-spacing: -0.01em;
+}
+.login-logo-text p {
+  font-size: 11.5px;
+  color: var(--text-2);
+}
+.login-footer {
+  margin-top: 1.25rem;
+  text-align: center;
+  font-size: 11.5px;
+  color: var(--text-3);
+  font-family: var(--mono);
+}
+
+/* ── Centered form ── single card centered under the top bar (change password, etc.) ── */
+.centered-wrap {
+  min-height: calc(100vh - 44px);
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  padding: 1.5rem;
+}
+.centered-card {
+  width: 100%;
+  max-width: 400px;
+  background: var(--card);
+  border: 1px solid var(--border);
+  border-radius: 10px;
+  padding: 1.75rem;
+}
+.centered-card h1 {
+  font-size: 1.0625rem;
+  font-weight: 700;
+  margin-bottom: 0.375rem;
+}
+.centered-card .sub {
+  font-size: 12.5px;
+  color: var(--text-2);
+  margin-bottom: 1.5rem;
+}
+
+/* ── Search bar ── the input stretches; siblings keep their natural width ── */
+.search-bar {
+  display: flex;
+  gap: 0.5rem;
+  margin-bottom: 1rem;
+}
+.search-bar .form-input { flex: 1; }
+
+/* ── Divider ── a 1px top border replaces the default <hr> rendering ── */
+hr {
+  border: none;
+  border-top: 1px solid var(--border);
+  margin: 1.125rem 0;
+}
+
+/* ── Utils ── single-purpose helper classes ── */
+.mono { font-family: var(--mono); }
+.muted { color: var(--text-3); }
+.dim { color: var(--text-2); }
+.small { font-size: 12.5px; }
+
+/* Semantic text colors. */
+.text-green { color: var(--green); }
+.text-red { color: var(--red); }
+.text-amber { color: var(--amber); }
+
+/* Layout helpers. */
+.flex { display: flex; }
+.items-center { align-items: center; }
+.gap-2 { gap: 0.5rem; }
+.mb-0 { margin-bottom: 0 !important; }
+
+/* ── Responsive ── tighter nav and container spacing at 640px and below ── */
+@media (max-width: 640px) {
+  .topnav-inner { padding: 0 1rem; }
+  .nav-links { gap: 0; }
+  .nav-link {
+    padding: 0.3rem 0.5rem;
+    font-size: 12.5px;
+  }
+  .container { padding: 1.25rem 1rem; }
 }
--- a/codai/admin/templates/base.html
+++ b/codai/admin/templates/base.html
 <!DOCTYPE html>
 <html lang="en">
 <head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>{% block title %}CoderAI Admin{% endblock %}</title>
-    <link rel="stylesheet" href="/static/admin/style.css">
+  {# Document metadata; child templates can set the title and append to <head>. #}
+  <meta charset="UTF-8">
+  <meta content="width=device-width, initial-scale=1.0" name="viewport">
+  <title>{% block title %}CoderAI{% endblock %}</title>
+  {# Web fonts: open connections to both font hosts early, then load the font CSS. #}
+  <link href="https://fonts.googleapis.com" rel="preconnect">
+  <link href="https://fonts.gstatic.com" rel="preconnect" crossorigin>
+  <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap">
+  <link href="/static/admin/style.css" rel="stylesheet">
+  {% block head %}{% endblock %}
 </head>
 <body>
-    {% if username %}
-    <div class="layout">
-        <aside class="sidebar">
-            <div class="logo">
-                <h1>CoderAI</h1>
-            </div>
-            <nav class="nav">
-                <a href="/admin" class="nav-item {% if request.url.path == '/admin' %}active{% endif %}">
-                    <span class="icon">📊</span>
-                    <span>Overview</span>
-                </a>
-                {% if is_admin %}
-                <a href="/admin/models" class="nav-item {% if '/models' in request.url.path %}active{% endif %}">
-                    <span class="icon">🤖</span>
-                    <span>Models</span>
-                </a>
-                <a href="/admin/tokens" class="nav-item {% if '/tokens' in request.url.path %}active{% endif %}">
-                    <span class="icon">🔑</span>
-                    <span>API Tokens</span>
-                </a>
-                <a href="/admin/users" class="nav-item {% if '/users' in request.url.path %}active{% endif %}">
-                    <span class="icon">👥</span>
-                    <span>Users</span>
-                </a>
-                {% endif %}
-                <a href="/chat" class="nav-item {% if '/chat' in request.url.path %}active{% endif %}">
-                    <span class="icon">💬</span>
-                    <span>Chat</span>
-                </a>
-            </nav>
-            <div class="sidebar-footer">
-                <div class="user-info">
-                    <span class="icon">👤</span>
-                    <span>{{ username }}</span>
-                </div>
-                <a href="/logout" class="logout-btn">Logout</a>
-            </div>
-        </aside>
-        <main class="main-content">
-            <div class="content-wrapper">
-                {% block content %}{% endblock %}
-            </div>
-        </main>
+
+{# Top navigation, rendered only when a username is present in the context. #}
+{% if username %}
+{# Read the request path once; every link below compares against it. #}
+{% set nav_path = request.url.path %}
+<nav class="topnav" aria-label="Main">
+  <div class="topnav-inner">
+    <div class="topnav-left">
+      <a href="/admin" class="nav-logo">
+        <div class="nav-logo-mark">AI</div>
+        <span class="nav-logo-name">CoderAI</span>
+      </a>
+      {# The current link gets both the .active class (visual) and
+         aria-current="page" (for assistive technology). #}
+      <div class="nav-links">
+        <a href="/admin" class="nav-link {% if nav_path == '/admin' %}active{% endif %}"{% if nav_path == '/admin' %} aria-current="page"{% endif %}>Overview</a>
+        <a href="/chat" class="nav-link {% if nav_path == '/chat' %}active{% endif %}"{% if nav_path == '/chat' %} aria-current="page"{% endif %}>Chat</a>
+        {# Admin-only sections; a missing is_admin is treated as false. #}
+        {% if is_admin|default(false) %}
+        <a href="/admin/models" class="nav-link {% if '/models' in nav_path %}active{% endif %}"{% if '/models' in nav_path %} aria-current="page"{% endif %}>Models</a>
+        <a href="/admin/tokens" class="nav-link {% if '/tokens' in nav_path %}active{% endif %}"{% if '/tokens' in nav_path %} aria-current="page"{% endif %}>Tokens</a>
+        <a href="/admin/users" class="nav-link {% if '/users' in nav_path %}active{% endif %}"{% if '/users' in nav_path %} aria-current="page"{% endif %}>Users</a>
+        <a href="/admin/settings" class="nav-link {% if '/settings' in nav_path %}active{% endif %}"{% if '/settings' in nav_path %} aria-current="page"{% endif %}>Settings</a>
+        {% endif %}
+      </div>
    </div>
-    {% else %}
-    <div class="content-wrapper">
-        {% block content %}{% endblock %}
+    {# Right-hand side: user name, thin separator, sign-out link. #}
+    <div class="topnav-right">
+      <span class="nav-username">{{ username }}</span>
+      <div class="nav-sep"></div>
+      <a class="nav-logout" href="/logout">Sign out</a>
    </div>
-    {% endif %}
-    
-    {% block scripts %}{% endblock %}
+  </div>
+</nav>
+{# With a username present, content is wrapped in <main>; it is closed after the wrapper. #}
+<main class="main">
+{% endif %}
+
+{# Child templates can override wrapper_class; the default is "container". #}
+<div class="{% block wrapper_class %}container{% endblock %}">
+  {% block content %}{% endblock %}
+</div>
+
+{% if username %}</main>{% endif %}
+
+{# Page-specific scripts go last in <body>. #}
+{% block scripts %}{% endblock %}
 </body>
 </html>
--- a/codai/admin/templates/change_password.html
+++ b/codai/admin/templates/change_password.html
 {% extends "base.html" %}
-
-{% block title %}Change Password - CoderAI{% endblock %}
+{% block title %}Change Password — CoderAI{% endblock %}

 {% block content %}
-<div class="page-header">
+<div class="centered-wrap">
+  <div class="centered-card">
    <h1>Change Password</h1>
-    {% if must_change %}
-    <p class="text-warning">You must change your password before continuing.</p>
-    {% endif %}
-</div>
+    {# The subtitle depends on whether the password change is forced (must_change). #}
+    <p class="sub">
+      {% if must_change %}
+      You must set a new password before continuing.
+      {% else %}
+      Update your account password.
+      {% endif %}
+    </p>

-{% if error %}
-<div class="alert alert-error">
-    {{ error }}
-</div>
-{% endif %}
+    {# role="alert" exposes the error to assistive technology as an alert,
+       not just as red text. #}
+    {% if error %}
+    <div class="alert alert-error" role="alert">{{ error }}</div>
+    {% endif %}

-<div class="card">
-    <form method="post" action="/admin/change-password" class="form">
+    {# Posts old_password (unless the change is forced), new_password and
+       confirm_password to /admin/change-password. #}
+    <form method="post" action="/admin/change-password">
+      {% if not must_change %}
+      <div class="form-row">
+        <label class="form-label" for="old_password">Current Password</label>
+        <input class="form-input" type="password" id="old_password" name="old_password"
+               placeholder="••••••••" required autocomplete="current-password">
+      </div>
+      {% endif %}
+      <div class="form-row">
+        <label class="form-label" for="new_password">New Password</label>
+        <input class="form-input" type="password" id="new_password" name="new_password"
+               placeholder="••••••••" required minlength="8" autocomplete="new-password"
+               aria-describedby="new_password_hint">
+        {# A block element, so the .form-hint top margin applies (vertical
+           margins do nothing on an inline span); the id ties the hint to the
+           input through aria-describedby. #}
+        <div class="form-hint" id="new_password_hint">At least 8 characters</div>
+      </div>
+      <div class="form-row">
+        <label class="form-label" for="confirm_password">Confirm Password</label>
+        <input class="form-input" type="password" id="confirm_password" name="confirm_password"
+               placeholder="••••••••" required minlength="8" autocomplete="new-password">
+      </div>
+      <div class="form-actions">
+        <button type="submit" class="btn btn-primary">Update password</button>
        {% if not must_change %}
-        <div class="form-group">
-            <label for="old_password">Current Password</label>
-            <input type="password" id="old_password" name="old_password" required>
-        </div>
+        <a href="/admin" class="btn btn-ghost">Cancel</a>
        {% endif %}
-        
-        <div class="form-group">
-            <label for="new_password">New Password</label>
-            <input type="password" id="new_password" name="new_password" required minlength="8">
-            <small class="form-text">Minimum 8 characters</small>
-        </div>
-        
-        <div class="form-group">
-            <label for="confirm_password">Confirm New Password</label>
-            <input type="password" id="confirm_password" name="confirm_password" required minlength="8">
-        </div>
-        
-        <div class="form-actions">
-            <button type="submit" class="btn btn-primary">Change Password</button>
-            {% if not must_change %}
-            <a href="/admin" class="btn btn-secondary">Cancel</a>
-            {% endif %}
-        </div>
+      </div>
    </form>
+  </div>
 </div>
 {% endblock %}
--- a/codai/admin/templates/chat.html
+++ b/codai/admin/templates/chat.html
 {% extends "base.html" %}
-
-{% block title %}Chat - CoderAI{% endblock %}
+{% block title %}Chat — CoderAI{% endblock %}
+{% block wrapper_class %}{% endblock %}

 {% block content %}
-<div class="chat-container">
-    <div class="chat-header">
-        <div class="chat-title">
-            <h2>Chat</h2>
-        </div>
-        <div class="chat-controls">
-            <select id="model-selector" class="form-control">
-                <option value="">Select a model...</option>
-            </select>
-            <button class="btn btn-secondary" onclick="newChat()">New Chat</button>
-        </div>
+{# Chat panel header: title, model picker and a button that calls newChat(). #}
+<div class="chat-wrap" style="margin:0 1.5rem 1rem;border-radius:8px">
+  <div class="chat-bar">
+    <h2>Chat</h2>
+    <div class="chat-controls">
+      {# No visible <label> exists for the picker, so aria-label names it. #}
+      <select id="model-sel" class="form-input" aria-label="Model" style="font-size:13px;padding:.3rem .625rem;min-width:200px">
+        <option value="">Select model…</option>
+      </select>
+      <button type="button" class="btn btn-ghost btn-sm" onclick="newChat()">Clear</button>
    </div>
-    
-    <div class="chat-messages" id="chat-messages">
-        <div class="welcome-message">
-            <h3>Welcome to CoderAI Chat</h3>
-            <p>Select a model and start chatting</p>
-        </div>
+  </div>
+
+  {# Message list. role="log" makes it a polite live region, so content added
+     to it later can be announced by assistive technology. #}
+  <div class="chat-messages" id="chat-msgs" role="log" aria-label="Conversation">
+    <div class="chat-empty">
+      <h3>CoderAI Chat</h3>
+      <p>Select a model and start typing</p>
    </div>
-    
-    <div class="chat-input-container">
-        <form id="chat-form" class="chat-input-form">
-            <textarea id="chat-input" class="chat-input" 
-                      placeholder="Type your message..." 
-                      rows="3"></textarea>
-            <button type="submit" class="btn btn-primary" id="send-btn">Send</button>
-        </form>
+  </div>
+
+  <div class="chat-foot">
+    <div id="typing" style="font-size:11px;color:var(--text-3);height:14px;margin-bottom:.3rem;font-family:var(--mono)"></div>
+    <div class="chat-input-row">
+      <textarea id="chat-in" class="chat-textarea" placeholder="Send a message…" rows="1"></textarea>
+      <button class="btn btn-primary" id="send-btn" onclick="send()" style="padding:.5rem .75rem;align-self:flex-end">
+        <svg viewBox="0 0 16 16" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="width:13px;height:13px"><line x1="14" y1="2" x2="7" y2="9"/><polygon points="14 2 10 14 7 9 2 6 14 2"/></svg>
+      </button>
    </div>
+    <div class="chat-hint">Enter to send · Shift+Enter for newline</div>
+  </div>
 </div>
 {% endblock %}

 {% block scripts %}
 <script>
-let currentModel = null;
-let messages = [];
+let history = [];
+let busy = false;

 async function loadModels() {
-    try {
-        const response = await fetch('/v1/models');
-        const data = await response.json();
-        
-        const selector = document.getElementById('model-selector');
-        selector.innerHTML = '<option value="">Select a model...</option>';
-        
-        data.data.forEach(model => {
-            const option = document.createElement('option');
-            option.value = model.id;
-            option.textContent = model.id;
-            selector.appendChild(option);
-        });
-    } catch (error) {
-        console.error('Failed to load models:', error);
-    }
+  // Fill the #model-sel dropdown from GET /v1/models.
+  // Reads `data[].id` from the JSON body; a missing `data` yields just the placeholder.
+  try {
+    const r = await fetch('/v1/models');
+    // fetch() resolves on HTTP errors too, so reject non-2xx explicitly.
+    if (!r.ok) throw new Error('HTTP ' + r.status);
+    const d = await r.json();
+    const sel = document.getElementById('model-sel');
+    sel.innerHTML = '<option value="">Select model…</option>';
+    (d.data || []).forEach(m => {
+      const o = document.createElement('option');
+      o.value = o.textContent = m.id;
+      sel.appendChild(o);
+    });
+  } catch (e) {
+    // Best-effort: the page stays usable, but leave a trace instead of failing silently.
+    console.error('Failed to load models:', e);
+  }
 }

-document.getElementById('model-selector').addEventListener('change', (e) => {
-    currentModel = e.target.value;
-});
-
 function newChat() {
-    messages = [];
-    document.getElementById('chat-messages').innerHTML = `
-        <div class="welcome-message">
-            <h3>New Chat Started</h3>
-            <p>Select a model and start chatting</p>
-        </div>
-    `;
+  // Forget the conversation sent with each request and show the empty-state placeholder.
+  // `history` is this script's own message array (declared with `let`), not window.history.
+  history = [];
+  document.getElementById('chat-msgs').innerHTML = '<div class="chat-empty"><h3>New conversation</h3><p>Start typing below</p></div>';
 }

-function addMessage(role, content) {
-    const messagesDiv = document.getElementById('chat-messages');
-    
-    // Remove welcome message if present
-    const welcome = messagesDiv.querySelector('.welcome-message');
-    if (welcome) {
-        welcome.remove();
-    }
-    
-    const messageDiv = document.createElement('div');
-    messageDiv.className = `message message-${role}`;
-    
-    const avatar = document.createElement('div');
-    avatar.className = 'message-avatar';
-    avatar.textContent = role === 'user' ? '👤' : '🤖';
-    
-    const contentDiv = document.createElement('div');
-    contentDiv.className = 'message-content';
-    contentDiv.textContent = content;
-    
-    messageDiv.appendChild(avatar);
-    messageDiv.appendChild(contentDiv);
-    messagesDiv.appendChild(messageDiv);
-    
-    // Scroll to bottom
-    messagesDiv.scrollTop = messagesDiv.scrollHeight;
+function addMsg(role, text) {
+  // Append one message row to #chat-msgs and scroll the list to the bottom.
+  // role: 'user' selects the YOU/You labels; any other value is shown as AI/Assistant.
+  // text: message body; coerced to a string before rendering.
+  const wrap = document.getElementById('chat-msgs');
+  // The first message removes the empty-state placeholder, if it is still present.
+  wrap.querySelector('.chat-empty')?.remove();
+  const t = new Date().toLocaleTimeString([],{hour:'2-digit',minute:'2-digit'});
+  const d = document.createElement('div');
+  d.className = 'msg ' + role;
+  // Only `text` is interpolated raw, so it is HTML-escaped (&, <, >) first;
+  // newlines are then turned into <br> to preserve line breaks.
+  d.innerHTML = `
+    <div class="msg-av ${role === 'user' ? 'user' : 'ai'}">${role === 'user' ? 'YOU' : 'AI'}</div>
+    <div class="msg-body">
+      <div class="msg-meta">${role === 'user' ? 'You' : 'Assistant'} · ${t}</div>
+      <div class="msg-text">${String(text).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/\n/g,'<br>')}</div>
+    </div>`;
+  wrap.appendChild(d);
+  wrap.scrollTop = wrap.scrollHeight;
 }

-document.getElementById('chat-form').addEventListener('submit', async (e) => {
-    e.preventDefault();
-    
-    if (!currentModel) {
-        alert('Please select a model first');
-        return;
-    }
-    
-    const input = document.getElementById('chat-input');
-    const message = input.value.trim();
-    
-    if (!message) {
-        return;
-    }
-    
-    // Add user message
-    addMessage('user', message);
-    messages.push({ role: 'user', content: message });
-    
-    // Clear input
-    input.value = '';
-    
-    // Disable send button
-    const sendBtn = document.getElementById('send-btn');
-    sendBtn.disabled = true;
-    sendBtn.textContent = 'Sending...';
-    
-    try {
-        const response = await fetch('/v1/chat/completions', {
-            method: 'POST',
-            headers: {
-                'Content-Type': 'application/json'
-            },
-            body: JSON.stringify({
-                model: currentModel,
-                messages: messages,
-                stream: false
-            })
-        });
-        
-        if (!response.ok) {
-            throw new Error('Request failed');
-        }
-        
-        const data = await response.json();
-        const assistantMessage = data.choices[0].message.content;
-        
-        addMessage('assistant', assistantMessage);
-        messages.push({ role: 'assistant', content: assistantMessage });
-        
-    } catch (error) {
-        addMessage('assistant', 'Error: ' + error.message);
-    } finally {
-        sendBtn.disabled = false;
-        sendBtn.textContent = 'Send';
-    }
-});
+async function send() {
+  // Send the textarea content to /v1/chat/completions and render the reply.
+  // No-op while a request is in flight, when no model is selected, or when the input is blank.
+  if (busy) return;
+  const model = document.getElementById('model-sel').value;
+  // Without a model, move focus to the selector instead of sending.
+  if (!model) { document.getElementById('model-sel').focus(); return; }
+  const input = document.getElementById('chat-in');
+  const text = input.value.trim();
+  if (!text) return;

-// Handle Enter key (Shift+Enter for new line)
-document.getElementById('chat-input').addEventListener('keydown', (e) => {
-    if (e.key === 'Enter' && !e.shiftKey) {
-        e.preventDefault();
-        document.getElementById('chat-form').dispatchEvent(new Event('submit'));
-    }
+  // Show and record the user turn before the request; it stays in `history` even if the request fails.
+  addMsg('user', text);
+  history.push({role:'user', content:text});
+  input.value = '';
+  // Undo the auto-grow height applied by the 'input' listener.
+  input.style.height = 'auto';
+
+  // Lock the UI for the duration of the request; released in `finally`.
+  busy = true;
+  document.getElementById('send-btn').disabled = true;
+  document.getElementById('typing').textContent = 'Assistant is typing…';
+
+  try {
+    // The whole accumulated history is sent each time, non-streaming.
+    const r = await fetch('/v1/chat/completions', {
+      method:'POST', headers:{'Content-Type':'application/json'},
+      body: JSON.stringify({model, messages: history, stream:false})
+    });
+    if (!r.ok) throw new Error('HTTP ' + r.status);
+    const d = await r.json();
+    // Throws (and lands in the catch below) if the response has no choices[0].message.
+    const reply = d.choices[0].message.content;
+    addMsg('assistant', reply);
+    history.push({role:'assistant', content:reply});
+  } catch (e) {
+    // Errors are shown as an assistant bubble but are not added to `history`.
+    addMsg('assistant', 'Error: ' + e.message);
+  } finally {
+    busy = false;
+    document.getElementById('send-btn').disabled = false;
+    document.getElementById('typing').textContent = '';
+  }
+}
+
+// Enter sends, Shift+Enter inserts a newline. Enter is ignored while an IME
+// composition is active so confirming a candidate does not send a half-typed message.
+document.getElementById('chat-in').addEventListener('keydown', e => {
+  if (e.key === 'Enter' && !e.shiftKey && !e.isComposing) { e.preventDefault(); send(); }
+});
+// Auto-grow the textarea to fit its content, capped at 140px.
+document.getElementById('chat-in').addEventListener('input', function() {
+  // Reset first so scrollHeight reflects the content, allowing the box to shrink again.
+  this.style.height = 'auto';
+  this.style.height = Math.min(this.scrollHeight, 140) + 'px';
 });

-// Load models on page load
 loadModels();
 </script>
 {% endblock %}
--- a/codai/admin/templates/dashboard.html
+++ b/codai/admin/templates/dashboard.html
 {% extends "base.html" %}
-
-{% block title %}Dashboard - CoderAI{% endblock %}
+{% block title %}Overview — CoderAI{% endblock %}

 {% block content %}
 <div class="page-header">
+  <div>
    <h1>Overview</h1>
-    <div class="header-actions">
-        <button class="btn btn-secondary" onclick="reloadConfig()">Reload Config</button>
-    </div>
+    <p>System status</p>
+  </div>
+  <div class="header-actions">
+    <span class="live" id="live-label">Live</span>
+  </div>
 </div>

-<div class="dashboard-grid">
-    <div class="card">
-        <h3>System Status</h3>
-        <div class="status-grid">
-            <div class="status-item">
-                <span class="label">Backend:</span>
-                <span class="value" id="backend">Loading...</span>
-            </div>
-            <div class="status-item">
-                <span class="label">GPU:</span>
-                <span class="value" id="gpu">Loading...</span>
-            </div>
-            <div class="status-item">
-                <span class="label">Uptime:</span>
-                <span class="value" id="uptime">Loading...</span>
-            </div>
-            <div class="status-item">
-                <span class="label">Status:</span>
-                <span class="value status-ok" id="status">OK</span>
-            </div>
-        </div>
-    </div>
-    
-    <div class="card">
-        <h3>Active Models</h3>
-        <div id="active-models">
-            <p class="text-muted">No models loaded</p>
-        </div>
-        {% if is_admin %}
-        <a href="/admin/models" class="btn btn-primary btn-sm">Manage Models</a>
-        {% endif %}
+<div class="stat-grid">
+  <div class="stat">
+    <div class="stat-label">Status</div>
+    <div class="stat-value small" id="sys-status" style="font-size:1.125rem">—</div>
+    <div class="stat-sub" id="sys-backend">loading…</div>
+  </div>
+  <div class="stat">
+    <div class="stat-label">Models Loaded</div>
+    <div class="stat-value" id="models-count">—</div>
+    <div class="stat-sub" id="models-mode">—</div>
+  </div>
+  <div class="stat">
+    <div class="stat-label">Requests</div>
+    <div class="stat-value" id="req-total">0</div>
+    <div class="stat-sub"><span id="req-active">0</span> active</div>
+  </div>
+  <div class="stat">
+    <div class="stat-label">VRAM</div>
+    <div class="stat-value" id="vram-pct">—</div>
+    <div class="progress" style="margin-top:.625rem">
+      <div class="progress-fill" id="vram-bar" style="width:0%"></div>
    </div>
-    
-    <div class="card">
-        <h3>Request Stats</h3>
-        <div class="stats-grid">
-            <div class="stat-item">
-                <div class="stat-value" id="total-requests">0</div>
-                <div class="stat-label">Total Requests</div>
-            </div>
-            <div class="stat-item">
-                <div class="stat-value" id="active-requests">0</div>
-                <div class="stat-label">Active</div>
-            </div>
-            <div class="stat-item">
-                <div class="stat-value" id="queued-requests">0</div>
-                <div class="stat-label">Queued</div>
-            </div>
-        </div>
-    </div>
-    
-    <div class="card">
-        <h3>VRAM Usage</h3>
-        <div class="progress-bar">
-            <div class="progress-fill" id="vram-progress" style="width: 0%"></div>
-        </div>
-        <p class="text-muted" id="vram-text">0 GB / 0 GB (0%)</p>
+    <div class="progress-labels">
+      <span id="vram-used">—</span><span id="vram-total">—</span>
    </div>
+  </div>
 </div>

-<div class="card">
-    <h3>Recent Activity</h3>
-    <div class="table-responsive">
-        <table class="table">
-            <thead>
-                <tr>
-                    <th>Time</th>
-                    <th>Model</th>
-                    <th>Type</th>
-                    <th>Status</th>
-                    <th>Duration</th>
-                </tr>
-            </thead>
-            <tbody id="activity-table">
-                <tr>
-                    <td colspan="5" class="text-center text-muted">No recent activity</td>
-                </tr>
-            </tbody>
-        </table>
-    </div>
+<div class="card" style="margin-bottom:1rem">
+  <div class="card-title">Models</div>
+  <div id="active-models"><span class="muted small">No models loaded</span></div>
+  {% if is_admin %}
+  <div style="margin-top:.875rem">
+    <a href="/admin/models" class="btn btn-ghost btn-sm">Manage models</a>
+  </div>
+  {% endif %}
+</div>
+
+<div class="card mb-0">
+  <div class="card-title">Recent Activity</div>
+  <div class="table-wrap" style="border:none">
+    <table>
+      <thead><tr><th>Time</th><th>Model</th><th>Type</th><th>Status</th><th>Duration</th></tr></thead>
+      <tbody id="activity-body">
+        <tr class="empty-row"><td colspan="5">No recent activity</td></tr>
+      </tbody>
+    </table>
+  </div>
 </div>
 {% endblock %}

 {% block scripts %}
 <script>
-async function loadStatus() {
-    try {
-        const response = await fetch('/admin/api/status');
-        const data = await response.json();
-        
-        document.getElementById('backend').textContent = data.backend || 'auto';
-        document.getElementById('uptime').textContent = data.uptime || '0h 0m';
-        document.getElementById('status').textContent = data.status === 'ok' ? 'OK' : 'Error';
-        
-        // Update models loaded count
-        if (data.models_loaded > 0) {
-            document.getElementById('active-models').innerHTML = 
-                `<p>${data.models_loaded} model(s) loaded</p>`;
-        }
-    } catch (error) {
-        console.error('Failed to load status:', error);
+async function poll() {
+  // Refresh the dashboard tiles from GET /admin/api/status.
+  // Any fetch or parse failure marks the status tile as "Offline"; other tiles keep their last values.
+  try {
+    const d = await fetch('/admin/api/status').then(r => r.json());
+    const ok = d.status === 'ok';
+    document.getElementById('sys-status').textContent = ok ? 'Online' : 'Error';
+    document.getElementById('sys-status').className = 'stat-value small ' + (ok ? 'text-green' : 'text-red');
+    document.getElementById('sys-backend').textContent = d.backend || d.load_mode || '—';
+    document.getElementById('models-count').textContent = d.models_loaded ?? '—';
+    document.getElementById('models-mode').textContent = d.load_mode ? d.load_mode + ' mode' : '';
+
+    // Model names come from the API and are written through innerHTML below,
+    // so escape them to keep a name containing markup from being parsed as HTML.
+    const esc = s => String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;');
+    const loaded = d.loaded_models || [];
+    const enabled = d.enabled_models || [];
+    const loadedSet = new Set(loaded);
+    // Enabled models that are not currently loaded are listed after the loaded ones, dimmed.
+    const notLoaded = enabled.filter(m => !loadedSet.has(m));
+    let html = '';
+    if(loaded.length) html += loaded.map(m => `<span class="badge badge-admin" style="margin:.125rem" title="Loaded">● ${esc(m)}</span>`).join('');
+    if(notLoaded.length) html += notLoaded.map(m => `<span class="badge" style="margin:.125rem;opacity:.55" title="Enabled, not loaded">○ ${esc(m)}</span>`).join('');
+    document.getElementById('active-models').innerHTML = html || '<span class="muted small">No models loaded</span>';
+
+    const vram = d.vram;
+    const total = vram ? Number(vram.total) : 0;
+    // Require a positive total: dividing by 0 or a missing value would render "NaN%" or "Infinity%".
+    if (total > 0) {
+      const used = Number(vram.used) || 0;
+      const pct = Math.round(used / total * 100);
+      document.getElementById('vram-pct').textContent = pct + '%';
+      // Clamp only the bar width so an over-reported value cannot overflow the track.
+      document.getElementById('vram-bar').style.width = Math.min(100, Math.max(0, pct)) + '%';
+      document.getElementById('vram-used').textContent = used.toFixed(1) + ' GB';
+      document.getElementById('vram-total').textContent = total.toFixed(1) + ' GB';
+    } else {
+      document.getElementById('vram-pct').textContent = 'N/A';
     }
-}

-async function reloadConfig() {
-    if (confirm('Reload configuration from disk? This will not restart the server.')) {
-        try {
-            const response = await fetch('/admin/api/system/reload', { method: 'POST' });
-            if (response.ok) {
-                alert('Configuration reloaded successfully');
-                loadStatus();
-            } else {
-                alert('Failed to reload configuration');
-            }
-        } catch (error) {
-            alert('Error: ' + error.message);
-        }
+    if (d.requests) {
+      document.getElementById('req-total').textContent = d.requests.total ?? 0;
+      document.getElementById('req-active').textContent = d.requests.active ?? 0;
     }
+  } catch {
+    document.getElementById('sys-status').textContent = 'Offline';
+    document.getElementById('sys-status').className = 'stat-value small text-red';
+  }
 }
-
-// Load status on page load
-loadStatus();
-
-// Refresh status every 5 seconds
-setInterval(loadStatus, 5000);
+poll();
+setInterval(poll, 7000);
 </script>
 {% endblock %}
--- a/codai/admin/templates/login.html
+++ b/codai/admin/templates/login.html
-{% extends "base.html" %}
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>Sign in — CoderAI</title>
+  <link rel="preconnect" href="https://fonts.googleapis.com">
+  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+  <link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
+  <link rel="stylesheet" href="/static/admin/style.css">
+</head>
+<body>
+<div class="login-wrap">
+  <div class="login-card">
+    <div class="login-logo">
+      <div class="login-mark">AI</div>
+      <div class="login-logo-text">
+        <h1>CoderAI</h1>
+        <p>Local AI Server</p>
+      </div>
+    </div>

-{% block title %}Login - CoderAI{% endblock %}
+    {% if error %}
+    <div class="alert alert-error" style="margin-bottom:1.25rem">{{ error }}</div>
+    {% endif %}

-{% block content %}
-<div class="login-container">
-    <div class="login-box">
-        <div class="login-header">
-            <h1>CoderAI</h1>
-            <p>Admin Dashboard</p>
-        </div>
-        
-        {% if error %}
-        <div class="alert alert-error">
-            {{ error }}
-        </div>
-        {% endif %}
-        
-        <form method="post" action="/login" class="login-form">
-            <div class="form-group">
-                <label for="username">Username</label>
-                <input type="text" id="username" name="username" required autofocus>
-            </div>
-            
-            <div class="form-group">
-                <label for="password">Password</label>
-                <input type="password" id="password" name="password" required>
-            </div>
-            
-            <button type="submit" class="btn btn-primary btn-block">Login</button>
-        </form>
-        
-        <div class="login-footer">
-            <p class="text-muted">Default credentials: admin / admin</p>
-        </div>
-    </div>
+    <form method="post" action="/login">
+      <div class="form-row">
+        <label class="form-label" for="username">Username</label>
+        <input class="form-input" type="text" id="username" name="username"
+               placeholder="admin" required autofocus autocomplete="username">
+      </div>
+      <div class="form-row">
+        <label class="form-label" for="password">Password</label>
+        <input class="form-input" type="password" id="password" name="password"
+               placeholder="••••••••" required autocomplete="current-password">
+      </div>
+      <button type="submit" class="btn btn-primary" style="width:100%;justify-content:center;margin-top:.875rem;padding:.5625rem">
+        Sign in
+      </button>
+    </form>
+
+    <div class="login-footer">default: admin / admin</div>
+  </div>
 </div>
-{% endblock %}
+</body>
+</html>
--- a/codai/admin/templates/models.html
+++ b/codai/admin/templates/models.html
 {% extends "base.html" %}
+{% block title %}Models — CoderAI{% endblock %}

-{% block title %}Models - CoderAI{% endblock %}
+{% block head %}
+<style>
+/* chips */
+.chip{display:inline-flex;align-items:center;padding:.2rem .55rem;border-radius:20px;cursor:pointer;background:var(--raised);border:1px solid var(--border);font-size:11px;font-weight:500;user-select:none;white-space:nowrap;transition:background .1s,border-color .1s,color .1s}
+.chip:hover{border-color:var(--border-2)}
+.chip.on{background:var(--accent-s);border-color:rgba(99,102,241,.35);color:#A5B4FC}
+.chip-row{display:flex;flex-wrap:wrap;gap:.3rem;align-items:center}
+/* gguf 3-way toggle */
+.tog-grp{display:inline-flex;border:1px solid var(--border);border-radius:6px;overflow:hidden}
+.tog-btn{border:none!important;border-radius:0!important;background:var(--raised)!important;color:var(--text-2)!important;font-size:11px!important;padding:.25rem .65rem!important;cursor:pointer;font-weight:500;transition:all .1s}
+.tog-btn:hover{color:var(--text)!important}
+.tog-btn.on{background:var(--accent-s)!important;color:#A5B4FC!important}
+.tog-btn+.tog-btn{border-left:1px solid var(--border)!important}
+/* filter label */
+.fl{font-size:10px;font-weight:700;text-transform:uppercase;letter-spacing:.06em;color:var(--text-2);white-space:nowrap}
+/* info drawer */
+#info-overlay{display:none;position:fixed;inset:0;background:rgba(0,0,0,.45);z-index:200}
+#info-drawer{position:fixed;top:0;right:0;width:540px;max-width:96vw;height:100vh;background:var(--bg);border-left:1px solid var(--border);z-index:201;overflow-y:auto;transform:translateX(100%);transition:transform .22s cubic-bezier(.4,0,.2,1)}
+#info-drawer.open{transform:translateX(0)}
+#info-sticky{position:sticky;top:0;background:var(--bg);border-bottom:1px solid var(--border);z-index:1;padding:1rem 1.25rem;display:flex;align-items:center;gap:.75rem}
+#info-title{font-weight:600;font-size:14px;flex:1;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
+</style>
+{% endblock %}

 {% block content %}
 <div class="page-header">
+  <div>
    <h1>Models</h1>
-    <div class="header-actions">
-        <button class="btn btn-primary" onclick="showDownloadModal()">Download Model</button>
-    </div>
+    <p>Configure and download AI models</p>
+  </div>
+  <div class="header-actions">
+    <button class="btn btn-secondary" onclick="openModal('upload-modal')">Upload GGUF</button>
+    <button class="btn btn-primary" onclick="openModal('dl-modal')">Download model</button>
+  </div>
 </div>

 <div class="tabs">
-    <button class="tab-btn active" onclick="showTab('local')">Local Models</button>
-    <button class="tab-btn" onclick="showTab('search')">Search HuggingFace</button>
-    <button class="tab-btn" onclick="showTab('config')">Configuration</button>
+  <button class="tab active" onclick="switchTab('local', this)">Local models</button>
+  <button class="tab" onclick="switchTab('search', this)">Find on HuggingFace</button>
 </div>

-<div id="tab-local" class="tab-content active">
-    <div class="card">
-        <h3>Text Models</h3>
-        <div id="text-models-list">
-            <p class="text-muted">No text models configured</p>
-        </div>
+<!-- active downloads strip (all tabs) -->
+<div id="dl-strip" style="display:none;background:var(--raised);border:1px solid var(--border);border-radius:8px;margin-bottom:1rem;padding:.5rem .875rem">
+  <div style="font-size:10px;font-weight:700;text-transform:uppercase;letter-spacing:.06em;color:var(--text-2);margin-bottom:.4rem">Active downloads</div>
+  <div id="dl-strip-list"></div>
+</div>
+
+<!-- LOCAL -->
+<div id="tab-local" class="tab-panel active">
+
+  <!-- cache stats -->
+  <div class="card">
+    <div style="display:flex;align-items:center;justify-content:space-between;flex-wrap:wrap;gap:.5rem;margin-bottom:1rem">
+      <div class="card-title" style="margin:0">Storage</div>
+      <div style="display:flex;gap:.35rem;flex-wrap:wrap">
+        <button class="btn btn-ghost btn-sm" onclick="refreshLocal()">↺ Refresh</button>
+        <button class="btn btn-danger btn-sm" onclick="clearCacheConfirm('hf')">Clear HF</button>
+        <button class="btn btn-danger btn-sm" onclick="clearCacheConfirm('gguf')">Clear GGUF</button>
+        <button class="btn btn-danger btn-sm" onclick="clearCacheConfirm('all')">Clear all</button>
+      </div>
    </div>
-    
-    <div class="card">
-        <h3>Image Models</h3>
-        <div id="image-models-list">
-            <p class="text-muted">No image models configured</p>
-        </div>
+    <div style="display:grid;grid-template-columns:repeat(3,1fr);gap:.75rem" id="cache-stats-row">
+      <div style="background:var(--bg);border:1px solid var(--border);border-radius:6px;padding:.75rem 1rem">
+        <div class="fl">HuggingFace</div>
+        <div id="stat-hf-size" style="font-size:1.2rem;font-weight:600;margin:.3rem 0">—</div>
+        <div id="stat-hf-count" class="muted small">— models</div>
+        <div id="stat-hf-free" class="muted small" style="margin-top:.25rem">— free</div>
+      </div>
+      <div style="background:var(--bg);border:1px solid var(--border);border-radius:6px;padding:.75rem 1rem">
+        <div class="fl">GGUF cache</div>
+        <div id="stat-gguf-size" style="font-size:1.2rem;font-weight:600;margin:.3rem 0">—</div>
+        <div id="stat-gguf-count" class="muted small">— files</div>
+        <div id="stat-gguf-free" class="muted small" style="margin-top:.25rem">— free</div>
+      </div>
+      <div style="background:var(--bg);border:1px solid var(--border);border-radius:6px;padding:.75rem 1rem">
+        <div class="fl">Total used</div>
+        <div id="stat-total-size" style="font-size:1.2rem;font-weight:600;margin:.3rem 0">—</div>
+        <div class="muted small">on disk</div>
+      </div>
    </div>
-    
-    <div class="card">
-        <h3>Audio Models</h3>
-        <div id="audio-models-list">
-            <p class="text-muted">No audio models configured</p>
-        </div>
+  </div>
+
+  <!-- HF models -->
+  <div class="card">
+    <div class="card-title">HuggingFace models <span id="hf-model-badge" class="muted small"></span></div>
+    <div id="hf-models-list"><span class="muted small">Loading…</span></div>
+  </div>
+
+  <!-- GGUF files -->
+  <div class="card mb-0">
+    <div class="card-title">GGUF files <span id="gguf-file-badge" class="muted small"></span></div>
+    <div id="gguf-models-list"><span class="muted small">Loading…</span></div>
+  </div>
+</div>
+
+<!-- SEARCH -->
+<div id="tab-search" class="tab-panel">
+  <div class="card">
+    <div class="card-title">Search HuggingFace</div>
+
+    <!-- query row -->
+    <div class="search-bar" style="margin-bottom:.75rem">
+      <input type="text" id="search-q" class="form-input" placeholder="Search models (e.g. llama, mistral, qwen…)">
+      <button class="btn btn-secondary" onclick="doSearch()">Search</button>
    </div>
-    
-    <div class="card">
-        <h3>GGUF Models</h3>
-        <div id="gguf-models-list">
-            <p class="text-muted">No GGUF models configured</p>
+
+    <!-- filter row 1: toggles + dropdowns -->
+    <div style="display:flex;flex-wrap:wrap;gap:.625rem;align-items:center;margin-bottom:.625rem">
+      <div style="display:flex;align-items:center;gap:.35rem">
+        <span class="fl">Format</span>
+        <div class="tog-grp">
+          <button class="tog-btn on" data-val="gguf">GGUF</button>
+          <button class="tog-btn" data-val="all">All</button>
+          <button class="tog-btn" data-val="no-gguf">No GGUF</button>
        </div>
+      </div>
+      <div style="display:flex;align-items:center;gap:.35rem">
+        <span class="fl">Type</span>
+        <select id="filter-pipeline" class="form-input" style="padding:.25rem .4rem;font-size:12px">
+          <option value="">All</option>
+          <option value="text-generation">Text generation</option>
+          <option value="text2text-generation">Text-to-text</option>
+          <option value="text-to-image">Text-to-image</option>
+          <option value="image-to-text">Image-to-text</option>
+          <option value="automatic-speech-recognition">Speech recog.</option>
+          <option value="text-to-speech">TTS</option>
+          <option value="feature-extraction">Embeddings</option>
+        </select>
+      </div>
+      <div style="display:flex;align-items:center;gap:.35rem">
+        <span class="fl">Arch</span>
+        <select id="filter-arch" class="form-input" style="padding:.25rem .4rem;font-size:12px">
+          <option value="">Any</option>
+          <option value="moe">MoE</option>
+          <option value="lora">LoRA</option>
+        </select>
+      </div>
+      <div style="display:flex;align-items:center;gap:.35rem">
+        <span class="fl">Sort</span>
+        <select id="filter-sort" class="form-input" style="padding:.25rem .4rem;font-size:12px">
+          <option value="downloads">Most downloaded</option>
+          <option value="likes">Most liked</option>
+          <option value="lastModified">Recently updated</option>
+        </select>
+      </div>
    </div>
+
+    <!-- filter row 2: size chips -->
+    <div style="display:flex;align-items:flex-start;gap:.5rem;margin-bottom:.5rem">
+      <span class="fl" style="padding-top:.25rem;min-width:32px">Size</span>
+      <div class="chip-row" id="size-chips">
+        <span class="chip" data-val="0.5b">0.5B</span>
+        <span class="chip" data-val="1b">1B</span>
+        <span class="chip" data-val="1.5b">1.5B</span>
+        <span class="chip" data-val="3b">3B</span>
+        <span class="chip" data-val="4b">4B</span>
+        <span class="chip" data-val="7b">7B</span>
+        <span class="chip" data-val="8b">8B</span>
+        <span class="chip" data-val="9b">9B</span>
+        <span class="chip" data-val="12b">12B</span>
+        <span class="chip" data-val="13b">13B</span>
+        <span class="chip" data-val="14b">14B</span>
+        <span class="chip" data-val="22b">22B</span>
+        <span class="chip" data-val="27b">27B</span>
+        <span class="chip" data-val="32b">32B</span>
+        <span class="chip" data-val="34b">34B</span>
+        <span class="chip" data-val="40b">40B</span>
+        <span class="chip" data-val="70b">70B</span>
+        <span class="chip" data-val="72b">72B</span>
+        <span class="chip" data-val="90b">90B</span>
+        <span class="chip" data-val="123b">123B</span>
+        <span class="chip" data-val="235b">235B</span>
+        <span class="chip" data-val="671b">671B</span>
+      </div>
+    </div>
+
+    <!-- filter row 3: quant chips (file-level filter) -->
+    <div style="display:flex;align-items:flex-start;gap:.5rem;margin-bottom:1rem">
+      <span class="fl" style="padding-top:.25rem;min-width:32px">Quant</span>
+      <div class="chip-row" id="quant-chips">
+        <span class="chip" data-val="Q2_K">Q2_K</span>
+        <span class="chip" data-val="Q3_K_M">Q3_K_M</span>
+        <span class="chip" data-val="Q4_K_S">Q4_K_S</span>
+        <span class="chip" data-val="Q4_K_M">Q4_K_M ★</span>
+        <span class="chip" data-val="Q5_K_S">Q5_K_S</span>
+        <span class="chip" data-val="Q5_K_M">Q5_K_M</span>
+        <span class="chip" data-val="Q6_K">Q6_K</span>
+        <span class="chip" data-val="Q8_0">Q8_0</span>
+        <span class="chip" data-val="F16">F16</span>
+        <span class="chip" data-val="IQ4_XS">IQ4_XS</span>
+        <span class="chip" data-val="IQ3_XXS">IQ3_XXS</span>
+        <span class="chip" data-val="IQ2_XXS">IQ2_XXS</span>
+      </div>
+    </div>
+
+    <div id="search-results"><span class="muted small">Enter a query above to search</span></div>
+  </div>
 </div>

-<div id="tab-search" class="tab-content">
-    <div class="card">
-        <h3>Search HuggingFace Models</h3>
-        <div class="search-form">
-            <input type="text" id="search-query" placeholder="Search models..." class="form-control">
-            <div class="filter-group">
-                <label>
-                    <input type="checkbox" id="filter-gguf" checked>
-                    GGUF only
-                </label>
-                <label>
-                    <input type="checkbox" id="filter-text" checked>
-                    Text models
-                </label>
-                <label>
-                    <input type="checkbox" id="filter-image">
-                    Image models
-                </label>
-            </div>
-            <button class="btn btn-primary" onclick="searchModels()">Search</button>
+
+<!-- Download modal -->
+<div id="dl-modal" class="modal">
+  <div class="modal-box">
+    <div class="modal-head">
+      <span class="modal-title">Download model</span>
+      <button class="modal-close" onclick="closeModal('dl-modal')">×</button>
+    </div>
+    <div class="modal-body">
+      <div id="dl-form">
+        <div class="form-row">
+          <label class="form-label">HuggingFace repo ID or URL</label>
+          <input type="text" id="dl-id" class="form-input" placeholder="e.g. bartowski/Llama-3.1-8B-Instruct-GGUF">
+        </div>
+        <!-- GGUF mode: specific file or pattern -->
+        <div id="dl-pattern-row" class="form-row">
+          <label class="form-label">File / pattern</label>
+          <input type="text" id="dl-pattern" class="form-input" placeholder=".gguf">
+          <span class="form-hint" id="dl-hint">Exact filename (e.g. <code>model-Q4_K_M.gguf</code>) or pattern (<code>.gguf</code>). Leave blank to download the first .gguf found.</span>
+        </div>
+        <!-- Snapshot mode: full repo via HF API -->
+        <div id="dl-snapshot-note" class="alert alert-info" style="display:none">
+          Will download the full repository using the HuggingFace snapshot API. This is the correct method for safetensors / non-GGUF models. Large repos may take a while.
        </div>
-        
-        <div id="search-results" class="search-results">
-            <p class="text-muted">Enter a search query to find models</p>
+        <div class="form-actions">
+          <button class="btn btn-primary" onclick="startDownload()">Download</button>
+          <button class="btn btn-ghost" onclick="closeModal('dl-modal')">Close</button>
        </div>
+      </div>
+      <div id="dl-progress" style="display:none;padding:.25rem 0">
+        <div id="dl-filename" style="font-size:13px;font-weight:500;margin-bottom:.5rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">Preparing…</div>
+        <div class="progress" style="margin-top:0;margin-bottom:.35rem;height:5px;background:var(--border-2)"><div id="dl-bar" class="progress-fill" style="width:0%;height:100%"></div></div>
+        <div style="display:flex;justify-content:space-between;font-size:11px;color:var(--text-2);margin-bottom:.75rem">
+          <span id="dl-bytes">—</span>
+          <span id="dl-speed">—</span>
+          <span id="dl-eta">—</span>
+          <span id="dl-pct">0%</span>
+        </div>
+        <div id="dl-log" style="display:none;background:var(--raised);border-radius:6px;padding:.4rem .6rem;font-size:11px;font-family:monospace;color:var(--text-2);max-height:72px;overflow-y:auto"></div>
+      </div>
    </div>
+  </div>
 </div>

-<div id="tab-config" class="tab-content">
-    <div class="card">
-        <h3>Model Loading Configuration</h3>
-        <form id="config-form" class="form">
-            <div class="form-group">
-                <label for="load-mode">Load Mode</label>
-                <select id="load-mode" name="load_mode" class="form-control">
-                    <option value="ondemand">On Demand (default)</option>
-                    <option value="loadall">Load All</option>
-                    <option value="loadswap">Load & Swap</option>
-                </select>
-                <small class="form-text">
-                    On Demand: Load one model at a time<br>
-                    Load All: Try to load all models in VRAM<br>
-                    Load & Swap: Keep models in RAM, swap to VRAM as needed
-                </small>
-            </div>
-            
-            <div class="form-group">
-                <label>Models to Load at Startup</label>
-                <div id="loaded-models-list">
-                    <p class="text-muted">No models selected</p>
-                </div>
-            </div>
-            
-            <div class="form-group">
-                <label>Models to Pre-load (RAM)</label>
-                <div id="preload-models-list">
-                    <p class="text-muted">No models selected</p>
-                </div>
-            </div>
-            
-            <button type="submit" class="btn btn-primary">Save Configuration</button>
-        </form>
+<!-- Upload modal -->
+<div id="upload-modal" class="modal">
+  <div class="modal-box">
+    <div class="modal-head">
+      <span class="modal-title">Upload GGUF model</span>
+      <button class="modal-close" onclick="closeModal('upload-modal')">×</button>
    </div>
+    <div class="modal-body">
+      <div id="upload-form">
+        <div class="form-row">
+          <label class="form-label">Select GGUF file</label>
+          <input type="file" id="upload-file" class="form-input" accept=".gguf">
+        </div>
+        <div class="form-actions">
+          <button class="btn btn-primary" onclick="startUpload()">Upload</button>
+          <button class="btn btn-ghost" onclick="closeModal('upload-modal')">Close</button>
+        </div>
+      </div>
+      <div id="upload-progress" style="display:none;padding:.25rem 0">
+        <div id="upload-filename" style="font-size:13px;font-weight:500;margin-bottom:.5rem">Uploading…</div>
+        <div class="progress" style="margin-top:0;margin-bottom:.35rem;height:5px;background:var(--border-2)"><div id="upload-bar" class="progress-fill" style="width:0%;height:100%"></div></div>
+        <div style="display:flex;justify-content:space-between;font-size:11px;color:var(--text-2)">
+          <span id="upload-pct">0%</span>
+          <span id="upload-status">—</span>
+        </div>
+      </div>
+    </div>
+  </div>
 </div>

-<!-- Download Modal -->
-<div id="download-modal" class="modal">
-    <div class="modal-content">
-        <div class="modal-header">
-            <h2>Download Model</h2>
-            <button class="modal-close" onclick="hideDownloadModal()">&times;</button>
-        </div>
-        <div class="modal-body">
-            <form id="download-form">
-                <div class="form-group">
-                    <label for="model-id">Model ID or URL</label>
-                    <input type="text" id="model-id" class="form-control" 
-                           placeholder="e.g., TheBloke/Llama-2-7B-GGUF" required>
-                    <small class="form-text">HuggingFace model ID or direct URL</small>
-                </div>
-                
-                <div class="form-group">
-                    <label for="file-pattern">File Pattern (optional)</label>
-                    <input type="text" id="file-pattern" class="form-control" 
-                           placeholder=".gguf">
-                    <small class="form-text">Filter files to download (e.g., .gguf, .safetensors)</small>
-                </div>
-                
-                <div class="form-actions">
-                    <button type="submit" class="btn btn-primary">Download</button>
-                    <button type="button" class="btn btn-secondary" onclick="hideDownloadModal()">Cancel</button>
-                </div>
-            </form>
-            <div id="download-progress" class="download-progress" style="display: none;">
-                <p>Downloading...</p>
-                <div class="progress-bar">
-                    <div class="progress-fill" id="download-progress-bar"></div>
-                </div>
-            </div>
+<!-- Model configuration modal -->
+<div id="cfg-modal" class="modal">
+  <div class="modal-box" style="max-width:600px;max-height:92vh;overflow-y:auto">
+    <div class="modal-head" style="position:sticky;top:0;background:var(--card);z-index:1;border-bottom:1px solid var(--border)">
+      <span class="modal-title" id="cfg-modal-title">Configure model</span>
+      <button class="modal-close" onclick="closeModal('cfg-modal')">×</button>
+    </div>
+    <div class="modal-body">
+      <input type="hidden" id="cfg-path">
+      <input type="hidden" id="cfg-orig-type">
+
+      <!-- identity -->
+      <div class="form-row">
+        <label class="form-label">Model ID / path</label>
+        <div id="cfg-id-label" style="font-size:12px;font-family:monospace;color:var(--text-2);word-break:break-all;padding:.3rem 0"></div>
+      </div>
+      <div style="display:grid;grid-template-columns:1fr 1fr;gap:.75rem">
+        <div class="form-row" style="margin:0">
+          <label class="form-label">Type</label>
+          <select id="cfg-type" class="form-input">
+            <option value="text_models">Text (LLM)</option>
+            <option value="image_models">Image generation</option>
+            <option value="audio_models">Audio</option>
+            <option value="tts_models">TTS</option>
+            <option value="vision_models">Vision</option>
+          </select>
+        </div>
+        <div class="form-row" style="margin:0">
+          <label class="form-label">Alias <span class="muted">(optional)</span></label>
+          <input type="text" id="cfg-alias" class="form-input" placeholder="Friendly name">
+        </div>
+      </div>
+
+      <!-- backend -->
+      <div class="card-title" style="margin-top:1.25rem">Backend</div>
+      <div style="display:grid;grid-template-columns:1fr 1fr;gap:.75rem">
+        <div class="form-row" style="margin:0">
+          <label class="form-label">Compute backend</label>
+          <select id="cfg-backend" class="form-input">
+            <option value="auto">Auto-detect</option>
+            <option value="nvidia">NVIDIA (CUDA)</option>
+            <option value="vulkan">Vulkan</option>
+            <option value="opencl">OpenCL</option>
+            <option value="cpu">CPU only</option>
+          </select>
+        </div>
+        <div class="form-row" style="margin:0">
+          <label class="form-label">Load mode</label>
+          <select id="cfg-load-mode" class="form-input">
+            <option value="load">Load (pre-load in VRAM)</option>
+            <option value="on-request">On-request (load when needed)</option>
+          </select>
+        </div>
+      </div>
+      <div style="display:grid;grid-template-columns:1fr 1fr;gap:.75rem;margin-top:.75rem">
+        <div class="form-row" style="margin:0">
+          <label class="form-label">Used VRAM <span class="muted">(GB)</span></label>
+          <input type="number" id="cfg-used-vram" class="form-input" min="0" step="0.1" placeholder="auto-estimate">
+          <span class="form-hint" id="cfg-used-vram-hint" style="font-size:11px;color:var(--text-3)"></span>
+        </div>
+      </div>
+
+      <!-- inference -->
+      <div class="card-title" style="margin-top:1.25rem">Inference</div>
+      <div style="display:grid;grid-template-columns:1fr 1fr;gap:.75rem">
+        <div class="form-row" style="margin:0">
+          <label class="form-label">GPU layers <span class="muted">(-1 = all)</span></label>
+          <input type="number" id="cfg-gpu-layers" class="form-input" min="-1" value="-1">
+        </div>
+        <div class="form-row" style="margin:0">
+          <label class="form-label">Context size</label>
+          <input type="number" id="cfg-n-ctx" class="form-input" min="128" step="128" value="2048">
+        </div>
+        <div class="form-row" style="margin:0">
+          <label class="form-label">Max GPU % <span class="muted">(optional)</span></label>
+          <input type="number" id="cfg-max-gpu" class="form-input" min="1" max="100" placeholder="e.g. 90">
+        </div>
+        <div class="form-row" style="margin:0">
+          <label class="form-label">Manual RAM GB <span class="muted">(optional)</span></label>
+          <input type="number" id="cfg-ram-gb" class="form-input" min="0" step="0.5" placeholder="auto">
+        </div>
+      </div>
+      <div style="display:flex;gap:1.5rem;flex-wrap:wrap;margin-top:.75rem">
+        <label style="display:flex;align-items:center;gap:.5rem;cursor:pointer;font-size:13px"><input type="checkbox" id="cfg-4bit"> 4-bit quantization</label>
+        <label style="display:flex;align-items:center;gap:.5rem;cursor:pointer;font-size:13px"><input type="checkbox" id="cfg-8bit"> 8-bit quantization</label>
+        <label style="display:flex;align-items:center;gap:.5rem;cursor:pointer;font-size:13px"><input type="checkbox" id="cfg-flash"> Flash Attention 2</label>
+        <label style="display:flex;align-items:center;gap:.5rem;cursor:pointer;font-size:13px"><input type="checkbox" id="cfg-noram"> No RAM fallback</label>
+      </div>
+
+      <!-- offload -->
+      <div class="card-title" style="margin-top:1.25rem">Offload</div>
+      <div style="display:grid;grid-template-columns:1fr 1fr;gap:.75rem">
+        <div class="form-row" style="margin:0">
+          <label class="form-label">Strategy</label>
+          <select id="cfg-offload-strategy" class="form-input">
+            <option value="auto">Auto</option>
+            <option value="cpu">CPU RAM</option>
+            <option value="disk">Disk</option>
+            <option value="none">None</option>
+          </select>
        </div>
+        <div class="form-row" style="margin:0">
+          <label class="form-label">Offload directory</label>
+          <input type="text" id="cfg-offload-dir" class="form-input" placeholder="./offload">
+        </div>
+      </div>
+
+      <!-- generation -->
+      <div class="card-title" style="margin-top:1.25rem">Generation</div>
+      <div class="form-row">
+        <label class="form-label">System prompt <span class="muted">(optional)</span></label>
+        <textarea id="cfg-sysprompt" class="form-input" rows="3"
+          placeholder="You are a helpful assistant…"
+          style="resize:vertical;font-family:var(--font-mono,monospace);font-size:12px;line-height:1.5"></textarea>
+      </div>
+      <div style="display:grid;grid-template-columns:1fr 1fr;gap:.75rem">
+        <div class="form-row" style="margin:0">
+          <label class="form-label">Chat template parser</label>
+          <select id="cfg-parser" class="form-input">
+            <option value="auto">Auto-detect</option>
+            <option value="llama">Llama</option>
+            <option value="mistral">Mistral</option>
+            <option value="chatml">ChatML</option>
+            <option value="phi">Phi</option>
+            <option value="gemma">Gemma</option>
+            <option value="qwen">Qwen</option>
+          </select>
+        </div>
+      </div>
+      <div style="display:flex;gap:1.5rem;flex-wrap:wrap;margin-top:.75rem">
+        <label style="display:flex;align-items:center;gap:.5rem;cursor:pointer;font-size:13px"><input type="checkbox" id="cfg-tools"> Tools closer prompt</label>
+        <label style="display:flex;align-items:center;gap:.5rem;cursor:pointer;font-size:13px"><input type="checkbox" id="cfg-grammar"> Grammar-guided generation</label>
+      </div>
+
+      <div class="form-actions" style="margin-top:1.5rem">
+        <button class="btn btn-primary" onclick="saveModelConfig()">Save</button>
+        <button class="btn btn-ghost" onclick="closeModal('cfg-modal')">Cancel</button>
+      </div>
    </div>
+  </div>
+</div>
+
+<!-- Model info drawer -->
+<div id="info-overlay" onclick="closeInfo()"></div>
+<div id="info-drawer">
+  <div id="info-sticky">
+    <div id="info-title">—</div>
+    <button class="btn btn-ghost btn-sm" onclick="closeInfo()">✕</button>
+  </div>
+  <div id="info-body" style="padding:1.25rem 1.5rem"></div>
 </div>
 {% endblock %}

 {% block scripts %}
 <script>
-function showTab(tabName) {
-    document.querySelectorAll('.tab-content').forEach(tab => {
-        tab.classList.remove('active');
-    });
-    document.querySelectorAll('.tab-btn').forEach(btn => {
-        btn.classList.remove('active');
+/* ── helpers ─────────────────────────────────────────── */
+/**
+ * HTML-escape a value for interpolation into markup assigned via innerHTML.
+ * The value is coerced with String(); &, <, >, " and ' are replaced by
+ * entities, so the result is safe both as element content and inside a
+ * quoted attribute value.
+ * NOTE: this is HTML escaping only. Text placed inside a JS string literal
+ * of an inline handler (onclick="f('…')") is entity-decoded by the HTML
+ * parser before the script runs, so that context also needs JS-string escaping.
+ */
+function esc(s){
+  return String(s)
+    .replace(/&/g,'&amp;')
+    .replace(/</g,'&lt;')
+    .replace(/>/g,'&gt;')
+    .replace(/"/g,'&quot;')
+    .replace(/'/g,'&#39;');
+}
+/** Compact counter for display: falsy → '0', ≥1e6 → 'x.xM', ≥1000 → 'x.xk', otherwise the plain number. */
+function fmtNum(n){
+  if(!n) return '0';
+  if(n>=1e6) return (n/1e6).toFixed(1)+'M';
+  if(n>=1000) return (n/1000).toFixed(1)+'k';
+  return String(n);
+}
+/** Format a size given in GB: falsy → '—', ≥1 → 'x.x GB', otherwise converted to whole MB. */
+function fmtGB(gb){
+  if(!gb) return '—';
+  if(gb>=1) return gb.toFixed(1)+' GB';
+  return (gb*1024).toFixed(0)+' MB';
+}
+/**
+ * Format a date value as a short localized date (e.g. "Mar 15, 2024").
+ * Returns the input unchanged when it cannot be parsed: an unparseable value
+ * does not throw, it yields an Invalid Date whose toLocaleDateString() is the
+ * literal text "Invalid Date", so validity is checked explicitly.
+ */
+function fmtDate(s){
+  try{
+    const parsed=new Date(s);
+    if(Number.isNaN(parsed.getTime())) return s;
+    return parsed.toLocaleDateString(undefined,{year:'numeric',month:'short',day:'numeric'});
+  }catch{
+    return s;
+  }
+}
+
+/* ── tab / modal ─────────────────────────────────────── */
+/** Activate panel `#tab-<name>` and mark `btn` as the active tab button. */
+function switchTab(name,btn){
+  // Drop the active marker from every panel and every tab button first.
+  for(const panel of document.querySelectorAll('.tab-panel')) panel.classList.remove('active');
+  for(const tabBtn of document.querySelectorAll('.tab')) tabBtn.classList.remove('active');
+  const target=document.getElementById('tab-'+name);
+  target.classList.add('active');
+  btn.classList.add('active');
+}
+/** Show the modal with the given element id by adding its `show` class. */
+function openModal(id){
+  const modal=document.getElementById(id);
+  modal.classList.add('show');
+}
+/** Hide the modal with the given element id by removing its `show` class. */
+function closeModal(id){
+  const modal=document.getElementById(id);
+  modal.classList.remove('show');
+}
+
+/* ── GGUF format toggle ──────────────────────────────── */
+// Value of the currently selected format toggle button (its data-val).
+let _ggufMode = 'gguf';
+for(const togBtn of document.querySelectorAll('.tog-btn')){
+  togBtn.addEventListener('click',()=>{
+    // Only one toggle may be on: clear all of them, then light the clicked one.
+    for(const other of document.querySelectorAll('.tog-btn')) other.classList.remove('on');
+    togBtn.classList.add('on');
+    _ggufMode = togBtn.dataset.val;
+  });
+}
+
+/* ── chip toggles ────────────────────────────────────── */
+// Each chip is an independent on/off switch: clicking flips its `on` class.
+for(const chip of document.querySelectorAll('.chip')){
+  chip.addEventListener('click',()=>{ chip.classList.toggle('on'); });
+}
+/** Return the data-val of every chip currently switched on inside the container with the given id. */
+function getChips(id){
+  const selected=document.querySelectorAll('#'+id+' .chip.on');
+  return Array.from(selected,chip=>chip.dataset.val);
+}
+
+/* ── search ──────────────────────────────────────────── */
+// Models returned by the last search; the inline handlers of each result row
+// (openInfo / toggleFiles / dlFromResult) index into this array.
+let _results   = [];
+// Per-model file lists keyed by model id, filled by toggleFiles and cleared
+// at the start of every search.
+let _filesCache = {};
+// Upper-cased values of the quant chips that were selected when the last
+// search started; used to filter file lists and to pick a download pattern.
+let _activeQuants = new Set();
+
+// Pressing Enter in the query box runs the search.
+document.getElementById('search-q').addEventListener('keydown',e=>{if(e.key==='Enter')doSearch()});
+
+// Id of the most recently started search; a response whose id no longer
+// matches belongs to a superseded search and is dropped.
+let _searchSeq = 0;
+
+/**
+ * Query /admin/api/hf-search with the current query text, format toggle,
+ * sort order and optional pipeline / size / architecture filters, then render
+ * one row per returned model into #search-results.
+ * Side effects: replaces _results, clears _filesCache and re-captures
+ * _activeQuants from the quant chips. Errors are shown inline in the results
+ * area rather than thrown.
+ */
+async function doSearch(){
+  const seq = ++_searchSeq;
+  const q   = document.getElementById('search-q').value.trim();
+  const out = document.getElementById('search-results');
+  const pipeline = document.getElementById('filter-pipeline').value;
+  const arch     = document.getElementById('filter-arch').value;
+  const sort     = document.getElementById('filter-sort').value;
+  const sizes    = getChips('size-chips').join(',');
+  // Chip values are upper-cased; anything after a space in a value is dropped.
+  _activeQuants  = new Set(getChips('quant-chips').map(v=>v.toUpperCase().split(' ')[0]));
+
+  _filesCache = {};
+  _results    = [];
+  out.innerHTML = '<span class="muted small">Searching HuggingFace…</span>';
+
+  // An empty query is sent as a single space.
+  const params = new URLSearchParams({q:q||' ', gguf_mode:_ggufMode, sort});
+  if(pipeline) params.append('pipeline_tag', pipeline);
+  if(sizes)    params.append('sizes', sizes);
+  if(arch)     params.append('arch', arch);
+
+  try{
+    const r = await fetch('/admin/api/hf-search?'+params);
+    if(!r.ok){
+      // The error body is not guaranteed to be JSON (e.g. an HTML error page);
+      // fall back to the HTTP status instead of surfacing a JSON parse error.
+      let detail = '';
+      try{ detail = (await r.json()).detail }catch{}
+      throw new Error(detail||r.statusText||('HTTP '+r.status));
+    }
+    const found = await r.json();
+    if(seq!==_searchSeq) return;  // a newer search was started meanwhile
+    _results = found;
+    if(!_results.length){out.innerHTML='<span class="muted small">No results. Try different keywords or fewer filters.</span>';return}
+    out.innerHTML = _results.map((m,i)=>`
+      <div style="padding:.75rem 0;border-bottom:1px solid var(--border)">
+        <div style="display:flex;align-items:flex-start;justify-content:space-between;gap:.5rem">
+          <div style="min-width:0;flex:1">
+            <div style="font-weight:500;font-size:13px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap"
+                 title="${esc(m.id)}">${esc(m.id)}</div>
+            <div style="font-size:11px;color:var(--text-3);margin-top:.25rem;display:flex;align-items:center;gap:.5rem;flex-wrap:wrap">
+              ${m.pipeline_tag?`<span class="badge badge-user">${esc(m.pipeline_tag)}</span>`:''}
+              <span>↓ ${fmtNum(m.downloads)}</span>
+              <span>♥ ${fmtNum(m.likes)}</span>
+            </div>
+          </div>
+          <div style="display:flex;gap:.3rem;flex-shrink:0">
+            <button class="btn btn-ghost btn-sm" onclick="openInfo(${i})">Info</button>
+            <button class="btn btn-ghost btn-sm" onclick="toggleFiles(${i})">▾ Files</button>
+            <button class="btn btn-secondary btn-sm" onclick="dlFromResult(${i})">Download</button>
+          </div>
+        </div>
+        <div id="fp-${i}" style="display:none;margin-top:.625rem;padding:.5rem .625rem;background:var(--raised);border-radius:6px">
+          <span class="muted small">Loading…</span>
+        </div>
+      </div>`).join('');
+  }catch(e){
+    if(seq!==_searchSeq) return;  // do not let a stale failure overwrite newer output
+    out.innerHTML='<span class="muted small">Error: '+esc(e.message)+'</span>';
+  }
+}
+
+/**
+ * Open the download dialog for search result `i`.
+ * GGUF-looking repos get a file pattern: '<QUANT>.gguf' when exactly one quant
+ * chip was active for the search, otherwise '.gguf'. Other repos are opened
+ * with an empty pattern.
+ */
+function dlFromResult(i){
+  const model = _results[i];
+  if(!looksLikeGguf(model.id, '')){
+    openDownloadFor(model.id, '');  // triggers snapshot download
+    return;
+  }
+  const quants = Array.from(_activeQuants);
+  let pattern = '.gguf';
+  if(quants.length===1) pattern = quants[0]+'.gguf';
+  openDownloadFor(model.id, pattern);
+}
+
+/* ── files panel ─────────────────────────────────────── */
+/**
+ * Show or hide the file panel (#fp-<i>) of search result `i`.
+ * When opening, the file list comes from _filesCache if present, otherwise it
+ * is fetched from /admin/api/hf-model-files and cached by model id.
+ * Fetch errors are rendered inside the panel.
+ */
+async function toggleFiles(i){
+  const panel = document.getElementById('fp-'+i);
+  if(panel.style.display!=='none'){panel.style.display='none';return}
+  panel.style.display='block';
+  const modelId = _results[i].id;
+  if(_filesCache[modelId]){renderFiles(panel,modelId,_filesCache[modelId]);return}
+  panel.innerHTML='<span class="muted small">Fetching file list…</span>';
+  try{
+    const r = await fetch('/admin/api/hf-model-files?model_id='+encodeURIComponent(modelId));
+    if(!r.ok){
+      // The error body may not be JSON; fall back to the HTTP status rather
+      // than reporting a JSON parse error to the user.
+      let detail = '';
+      try{ detail = (await r.json()).detail }catch{}
+      throw new Error(detail||r.statusText||('HTTP '+r.status));
+    }
+    const files = await r.json();
+    _filesCache[modelId]=files;
+    renderFiles(panel,modelId,files);
+  }catch(e){panel.innerHTML='<span class="muted small">Error: '+esc(e.message)+'</span>'}
+}
+
+/**
+ * Render the file table of a model into `panel`.
+ * When quant chips were active for the search, only files whose `quant`
+ * matches one of them, or whose name contains one, are listed.
+ * Each row gets a button that opens the download dialog for that exact file.
+ */
+function renderFiles(panel,modelId,files){
+  // Attribute-safe escaping: also neutralizes quotes, whatever esc() covers.
+  const attr = v => esc(v).replace(/"/g,'&quot;').replace(/'/g,'&#39;');
+  // Encode a value as a JS string literal, then make it attribute-safe. File
+  // names come from the remote repository, so they must not be able to break
+  // out of the inline handler's string argument.
+  const jsArg = v => attr(JSON.stringify(String(v)));
+  const shown = _activeQuants.size>0
+    ? files.filter(f=>{
+        const q=(f.quant||'').toUpperCase();
+        return _activeQuants.has(q)||[..._activeQuants].some(aq=>f.name.toUpperCase().includes(aq));
+      })
+    : files;
+  if(!shown.length){
+    panel.innerHTML=`<span class="muted small">${_activeQuants.size?'No matching quant files — deselect quant chips to see all.':'No GGUF files found.'}</span>`;
+    return;
+  }
+  panel.innerHTML=
+    '<table style="width:100%;border-collapse:collapse;font-size:12px">'+
+    '<thead><tr style="color:var(--text-3);font-size:10px;text-transform:uppercase;letter-spacing:.04em">'+
+    '<th style="text-align:left;padding:.2rem .25rem">File</th>'+
+    '<th style="text-align:center;padding:.2rem .25rem">Quant</th>'+
+    '<th style="text-align:right;padding:.2rem .25rem">Size</th>'+
+    '<th style="text-align:right;padding:.2rem .25rem">≈ VRAM</th>'+
+    '<th></th></tr></thead><tbody>'+
+    shown.map(f=>
+      '<tr style="border-top:1px solid var(--border)">'+
+      `<td style="padding:.3rem .25rem;font-family:monospace;max-width:280px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="${attr(f.name)}">${esc(f.name)}</td>`+
+      `<td style="text-align:center;padding:.3rem .25rem">${f.quant?'<span class="badge badge-admin">'+esc(f.quant)+'</span>':''}</td>`+
+      `<td style="text-align:right;padding:.3rem .25rem;white-space:nowrap;color:var(--text-2)">${fmtGB(f.size_gb)}</td>`+
+      `<td style="text-align:right;padding:.3rem .25rem;white-space:nowrap;color:var(--text-3)">${f.vram_gb?'~'+fmtGB(f.vram_gb):''}</td>`+
+      `<td style="padding:.3rem .25rem;text-align:right"><button class="btn btn-secondary btn-sm" onclick="openDownloadFor(${jsArg(modelId)},${jsArg(f.name)})">↓</button></td>`+
+      '</tr>'
+    ).join('')+
+    '</tbody></table>';
+}
+
+/* ── info drawer ─────────────────────────────────────── */
+/**
+ * Open the info drawer for search result `i`: show the overlay and drawer
+ * with a loading placeholder, lock page scrolling, then start loading details.
+ */
+function openInfo(i){
+  const model = _results[i];
+  const byId = id => document.getElementById(id);
+  byId('info-title').textContent = model.id;
+  byId('info-body').innerHTML = '<span class="muted small">Loading…</span>';
+  byId('info-overlay').style.display = 'block';
+  byId('info-drawer').classList.add('open');
+  document.body.style.overflow = 'hidden';
+  loadInfo(model.id);
+}
+/** Close the info drawer: hide the overlay, slide the drawer away and restore page scrolling. */
+function closeInfo(){
+  const overlay = document.getElementById('info-overlay');
+  const drawer = document.getElementById('info-drawer');
+  overlay.style.display = 'none';
+  drawer.classList.remove('open');
+  document.body.style.overflow = '';
+}
+
+/**
+ * Fetch details for `modelId` from /admin/api/hf-model-info and render them
+ * into the info drawer. Failures are shown inside the drawer body.
+ */
+async function loadInfo(modelId){
+  try{
+    const r = await fetch('/admin/api/hf-model-info?model_id='+encodeURIComponent(modelId));
+    if(!r.ok){
+      // The error body may not be JSON; fall back to the HTTP status rather
+      // than reporting a JSON parse error to the user.
+      let detail = '';
+      try{ detail = (await r.json()).detail }catch{}
+      throw new Error(detail||r.statusText||('HTTP '+r.status));
+    }
+    renderInfo(await r.json());
+  }catch(e){
+    document.getElementById('info-body').innerHTML='<span class="muted small">Error: '+esc(e.message)+'</span>';
+  }
+}
+
+/**
+ * Render model details `d` into the info drawer: action buttons, a metadata
+ * grid (fields are shown only when present), up to 30 tags (generic framework
+ * tags are skipped) and a table of GGUF files with per-file download buttons.
+ * All values taken from `d` are escaped for the context they are placed in.
+ */
+function renderInfo(d){
+  // Attribute-safe escaping: also neutralizes quotes, whatever esc() covers.
+  const attr = v => esc(v).replace(/"/g,'&quot;').replace(/'/g,'&#39;');
+  // Encode a value as a JS string literal, then make it attribute-safe, so
+  // remote ids / file names cannot break out of an inline handler argument.
+  const jsArg = v => attr(JSON.stringify(String(v)));
+  const idAttr = attr(d.id);
+  const idArg = jsArg(d.id);
+  let html=`
+    <div style="display:flex;gap:.4rem;flex-wrap:wrap;margin-bottom:1.25rem">
+      <a href="https://huggingface.co/${idAttr}" target="_blank" rel="noopener" class="btn btn-ghost btn-sm">HuggingFace ↗</a>
+      <button class="btn btn-secondary btn-sm" onclick="openDownloadFor(${idArg},${jsArg(d.gguf_files&&d.gguf_files.length?'.gguf':'')})">Download repo</button>
+      ${d.gated?'<span class="badge badge-admin" style="align-self:center">Gated</span>':''}
+      ${d.private?'<span class="badge badge-user" style="align-self:center">Private</span>':''}
+    </div>
+    <div style="display:grid;grid-template-columns:1fr 1fr;gap:.625rem 1.25rem;font-size:13px;margin-bottom:1.25rem">
+      ${d.pipeline_tag?`<div><div class="fl" style="margin-bottom:.2rem">Type</div>${esc(d.pipeline_tag)}</div>`:''}
+      ${d.params_label?`<div><div class="fl" style="margin-bottom:.2rem">Parameters</div>${esc(d.params_label)}</div>`:''}
+      <div><div class="fl" style="margin-bottom:.2rem">Downloads</div>↓ ${fmtNum(d.downloads)}</div>
+      <div><div class="fl" style="margin-bottom:.2rem">Likes</div>♥ ${fmtNum(d.likes)}</div>
+      ${d.license?`<div><div class="fl" style="margin-bottom:.2rem">License</div>${esc(d.license)}</div>`:''}
+      ${d.last_modified?`<div><div class="fl" style="margin-bottom:.2rem">Updated</div>${esc(fmtDate(d.last_modified))}</div>`:''}
+      ${d.author?`<div><div class="fl" style="margin-bottom:.2rem">Author</div>${esc(d.author)}</div>`:''}
+      ${d.file_count?`<div><div class="fl" style="margin-bottom:.2rem">Files in repo</div>${esc(d.file_count)}</div>`:''}
+      ${d.base_model?`<div style="grid-column:span 2"><div class="fl" style="margin-bottom:.2rem">Base model</div><code style="font-size:11px">${esc(d.base_model)}</code></div>`:''}
+      ${d.language&&d.language.length?`<div style="grid-column:span 2"><div class="fl" style="margin-bottom:.2rem">Languages</div>${d.language.slice(0,8).map(esc).join(', ')}</div>`:''}
+    </div>`;
+
+  const relevantTags = (d.tags||[]).filter(t=>!['transformers','safetensors','gguf','pytorch','jax'].includes(t));
+  if(relevantTags.length){
+    html+=`<div style="margin-bottom:1.25rem">
+      <div class="fl" style="margin-bottom:.4rem">Tags</div>
+      ${relevantTags.slice(0,30).map(t=>`<span class="badge badge-user" style="margin:.1rem">${esc(t)}</span>`).join('')}
+    </div>`;
+  }
+
+  if(d.gguf_files&&d.gguf_files.length){
+    html+=`<div class="card-title">GGUF files (${d.gguf_files.length})</div>
+    <table style="width:100%;border-collapse:collapse;font-size:12px">
+      <thead><tr style="color:var(--text-3);font-size:10px;text-transform:uppercase;letter-spacing:.04em">
+        <th style="text-align:left;padding:.25rem .25rem">File</th>
+        <th style="text-align:center;padding:.25rem .25rem">Quant</th>
+        <th style="text-align:right;padding:.25rem .25rem">Size</th>
+        <th style="text-align:right;padding:.25rem .25rem">≈ VRAM</th>
+        <th></th>
+      </tr></thead><tbody>
+      ${d.gguf_files.map(f=>`
+        <tr style="border-top:1px solid var(--border)">
+          <td style="padding:.35rem .25rem;font-family:monospace;font-size:11px;max-width:220px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="${attr(f.name)}">${esc(f.name)}</td>
+          <td style="text-align:center;padding:.35rem .25rem">${f.quant?`<span class="badge badge-admin">${esc(f.quant)}</span>`:''}</td>
+          <td style="text-align:right;padding:.35rem .25rem;white-space:nowrap;color:var(--text-2)">${fmtGB(f.size_gb)}</td>
+          <td style="text-align:right;padding:.35rem .25rem;white-space:nowrap;color:var(--text-3)">${f.vram_gb?'~'+fmtGB(f.vram_gb):''}</td>
+          <td style="padding:.35rem .25rem;text-align:right"><button class="btn btn-secondary btn-sm" onclick="openDownloadFor(${idArg},${jsArg(f.name)})">↓</button></td>
+        </tr>`).join('')}
+      </tbody>
+    </table>`;
+  } else {
+    html+=`<div class="muted small">No GGUF files in this repo.</div>`;
+  }
+
+  document.getElementById('info-title').textContent = d.id;
+  document.getElementById('info-body').innerHTML = html;
+}
+
+/* ── download modal ──────────────────────────────────── */
+/**
+ * Decide whether a download should be treated as GGUF.
+ * True when the file pattern mentions '.gguf', when the format toggle is set
+ * to 'gguf', or when the model id contains 'gguf' (checked in that order,
+ * case-insensitively for the two strings).
+ */
+function looksLikeGguf(modelId, filePattern){
+  const patternIsGguf = Boolean(filePattern) && filePattern.toLowerCase().includes('.gguf');
+  return patternIsGguf || _ggufMode==='gguf' || modelId.toLowerCase().includes('gguf');
+}
+
+/**
+ * Pre-fill and open the download dialog for `modelId`.
+ * GGUF downloads show the file/pattern row (defaulting to '.gguf'); anything
+ * else hides it, clears the pattern and shows the snapshot note instead.
+ */
+function openDownloadFor(modelId, filePattern){
+  document.getElementById('dl-id').value = modelId;
+  const gguf = looksLikeGguf(modelId, filePattern);
+  const byId = id => document.getElementById(id);
+  byId('dl-pattern-row').style.display = gguf ? 'block' : 'none';
+  byId('dl-snapshot-note').style.display = gguf ? 'none' : 'flex';
+  byId('dl-pattern').value = gguf ? (filePattern || '.gguf') : '';
+  openModal('dl-modal');
+}
+
+/* progress helpers */
+/** Human-readable byte count using decimal units (GB 2 decimals, MB 1, KB 0); '—' for a missing value, 0 is shown as '0 B'. */
+function fmtBytes(n){
+  if(!n&&n!==0) return '—';
+  const units=[[1e9,2,' GB'],[1e6,1,' MB'],[1e3,0,' KB']];
+  for(const [scale,digits,label] of units){
+    if(n>=scale) return (n/scale).toFixed(digits)+label;
+  }
+  return n+' B';
+}
+/** Transfer rate as '<size>/s'; empty string when the rate is missing or zero. */
+function fmtRate(bps){
+  if(!bps) return '';
+  return fmtBytes(bps)+'/s';
+}
+/** Remaining time from seconds: 'Ns left', 'M:SS left' or 'Hh Mm left'; empty string for null/undefined. */
+function fmtEta(s){
+  if(s==null) return '';  // == null matches both null and undefined
+  const secs=Math.round(s);
+  if(secs<60) return secs+'s left';
+  const mins=Math.floor(secs/60);
+  if(mins<60) return mins+':'+String(secs%60).padStart(2,'0')+' left';
+  return Math.floor(mins/60)+'h '+(mins%60)+'m left';
+}
+
+// EventSource streaming progress for the running download (null when none);
+// closed when a 'done' event arrives or an error is shown.
+let _dlEs = null;
+// True once the current download reported 'done' or 'error'; startDownload
+// resets it, and the stream's onerror handler ignores errors while it is set.
+let _dlDone = false;
+
+/**
+ * Put the download progress view back to its initial state: empty bar,
+ * placeholder texts and a hidden, emptied log.
+ */
+function _dlReset(){
+  const setText = (id,text) => { document.getElementById(id).textContent = text; };
+  const bar = document.getElementById('dl-bar');
+  // Disable the CSS transition so the bar snaps to 0% instead of animating back.
+  bar.style.transition = 'none';
+  bar.style.width = '0%';
+  setText('dl-filename','Preparing…');
+  setText('dl-bytes','—');
+  setText('dl-speed','—');
+  setText('dl-eta','—');
+  setText('dl-pct','0%');
+  const log = document.getElementById('dl-log');
+  log.style.display = 'none';
+  log.innerHTML = '';
+  // Re-enable the transition on the next frame.
+  requestAnimationFrame(()=>{ bar.style.transition=''; });
+}
+
+/** Append `msg` as a new line to the download log, reveal the log and scroll it to the bottom. */
+function _dlLog(msg){
+  const log = document.getElementById('dl-log');
+  log.style.display = 'block';
+  const line = document.createElement('div');
+  line.textContent = msg;  // textContent: the message is never parsed as HTML
+  log.appendChild(line);
+  log.scrollTop = log.scrollHeight;
+}
+
+/** Abort progress streaming, switch the dialog back to the form and report `msg` in an alert. */
+function showDownloadError(msg){
+  if(_dlEs){
+    _dlEs.close();
+    _dlEs = null;
+  }
+  const form = document.getElementById('dl-form');
+  const progress = document.getElementById('dl-progress');
+  form.style.display = 'block';
+  progress.style.display = 'none';
+  alert('Download error: '+msg);
+}
+
+/**
+ * Apply one progress event from the download stream to the dialog.
+ * Handled types:
+ *   start    – show the file name and total size, reset bar and percent
+ *   progress – update bar, percent, byte counts, speed, ETA (and file name)
+ *   info     – append the message to the log
+ *   done     – mark finished, close the stream, refresh the local model list
+ *              and close/reset the dialog after a short delay
+ *   error    – mark finished and show the error
+ * Any other type (e.g. keepalive) is ignored.
+ */
+function handleProgressEvent(evt){
+  const byId = id => document.getElementById(id);
+  switch(evt.type){
+    case 'start':
+      byId('dl-filename').textContent=evt.filename||'Downloading…';
+      byId('dl-bytes').textContent='0 / '+fmtBytes(evt.total);
+      byId('dl-bar').style.width='0%';
+      // Keep the percent text in step with the bar when a new file starts.
+      byId('dl-pct').textContent='0%';
+      break;
+    case 'progress':{
+      // Coerce and clamp: a missing or non-numeric percent must not throw in
+      // toFixed(), and the bar must stay within 0–100%.
+      const raw=Number(evt.percent);
+      const pct=Number.isFinite(raw)?Math.min(100,Math.max(0,raw)):0;
+      byId('dl-bar').style.width=pct+'%';
+      byId('dl-pct').textContent=pct.toFixed(1)+'%';
+      byId('dl-bytes').textContent=fmtBytes(evt.downloaded)+' / '+fmtBytes(evt.total);
+      byId('dl-speed').textContent=fmtRate(evt.rate);
+      byId('dl-eta').textContent=fmtEta(evt.eta);
+      if(evt.filename) byId('dl-filename').textContent=evt.filename;
+      break;
+    }
+    case 'info':
+      _dlLog(evt.message);
+      break;
+    case 'done':
+      _dlDone=true;
+      if(_dlEs){_dlEs.close();_dlEs=null}
+      byId('dl-bar').style.width='100%';
+      byId('dl-pct').textContent='100%';
+      byId('dl-filename').textContent='Download complete!';
+      refreshLocal();
+      // Leave the completed state visible briefly before closing the dialog.
+      setTimeout(()=>{
+        closeModal('dl-modal');
+        byId('dl-form').style.display='block';
+        byId('dl-progress').style.display='none';
+        _dlReset();
+      },1800);
+      break;
+    case 'error':
+      _dlDone=true;
+      showDownloadError(evt.message);
+      break;
+    default:
+      // keepalive: ignore
+      break;
+  }
+}
+
+async function startDownload(){
+  const id=document.getElementById('dl-id').value.trim();
+  if(!id){document.getElementById('dl-id').focus();return}
+  _dlDone=false;
+  _dlReset();
+  document.getElementById('dl-form').style.display='none';
+  document.getElementById('dl-progress').style.display='block';
+  try{
+    const r=await fetch('/admin/api/model-download',{
+      method:'POST',headers:{'Content-Type':'application/json'},
+      body:JSON.stringify({model_id:id,file_pattern:document.getElementById('dl-pattern').value||null})
    });
-    
-    document.getElementById('tab-' + tabName).classList.add('active');
-    event.target.classList.add('active');
+    if(!r.ok){const e=await r.json();showDownloadError(e.detail||'Request failed');return}
+    const {session_id}=await r.json();
+    _dlEs=new EventSource('/admin/api/download-stream/'+session_id);
+    _dlEs.onmessage=function(e){
+      try{handleProgressEvent(JSON.parse(e.data))}catch{}
+    };
+    _dlEs.onerror=function(){
+      if(_dlDone) return;
+      if(_dlEs&&_dlEs.readyState===EventSource.CLOSED) return;
+      showDownloadError('Connection to download stream lost');
+    };
+  }catch(e){showDownloadError(e.message)}
 }

-function showDownloadModal() {
-    document.getElementById('download-modal').style.display = 'flex';
+/* ── active downloads strip ──────────────────────────── */
+let _pollTimer = null;
+
+// Fetch the list of downloads from /admin/api/downloads and render the ones that
+// are still running (status neither 'done' nor 'error') into the #dl-strip panel.
+// The strip is hidden when nothing is active. Any failure (network, JSON, render)
+// is swallowed so a bad poll never surfaces to the user; the next poll retries.
+async function pollDownloads(){
+  try{
+    const r = await fetch('/admin/api/downloads');
+    if(!r.ok) return;
+    const all = await r.json();
+    const active = all.filter(d=>d.status!=='done'&&d.status!=='error');
+    const strip = document.getElementById('dl-strip');
+    const list  = document.getElementById('dl-strip-list');
+    if(!active.length){ strip.style.display='none'; return; }
+    strip.style.display='block';
+    // One row per active download, separated by a thin divider.
+    // NOTE(review): esc() is defined outside this chunk — presumably an HTML-escape helper; confirm.
+    list.innerHTML = active.map(d=>{
+      const pct = d.percent||0;
+      // Second line of the row: current file name, falling back to the model id.
+      const name = d.filename||d.model_id||'';
+      const spd  = d.rate?fmtRate(d.rate):'';
+      const eta  = d.eta!=null?fmtEta(d.eta):'';
+      return `<div style="display:flex;align-items:center;gap:.75rem;padding:.2rem 0">
+        <div style="flex:1;min-width:0">
+          <div style="font-size:12px;font-weight:500;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">${esc(d.model_id)}</div>
+          <div style="font-size:11px;color:var(--text-2);overflow:hidden;text-overflow:ellipsis;white-space:nowrap">${esc(name)}</div>
+          <div class="progress" style="margin-top:.3rem;margin-bottom:0;height:3px"><div class="progress-fill" style="width:${pct}%"></div></div>
+        </div>
+        <div style="font-size:11px;color:var(--text-2);text-align:right;white-space:nowrap;flex-shrink:0">
+          <div>${pct.toFixed(1)}%</div>
+          ${spd?`<div>${esc(spd)}</div>`:''}
+          ${eta?`<div class="muted">${esc(eta)}</div>`:''}
+        </div>
+      </div>`;
+    }).join('<div style="border-top:1px solid var(--border);margin:.3rem 0"></div>');
+  }catch{}
 }

-function hideDownloadModal() {
-    document.getElementById('download-modal').style.display = 'none';
+// Start the 2-second download poll (idempotent: a second call is a no-op while
+// a timer is already registered) and run one poll immediately.
+function startPolling(){
+  if(!_pollTimer){
+    _pollTimer = setInterval(pollDownloads, 2000);
+    pollDownloads();
+  }
 }

-async function searchModels() {
-    const query = document.getElementById('search-query').value;
-    const resultsDiv = document.getElementById('search-results');
-    
-    if (!query) {
-        resultsDiv.innerHTML = '<p class="text-muted">Enter a search query</p>';
-        return;
+startPolling();
+
+/* ── cache stats & local models ──────────────────────── */
+// Fetch /admin/api/cache-stats and fill the HuggingFace / GGUF / total stat tiles.
+// Non-OK responses and any thrown error leave the tiles untouched.
+async function loadCacheStats(){
+  const put = (id, text) => { document.getElementById(id).textContent = text; };
+  // "<n> <noun>" with a plural "s" for every count except exactly 1.
+  const counted = (n, noun) => n+' '+noun+(n!==1?'s':'');
+  // Free-space label; an em dash when the server did not report a value.
+  const freeLabel = bytes => bytes!=null ? fmtBytes(bytes)+' free' : '—';
+  try{
+    const resp = await fetch('/admin/api/cache-stats');
+    if(!resp.ok) return;
+    const stats = await resp.json();
+
+    put('stat-hf-size',  fmtBytes(stats.hf_bytes));
+    put('stat-hf-count', counted(stats.hf_models, 'model'));
+    put('stat-hf-free',  freeLabel(stats.hf_disk_free_bytes));
+
+    put('stat-gguf-size',  fmtBytes(stats.gguf_bytes));
+    put('stat-gguf-count', counted(stats.gguf_files, 'file'));
+    put('stat-gguf-free',  freeLabel(stats.gguf_disk_free_bytes));
+
+    put('stat-total-size', fmtBytes((stats.hf_bytes||0)+(stats.gguf_bytes||0)));
+  }catch{}
+}
+
+let _localModels = [];
+
+async function loadCachedModels(){
+  _localModels = [];
+  const hfEl   = document.getElementById('hf-models-list');
+  const ggufEl = document.getElementById('gguf-models-list');
+  hfEl.innerHTML = ggufEl.innerHTML = '<span class="muted small">Loading…</span>';
+  try{
+    const r = await fetch('/admin/api/cached-models');
+    if(!r.ok) throw new Error((await r.json()).detail||r.statusText);
+    const d = await r.json();
+
+    // HF models
+    const hf = d.hf||[];
+    document.getElementById('hf-model-badge').textContent = hf.length ? `(${hf.length})` : '';
+    if(!hf.length){
+      hfEl.innerHTML = '<span class="muted small">No HuggingFace models cached.</span>';
+    }else{
+      const rows = hf.map(m=>{
+        const idx = _localModels.length;
+        _localModels.push({label:m.id, path:m.id, cacheType:'hf', size_gb:m.size_gb||0,
+          defaultType:m.model_type||'text_models', settings:m.settings||{}, in_config:m.in_config});
+        const loaded = _loadedKeys.has(m.id) || [..._loadedKeys].some(k=>k.endsWith(':'+m.id)||k===m.id);
+        return `<tr style="border-top:1px solid var(--border)">
+          <td style="padding:.4rem .25rem;font-family:monospace;font-size:12px;max-width:260px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="${esc(m.id)}">${esc(m.id)}</td>
+          <td style="text-align:right;padding:.4rem .25rem;white-space:nowrap;color:var(--text-2)">${fmtGB(m.size_gb)}</td>
+          <td style="text-align:right;padding:.4rem .25rem;color:var(--text-2)">${m.file_count}</td>
+          <td style="text-align:center;padding:.4rem .25rem">${m.in_config?'<span class="badge badge-ok">enabled</span>':'<span class="muted small">—</span>'}</td>
+          <td style="padding:.4rem .25rem;text-align:right;white-space:nowrap">
+            ${m.in_config?(loaded
+              ?`<button class="btn btn-ghost btn-sm" onclick="unloadModel(${idx})">Unload</button>`
+              :`<button class="btn btn-primary btn-sm" onclick="loadModel(${idx})">Load now</button>`):''}
+            <button class="btn btn-secondary btn-sm" onclick="openCfgModal(${idx})">${m.in_config?'Configure':'Add to CoderAI'}</button>
+            ${m.in_config?`<button class="btn btn-ghost btn-sm" onclick="disableModel(${idx})">Remove</button>`:''}
+            <button class="btn btn-danger btn-sm" onclick="deleteModelConfirm(${idx})">Delete</button>
+          </td>
+        </tr>`;
+      });
+      hfEl.innerHTML = '<table style="width:100%;border-collapse:collapse;font-size:13px">'+
+        '<thead><tr style="color:var(--text-2);font-size:10px;text-transform:uppercase;letter-spacing:.05em">'+
+        '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">Model</th>'+
+        '<th style="text-align:right;padding:.3rem .25rem;font-weight:700">Size</th>'+
+        '<th style="text-align:right;padding:.3rem .25rem;font-weight:700">Files</th>'+
+        '<th style="text-align:center;padding:.3rem .25rem;font-weight:700">Config</th>'+
+        '<th></th></tr></thead><tbody>'+rows.join('')+'</tbody></table>';
    }
-    
-    resultsDiv.innerHTML = '<p>Searching...</p>';
-    
-    // TODO: Implement actual HuggingFace API search
-    setTimeout(() => {
-        resultsDiv.innerHTML = '<p class="text-muted">Search functionality coming soon</p>';
-    }, 1000);
-}
-
-document.getElementById('download-form').addEventListener('submit', async (e) => {
-    e.preventDefault();
-    
-    const modelId = document.getElementById('model-id').value;
-    const filePattern = document.getElementById('file-pattern').value;
-    
-    document.getElementById('download-progress').style.display = 'block';
-    
-    // TODO: Implement actual download
-    setTimeout(() => {
-        alert('Download functionality coming soon');
-        hideDownloadModal();
-        document.getElementById('download-progress').style.display = 'none';
-    }, 1000);
-});

-// Load models on page load
-async function loadModels() {
-    // TODO: Implement loading models from API
+    // GGUF files
+    const gguf = d.gguf||[];
+    document.getElementById('gguf-file-badge').textContent = gguf.length ? `(${gguf.length})` : '';
+    if(!gguf.length){
+      ggufEl.innerHTML = '<span class="muted small">No GGUF files cached.</span>';
+    }else{
+      const rows = gguf.map(f=>{
+        const idx = _localModels.length;
+        _localModels.push({label:f.filename, path:f.path, cacheType:'gguf', size_gb:f.size_gb||0,
+          defaultType:f.model_type||'text_models', settings:f.settings||{}, in_config:f.in_config});
+        const loaded = _loadedKeys.has(f.path) || _loadedKeys.has(f.filename) || [..._loadedKeys].some(k=>k.endsWith(':'+f.path)||k.endsWith(':'+f.filename));
+        return `<tr style="border-top:1px solid var(--border)">
+          <td style="padding:.4rem .25rem;font-family:monospace;font-size:11px;max-width:320px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="${esc(f.filename)}">${esc(f.filename)}</td>
+          <td style="text-align:right;padding:.4rem .25rem;white-space:nowrap;color:var(--text-2)">${fmtGB(f.size_gb)}</td>
+          <td style="text-align:center;padding:.4rem .25rem">${f.in_config?'<span class="badge badge-ok">enabled</span>':'<span class="muted small">—</span>'}</td>
+          <td style="padding:.4rem .25rem;text-align:right;white-space:nowrap">
+            ${f.in_config?(loaded
+              ?`<button class="btn btn-ghost btn-sm" onclick="unloadModel(${idx})">Unload</button>`
+              :`<button class="btn btn-primary btn-sm" onclick="loadModel(${idx})">Load now</button>`):''}
+            <button class="btn btn-secondary btn-sm" onclick="openCfgModal(${idx})">${f.in_config?'Configure':'Add to CoderAI'}</button>
+            ${f.in_config?`<button class="btn btn-ghost btn-sm" onclick="disableModel(${idx})">Remove</button>`:''}
+            <button class="btn btn-danger btn-sm" onclick="deleteModelConfirm(${idx})">Delete</button>
+          </td>
+        </tr>`;
+      });
+      ggufEl.innerHTML = '<table style="width:100%;border-collapse:collapse;font-size:13px">'+
+        '<thead><tr style="color:var(--text-2);font-size:10px;text-transform:uppercase;letter-spacing:.05em">'+
+        '<th style="text-align:left;padding:.3rem .25rem;font-weight:700">File</th>'+
+        '<th style="text-align:right;padding:.3rem .25rem;font-weight:700">Size</th>'+
+        '<th style="text-align:center;padding:.3rem .25rem;font-weight:700">Config</th>'+
+        '<th></th></tr></thead><tbody>'+rows.join('')+'</tbody></table>';
+    }
+  }catch(e){
+    hfEl.innerHTML = ggufEl.innerHTML = `<span class="muted small">Error: ${esc(e.message)}</span>`;
+  }
+}
+
+let _loadedKeys = new Set();
+
+// Replace _loadedKeys with the `loaded` list reported by
+// /admin/api/model-loaded-status. On a non-OK response or any error the
+// previous set is kept.
+async function refreshLoadedStatus(){
+  try{
+    const resp = await fetch('/admin/api/model-loaded-status');
+    if(!resp.ok) return;
+    const payload = await resp.json();
+    _loadedKeys = new Set(payload.loaded||[]);
+  }catch{}
+}
+
+// Refresh everything on the local-models view: first the loaded-model set
+// (the model tables read it while rendering), then kick off the cache stats
+// and model list reloads without waiting for them.
+async function refreshLocal(){
+  return refreshLoadedStatus().then(()=>{
+    loadCacheStats();
+    loadCachedModels();
+  });
+}
+
+refreshLocal();
+
+// Ask for confirmation, then delete a whole model cache via
+// DELETE /admin/api/cache?cache_type=<type>.
+//   type: 'hf' | 'gguf' | 'all' (any other value is shown and sent as-is).
+// On success the local view is refreshed and the freed size is reported; on
+// failure the server's `detail` is shown when present, like the other handlers.
+async function clearCacheConfirm(type){
+  const labels = {hf:'HuggingFace', gguf:'GGUF', all:'ALL'};
+  // Fall back to the raw type so the prompt never reads "Delete undefined model cache?".
+  const label = labels[type] || type;
+  if(!confirm(`Delete ${label} model cache? This cannot be undone.`)) return;
+  try{
+    const r = await fetch('/admin/api/cache?cache_type='+encodeURIComponent(type), {method:'DELETE'});
+    const d = await r.json();
+    if(d.success){
+      refreshLocal();
+      alert(`Cache cleared. Freed ${fmtBytes(d.freed_bytes||0)}.`);
+    }else{
+      // Surface the server-provided reason instead of a generic message.
+      alert(d.detail ? 'Error: '+d.detail : 'Error clearing cache');
+    }
+  }catch(e){alert('Error: '+e.message)}
+}
+
+// Ask for confirmation, then delete one cached model/file.
+//   idx: index into _localModels (as embedded in the row's onclick handler).
+// HuggingFace entries are addressed by their path (the repo id), GGUF entries
+// by their label (the file name).
+async function deleteModelConfirm(idx){
+  const entry = _localModels[idx];
+  if(!confirm(`Delete "${entry.label}" from local cache? This cannot be undone.`)) return;
+  const key = entry.cacheType === 'hf' ? entry.path : entry.label;
+  try{
+    const url = '/admin/api/cached-models/'+encodeURIComponent(key)+'?cache_type='+entry.cacheType;
+    const resp = await fetch(url, {method:'DELETE'});
+    const result = await resp.json();
+    if(!result.success){
+      alert('Error: '+(result.detail||'Unknown'));
+      return;
+    }
+    refreshLocal();
+  }catch(e){alert('Error: '+e.message)}
+}
+
+// Populate the model configuration modal from _localModels[idx] and open it.
+// Saved settings win; otherwise each field gets the default listed below.
+function openCfgModal(idx){
+  const model = _localModels[idx];
+  const cfg = model.settings || {};
+  const el = id => document.getElementById(id);
+  // Optional numeric fields: show the saved number, or leave the input empty.
+  const orBlank = v => (v != null ? v : '');
+
+  el('cfg-modal-title').textContent = model.in_config ? 'Configure model' : 'Add to CoderAI';
+  el('cfg-id-label').textContent = model.label;
+
+  // Show estimate hint from file size (GGUF: ~1.1x file size; HF: ~1.2x size_gb)
+  const estimate = _estimateVram(model);
+  el('cfg-used-vram-hint').textContent = estimate ? `Estimated: ~${estimate.toFixed(1)} GB` : '';
+
+  // Map legacy gguf_models to text_models
+  const typeName = cfg.model_type || model.defaultType;
+
+  const values = {
+    'cfg-path':             model.path,
+    'cfg-orig-type':        model.defaultType,
+    'cfg-type':             typeName === 'gguf_models' ? 'text_models' : typeName,
+    'cfg-alias':            cfg.alias || '',
+    'cfg-backend':          cfg.backend || 'auto',
+    'cfg-load-mode':        cfg.load_mode || 'on-request',
+    'cfg-used-vram':        orBlank(cfg.used_vram_gb),
+    'cfg-gpu-layers':       cfg.n_gpu_layers !== undefined ? cfg.n_gpu_layers : -1,
+    'cfg-n-ctx':            cfg.n_ctx || 2048,
+    'cfg-max-gpu':          orBlank(cfg.max_gpu_percent),
+    'cfg-ram-gb':           orBlank(cfg.manual_ram_gb),
+    'cfg-offload-strategy': cfg.offload_strategy || 'auto',
+    'cfg-offload-dir':      cfg.offload_dir || './offload',
+    'cfg-sysprompt':        cfg.system_prompt || '',
+    'cfg-parser':           cfg.parser || 'auto',
+  };
+  for(const [id, value] of Object.entries(values)) el(id).value = value;
+
+  const flags = {
+    'cfg-4bit':    !!cfg.load_in_4bit,
+    'cfg-8bit':    !!cfg.load_in_8bit,
+    'cfg-flash':   !!cfg.flash_attention,
+    'cfg-noram':   !!cfg.no_ram,
+    'cfg-tools':   !!cfg.tools_closer_prompt,
+    'cfg-grammar': !!cfg.grammar_guided,
+  };
+  for(const [id, on] of Object.entries(flags)) el(id).checked = on;
+
+  openModal('cfg-modal');
+}
+
+// Rough VRAM estimate (GB) from the on-disk size: 1.1x for GGUF files,
+// 1.2x for everything else. Returns null when the size is unknown or 0.
+function _estimateVram(m) {
+  if (!m.size_gb) return null;
+  const factor = m.cacheType === 'gguf' ? 1.1 : 1.2;
+  return m.size_gb * factor;
+}
+
+// Collect the configuration modal's fields and POST them to
+// /admin/api/model-configure. On success the modal is closed and the model
+// tables are reloaded; otherwise the server's `detail` (or the thrown error)
+// is shown in an alert.
+async function saveModelConfig(){
+  const field = id => document.getElementById(id);
+  // Optional float inputs: an empty or unparsable value is sent as null.
+  const floatOrNull = id => {
+    const v = parseFloat(field(id).value);
+    return isNaN(v) ? null : v;
+  };
+  // 0 is a meaningful value here (keep every layer off the GPU), so only an
+  // empty/unparsable input falls back to -1. A plain `|| -1` would silently
+  // turn an explicit 0 into -1.
+  const gpuLayers = parseInt(field('cfg-gpu-layers').value, 10);
+  const data = {
+    path:              field('cfg-path').value,
+    model_type:        field('cfg-type').value,
+    alias:             field('cfg-alias').value.trim() || null,
+    backend:           field('cfg-backend').value,
+    load_mode:         field('cfg-load-mode').value,
+    used_vram_gb:      floatOrNull('cfg-used-vram'),
+    n_gpu_layers:      isNaN(gpuLayers) ? -1 : gpuLayers,
+    n_ctx:             parseInt(field('cfg-n-ctx').value, 10) || 2048,
+    max_gpu_percent:   floatOrNull('cfg-max-gpu'),
+    manual_ram_gb:     floatOrNull('cfg-ram-gb'),
+    load_in_4bit:      field('cfg-4bit').checked,
+    load_in_8bit:      field('cfg-8bit').checked,
+    flash_attention:   field('cfg-flash').checked,
+    no_ram:            field('cfg-noram').checked,
+    offload_strategy:  field('cfg-offload-strategy').value,
+    offload_dir:       field('cfg-offload-dir').value.trim() || './offload',
+    system_prompt:     field('cfg-sysprompt').value.trim() || null,
+    parser:            field('cfg-parser').value,
+    tools_closer_prompt: field('cfg-tools').checked,
+    grammar_guided:    field('cfg-grammar').checked,
+  };
+  try{
+    const r = await fetch('/admin/api/model-configure',{
+      method:'POST', headers:{'Content-Type':'application/json'},
+      body: JSON.stringify(data)
+    });
+    const d = await r.json();
+    if(d.success){ closeModal('cfg-modal'); loadCachedModels(); }
+    else alert('Error: '+(d.detail||'Unknown'));
+  }catch(e){ alert('Error: '+e.message); }
 }

-loadModels();
+// Ask the server to load _localModels[idx] (POST /admin/api/model-load).
+// The row's "Load now" button, located through its onclick attribute, is
+// disabled and relabelled while the request runs and restored on failure.
+// On success the whole local view is refreshed, which re-renders the row.
+async function loadModel(idx){
+  const model = _localModels[idx];
+  const button = document.querySelector(`button[onclick="loadModel(${idx})"]`);
+  const setBusy = busy => {
+    if(!button) return;
+    button.disabled = busy;
+    button.textContent = busy ? 'Loading…' : 'Load now';
+  };
+
+  setBusy(true);
+  try{
+    const resp = await fetch('/admin/api/model-load',{
+      method:'POST', headers:{'Content-Type':'application/json'},
+      body: JSON.stringify({path: model.path})
+    });
+    const result = await resp.json();
+    if(result.success){
+      refreshLocal();
+      return;
+    }
+    setBusy(false);
+    alert('Error: '+(result.detail||'Unknown'));
+  }catch(e){
+    setBusy(false);
+    alert('Error: '+e.message);
+  }
+}
+
+// Ask the server to unload _localModels[idx] (POST /admin/api/model-unload)
+// and refresh the local view on success; failures are reported via alert.
+async function unloadModel(idx){
+  const model = _localModels[idx];
+  const request = {
+    method:'POST', headers:{'Content-Type':'application/json'},
+    body: JSON.stringify({path: model.path})
+  };
+  try{
+    const resp = await fetch('/admin/api/model-unload', request);
+    const result = await resp.json();
+    if(!result.success){
+      alert('Error: '+(result.detail||'Unknown'));
+      return;
+    }
+    refreshLocal();
+  }catch(e){ alert('Error: '+e.message); }
+}
+
+// After confirmation, remove _localModels[idx] from the CoderAI config
+// (POST /admin/api/model-disable). The cached files are not touched; only the
+// model tables are reloaded on success.
+async function disableModel(idx){
+  const model = _localModels[idx];
+  const confirmed = confirm('Remove this model from CoderAI config? It will stay in the local cache.');
+  if(!confirmed) return;
+  const request = {
+    method:'POST', headers:{'Content-Type':'application/json'},
+    body: JSON.stringify({path: model.path})
+  };
+  try{
+    const resp = await fetch('/admin/api/model-disable', request);
+    const result = await resp.json();
+    if(!result.success){
+      alert('Error: '+(result.detail||'Unknown'));
+      return;
+    }
+    loadCachedModels();
+  }catch(e){ alert('Error: '+e.message); }
+}
+
+// Upload the selected .gguf file to /admin/api/model-upload in sequential
+// 512 KB chunks, updating the progress bar after each chunk.
+// Each request carries: chunk, filename, chunk_index, total_chunks, upload_id.
+// When the server answers with `complete`, the modal is closed after a short
+// delay and the local view is refreshed. On any failure the file picker is
+// shown again so the user can retry instead of being left on a dead progress view.
+async function startUpload(){
+  const fileInput = document.getElementById('upload-file');
+  const file = fileInput.files[0];
+  if(!file){ alert('Please select a file'); return; }
+  if(!file.name.endsWith('.gguf')){ alert('Only .gguf files are supported'); return; }
+  // A zero-byte file produces zero chunks: the loop below would never run and
+  // the progress view would never be dismissed.
+  if(file.size === 0){ alert('Selected file is empty'); return; }
+
+  const form     = document.getElementById('upload-form');
+  const progress = document.getElementById('upload-progress');
+  const bar      = document.getElementById('upload-bar');
+  const pctLabel = document.getElementById('upload-pct');
+  const status   = document.getElementById('upload-status');
+
+  // Swap the progress view back to the form.
+  const showForm = () => {
+    form.style.display='block';
+    progress.style.display='none';
+  };
+  // Restore the form first so the UI is usable once the alert is dismissed.
+  const fail = msg => { showForm(); alert(msg); };
+
+  form.style.display='none';
+  progress.style.display='block';
+  document.getElementById('upload-filename').textContent = file.name;
+  // Clear whatever a previous upload left on the bar.
+  bar.style.width = '0%';
+  pctLabel.textContent = '0%';
+
+  const chunkSize = 512 * 1024; // 512KB
+  const totalChunks = Math.ceil(file.size / chunkSize);
+  const uploadId = Date.now() + '_' + file.name;
+
+  for(let i=0; i<totalChunks; i++){
+    const start = i * chunkSize;
+    const end = Math.min(start + chunkSize, file.size);
+    const chunk = file.slice(start, end);
+
+    const formData = new FormData();
+    formData.append('chunk', chunk);
+    formData.append('filename', file.name);
+    formData.append('chunk_index', i);
+    formData.append('total_chunks', totalChunks);
+    formData.append('upload_id', uploadId);
+
+    try{
+      const r = await fetch('/admin/api/model-upload',{method:'POST', body:formData});
+      const d = await r.json();
+      if(!d.success){ fail('Upload failed: '+(d.detail||'Unknown')); return; }
+
+      const pct = Math.round((i+1)/totalChunks*100);
+      bar.style.width = pct+'%';
+      pctLabel.textContent = pct+'%';
+      status.textContent = `Chunk ${i+1}/${totalChunks}`;
+
+      if(d.complete){
+        status.textContent = 'Complete!';
+        // Leave the completed state visible briefly before closing.
+        setTimeout(()=>{
+          closeModal('upload-modal');
+          showForm();
+          fileInput.value='';
+          refreshLocal();
+        }, 1500);
+        return;
+      }
+    }catch(e){ fail('Upload error: '+e.message); return; }
+  }
+}
 </script>
 {% endblock %}
--- a/codai/admin/templates/settings.html
+++ b/codai/admin/templates/settings.html
+{% extends "base.html" %}
+{% block title %}Settings — CoderAI{% endblock %}
+
+{% block content %}
+<div class="page-header">
+  <div>
+    <h1>Settings</h1>
+    <p>Server configuration — restart CoderAI to apply changes</p>
+  </div>
+  <div class="header-actions">
+    <span id="save-status" class="muted small" style="margin-right:.5rem"></span>
+    <button class="btn btn-primary" onclick="saveSettings()">Save changes</button>
+  </div>
+</div>
+
+<div id="settings-alert" style="display:none"></div>
+
+<!-- Server -->
+<div class="card mb-0">
+  <div class="card-title">Server</div>
+  <div style="display:grid;grid-template-columns:1fr 160px;gap:1rem;align-items:start">
+    <div class="form-row" style="margin:0">
+      <label class="form-label">Listen host</label>
+      <input type="text" id="s-host" class="form-input" placeholder="0.0.0.0">
+      <span class="form-hint">IP address or hostname to bind to (0.0.0.0 = all interfaces)</span>
+    </div>
+    <div class="form-row" style="margin:0">
+      <label class="form-label">Port</label>
+      <input type="number" id="s-port" class="form-input" placeholder="8000" min="1" max="65535">
+    </div>
+  </div>
+  <div class="form-row" style="margin-top:1rem;margin-bottom:.25rem">
+    <label style="display:flex;align-items:center;gap:.5rem;cursor:pointer">
+      <input type="checkbox" id="s-https" onchange="toggleHttps()">
+      <span style="font-size:13px;font-weight:500">Enable HTTPS</span>
+    </label>
+  </div>
+  <div id="https-fields" style="display:none;margin-top:.75rem">
+    <div class="form-row">
+      <label class="form-label">SSL key path <span class="muted">(leave blank to auto-generate)</span></label>
+      <input type="text" id="s-key" class="form-input" placeholder="/path/to/key.pem">
+    </div>
+    <div class="form-row" style="margin:0">
+      <label class="form-label">SSL certificate path</label>
+      <input type="text" id="s-cert" class="form-input" placeholder="/path/to/cert.pem">
+    </div>
+  </div>
+</div>
+
+<!-- Storage -->
+<div class="card mb-0" style="margin-top:1rem">
+  <div class="card-title">Storage</div>
+  <div class="form-row">
+    <label class="form-label">HuggingFace cache directory <span class="muted">(leave blank for default ~/.cache/huggingface)</span></label>
+    <input type="text" id="s-hf-cache" class="form-input" placeholder="e.g. /data/models/huggingface">
+  </div>
+  <div class="form-row" style="margin:0">
+    <label class="form-label">GGUF cache directory <span class="muted">(leave blank for default ~/.cache/coderai/models)</span></label>
+    <input type="text" id="s-gguf-cache" class="form-input" placeholder="e.g. /data/models/gguf">
+  </div>
+</div>
+{% endblock %}
+
+{% block scripts %}
+<script>
+// Show the SSL key/certificate inputs only while "Enable HTTPS" is ticked.
+function toggleHttps(){
+  const enabled = document.getElementById('s-https').checked;
+  const fields  = document.getElementById('https-fields');
+  fields.style.display = enabled ? 'block' : 'none';
+}
+
+// Show a message in the #settings-alert banner.
+//   type: 'error' gets the error style and stays visible; any other value
+//         gets the info style and hides itself after 4 seconds.
+function showAlert(type, msg){
+  const isError = type === 'error';
+  const box = document.getElementById('settings-alert');
+  box.className = 'alert alert-' + (isError ? 'error' : 'info');
+  box.textContent = msg;
+  box.style.display = 'flex';
+  if(isError) return;
+  setTimeout(()=>{ box.style.display='none'; }, 4000);
+}
+
+// Fetch the current settings from /admin/api/settings and populate the form.
+// Missing keys fall back to the defaults shown below. A non-OK response is
+// reported as an error instead of being parsed: otherwise an error payload
+// would silently fill the form with defaults, which the user could then save
+// over the real configuration.
+async function loadSettings(){
+  try{
+    const r = await fetch('/admin/api/settings');
+    if(!r.ok){
+      // Prefer the server's `detail`; the error body may not be JSON at all.
+      let reason = r.statusText;
+      try{ reason = (await r.json()).detail || reason; }catch{}
+      throw new Error(reason || ('HTTP '+r.status));
+    }
+    const d = await r.json();
+    document.getElementById('s-host').value  = d.server?.host ?? '0.0.0.0';
+    document.getElementById('s-port').value  = d.server?.port ?? 8000;
+    document.getElementById('s-https').checked = !!d.server?.https;
+    document.getElementById('s-key').value   = d.server?.https_key_path ?? '';
+    document.getElementById('s-cert').value  = d.server?.https_cert_path ?? '';
+    document.getElementById('s-hf-cache').value   = d.models?.hf_cache_dir ?? '';
+    document.getElementById('s-gguf-cache').value = d.models?.gguf_cache_dir ?? '';
+    // Sync the visibility of the SSL fields with the checkbox just set.
+    toggleHttps();
+  }catch(e){ showAlert('error','Failed to load settings: '+e.message); }
+}
+
+// Collect the settings form and POST it to /admin/api/settings.
+// Blank path inputs are sent as null; a blank host becomes '0.0.0.0' and a
+// blank/unparsable (or 0) port becomes 8000. The outcome is shown in the
+// alert banner.
+async function saveSettings(){
+  const raw = id => document.getElementById(id).value;
+  const strOrNull = id => raw(id).trim() || null;
+
+  const server = {
+    host: raw('s-host').trim() || '0.0.0.0',
+    port: parseInt(raw('s-port')) || 8000,
+    https: document.getElementById('s-https').checked,
+    https_key_path:  strOrNull('s-key'),
+    https_cert_path: strOrNull('s-cert'),
+  };
+  const models = {
+    hf_cache_dir:   strOrNull('s-hf-cache'),
+    gguf_cache_dir: strOrNull('s-gguf-cache'),
+  };
+
+  try{
+    const resp = await fetch('/admin/api/settings',{
+      method:'POST', headers:{'Content-Type':'application/json'},
+      body: JSON.stringify({server, models})
+    });
+    if(!resp.ok){
+      const err = await resp.json();
+      showAlert('error', err.detail||'Save failed');
+      return;
+    }
+    showAlert('info','Settings saved. Restart CoderAI to apply.');
+  }catch(e){ showAlert('error','Error: '+e.message); }
+}
+
+loadSettings();
+</script>
+{% endblock %}
--- a/codai/admin/templates/tokens.html
+++ b/codai/admin/templates/tokens.html
 {% extends "base.html" %}
-
-{% block title %}API Tokens - CoderAI{% endblock %}
+{% block title %}Tokens — CoderAI{% endblock %}

 {% block content %}
 <div class="page-header">
+  <div>
    <h1>API Tokens</h1>
-    <div class="header-actions">
-        <button class="btn btn-primary" onclick="showCreateTokenModal()">Create Token</button>
-    </div>
+    <p>Access tokens for API clients</p>
+  </div>
+  <div class="header-actions">
+    <button class="btn btn-primary" onclick="openModal('create-modal')">New token</button>
+  </div>
 </div>

-<div class="card">
-    <div class="table-responsive">
-        <table class="table">
-            <thead>
-                <tr>
-                    <th>Name</th>
-                    <th>Token</th>
-                    <th>Provider</th>
-                    <th>Created</th>
-                    <th>Last Used</th>
-                    <th>Actions</th>
-                </tr>
-            </thead>
-            <tbody id="tokens-table">
-                <tr>
-                    <td colspan="6" class="text-center text-muted">No tokens created</td>
-                </tr>
-            </tbody>
-        </table>
-    </div>
+<div class="table-wrap">
+  <table>
+    <thead>
+      <tr><th>Name</th><th>Token</th><th>Format</th><th>Created</th><th>Last used</th><th></th></tr>
+    </thead>
+    <tbody id="tokens-body">
+      <tr class="empty-row"><td colspan="6">No tokens — create one to get started</td></tr>
+    </tbody>
+  </table>
 </div>

-<!-- Create Token Modal -->
-<div id="create-token-modal" class="modal">
-    <div class="modal-content">
-        <div class="modal-header">
-            <h2>Create API Token</h2>
-            <button class="modal-close" onclick="hideCreateTokenModal()">&times;</button>
-        </div>
-        <div class="modal-body">
-            <form id="create-token-form">
-                <div class="form-group">
-                    <label for="token-name">Token Name</label>
-                    <input type="text" id="token-name" class="form-control" 
-                           placeholder="e.g., Production API" required>
-                    <small class="form-text">A descriptive name for this token</small>
-                </div>
-                
-                <div class="form-group">
-                    <label for="token-provider">Provider Format</label>
-                    <select id="token-provider" class="form-control">
-                        <option value="openai">OpenAI (sk-...)</option>
-                        <option value="anthropic">Anthropic (sk-ant-...)</option>
-                        <option value="custom">Custom</option>
-                    </select>
-                </div>
-                
-                <div class="form-actions">
-                    <button type="submit" class="btn btn-primary">Create Token</button>
-                    <button type="button" class="btn btn-secondary" onclick="hideCreateTokenModal()">Cancel</button>
-                </div>
-            </form>
-        </div>
+<div id="create-modal" class="modal">
+  <div class="modal-box">
+    <div class="modal-head">
+      <span class="modal-title">New API token</span>
+      <button class="modal-close" onclick="closeModal('create-modal')">×</button>
+    </div>
+    <div class="modal-body">
+      <div class="form-row">
+        <label class="form-label">Name</label>
+        <input type="text" id="t-name" class="form-input" placeholder="e.g. My App">
+        <span class="form-hint">A label to identify this token</span>
+      </div>
+      <div class="form-row">
+        <label class="form-label">Format</label>
+        <select id="t-provider" class="form-input">
+          <option value="openai">OpenAI (sk-coderai-…)</option>
+          <option value="anthropic">Anthropic</option>
+          <option value="custom">Custom</option>
+        </select>
+      </div>
+      <div class="form-actions">
+        <button class="btn btn-primary" onclick="createToken()">Generate</button>
+        <button class="btn btn-ghost" onclick="closeModal('create-modal')">Cancel</button>
+      </div>
    </div>
+  </div>
 </div>

-<!-- Show Token Modal -->
-<div id="show-token-modal" class="modal">
-    <div class="modal-content">
-        <div class="modal-header">
-            <h2>Token Created</h2>
-            <button class="modal-close" onclick="hideShowTokenModal()">&times;</button>
-        </div>
-        <div class="modal-body">
-            <div class="alert alert-warning">
-                <strong>Important:</strong> Copy this token now. You won't be able to see it again!
-            </div>
-            
-            <div class="token-display">
-                <code id="new-token-value"></code>
-                <button class="btn btn-secondary btn-sm" onclick="copyToken()">Copy</button>
-            </div>
-            
-            <div class="form-actions">
-                <button class="btn btn-primary" onclick="hideShowTokenModal()">Done</button>
-            </div>
-        </div>
+<div id="show-modal" class="modal">
+  <div class="modal-box">
+    <div class="modal-head">
+      <span class="modal-title">Token created</span>
+      <button class="modal-close" onclick="closeModal('show-modal')">×</button>
    </div>
+    <div class="modal-body">
+      <div class="alert alert-warning">Copy this now — it won't be shown again.</div>
+      <div class="token-box">
+        <code id="new-token"></code>
+        <button class="btn btn-ghost btn-sm" id="copy-btn" onclick="copyToken()">Copy</button>
+      </div>
+      <div class="form-actions">
+        <button class="btn btn-primary" onclick="closeModal('show-modal')">Done</button>
+      </div>
+    </div>
+  </div>
 </div>
 {% endblock %}

 {% block scripts %}
 <script>
-function showCreateTokenModal() {
-    document.getElementById('create-token-modal').style.display = 'flex';
-}
-
-function hideCreateTokenModal() {
-    document.getElementById('create-token-modal').style.display = 'none';
-    document.getElementById('create-token-form').reset();
-}
-
-function showShowTokenModal(token) {
-    document.getElementById('new-token-value').textContent = token;
-    document.getElementById('show-token-modal').style.display = 'flex';
+// Show the modal whose element id is `id` by adding the `show` class.
+function openModal(id) {
+  document.getElementById(id).classList.add('show');
+}
+
+// Hide the modal whose element id is `id`. Closing the create dialog also
+// clears its name field so the next open starts empty.
+function closeModal(id) {
+  const modal = document.getElementById(id);
+  modal.classList.remove('show');
+  if (id === 'create-modal') {
+    document.getElementById('t-name').value = '';
+  }
 }

-function hideShowTokenModal() {
-    document.getElementById('show-token-modal').style.display = 'none';
+// Copy the newly created token to the clipboard and flash "Copied!" on the
+// button for two seconds. The async Clipboard API is only available in secure
+// contexts (HTTPS / localhost) and its promise may reject; in both cases the
+// token text is selected instead so it can be copied by hand.
+function copyToken() {
+  const tokenEl = document.getElementById('new-token');
+  const btn = document.getElementById('copy-btn');
+  const flash = (label) => {
+    btn.textContent = label;
+    setTimeout(() => { btn.textContent = 'Copy'; }, 2000);
+  };
+  const selectForManualCopy = () => {
+    const range = document.createRange();
+    range.selectNodeContents(tokenEl);
+    const selection = window.getSelection();
+    selection.removeAllRanges();
+    selection.addRange(range);
+    flash('Copy manually');
+  };
+  if (!navigator.clipboard) { selectForManualCopy(); return; }
+  navigator.clipboard.writeText(tokenEl.textContent)
+    .then(() => flash('Copied!'), selectForManualCopy);
 }

-function copyToken() {
-    const token = document.getElementById('new-token-value').textContent;
-    navigator.clipboard.writeText(token).then(() => {
-        alert('Token copied to clipboard');
-    });
+// Format a timestamp as a short local date (month, day, year).
+// Note: an unparseable value does not throw — toLocaleDateString() yields the
+// string "Invalid Date" — so the catch branch only covers unexpected errors.
+function fmt(s) {
+  try { return new Date(s).toLocaleDateString(undefined, {month:'short',day:'numeric',year:'numeric'}); } catch { return s; }
 }
+// Escape a value for insertion into HTML. Quotes are escaped as well as
+// & < > so the result is safe in quoted attribute values, not only in text.
+function esc(s) {
+  return String(s)
+    .replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;')
+    .replace(/"/g,'&quot;').replace(/'/g,'&#39;');
+}

 async function loadTokens() {
-    try {
-        const response = await fetch('/admin/api/tokens');
-        const tokens = await response.json();
-        
-        const tbody = document.getElementById('tokens-table');
-        if (tokens.length === 0) {
-            tbody.innerHTML = '<tr><td colspan="6" class="text-center text-muted">No tokens created</td></tr>';
-            return;
-        }
-        
-        tbody.innerHTML = tokens.map(token => `
-            <tr>
-                <td>${token.name}</td>
-                <td><code>${token.token.substring(0, 20)}...</code></td>
-                <td>${token.provider}</td>
-                <td>${new Date(token.created_at).toLocaleDateString()}</td>
-                <td>${token.last_used ? new Date(token.last_used).toLocaleDateString() : 'Never'}</td>
-                <td>
-                    <button class="btn btn-danger btn-sm" onclick="deleteToken(${token.id})">Delete</button>
-                </td>
-            </tr>
-        `).join('');
-    } catch (error) {
-        console.error('Failed to load tokens:', error);
+  // Fetch the token list and render it into #tokens-body. Failures are logged
+  // and shown in the table instead of being silently dropped.
+  try {
+    const r = await fetch('/admin/api/tokens');
+    // An error response carries a JSON object (e.g. {detail: …}), not an array;
+    // without this check it would fall into the "no tokens" branch below.
+    if (!r.ok) throw new Error('HTTP ' + r.status);
+    const tokens = await r.json();
+    const tbody = document.getElementById('tokens-body');
+    if (!tokens.length) {
+      tbody.innerHTML = '<tr class="empty-row"><td colspan="6">No tokens — create one to get started</td></tr>';
+      return;
    }
+    tbody.innerHTML = tokens.map(t => `
+      <tr>
+        <td class="td-name">${esc(t.name)}</td>
+        <td><code>${esc(t.token.substring(0,28))}…</code></td>
+        <td><span class="badge badge-user">${esc(t.provider)}</span></td>
+        <td class="mono small dim">${esc(fmt(t.created_at))}</td>
+        <td class="dim small">${t.last_used ? esc(fmt(t.last_used)) : 'Never'}</td>
+        <td style="text-align:right"><button class="btn btn-danger btn-sm" onclick="delToken(${t.id})">Delete</button></td>
+      </tr>`).join('');
+  } catch (err) {
+    console.error('Failed to load tokens:', err);
+    document.getElementById('tokens-body').innerHTML =
+      '<tr class="empty-row"><td colspan="6">Failed to load tokens</td></tr>';
+  }
 }

-document.getElementById('create-token-form').addEventListener('submit', async (e) => {
-    e.preventDefault();
-    
-    const name = document.getElementById('token-name').value;
-    const provider = document.getElementById('token-provider').value;
-    
-    try {
-        const response = await fetch('/admin/api/tokens', {
-            method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
-            body: JSON.stringify({ name, provider })
-        });
-        
-        if (response.ok) {
-            const data = await response.json();
-            hideCreateTokenModal();
-            showShowTokenModal(data.token);
-            loadTokens();
-        } else {
-            alert('Failed to create token');
-        }
-    } catch (error) {
-        alert('Error: ' + error.message);
+// Create a token from the dialog fields. An empty name just refocuses the
+// input. On success the create dialog closes, the new token is shown in the
+// "show" dialog and the table reloads; on an error response the server's
+// `detail` (or 'Failed') is alerted, and a thrown error alerts its message.
+async function createToken() {
+  const nameInput = document.getElementById('t-name');
+  const name = nameInput.value.trim();
+  if (!name) {
+    nameInput.focus();
+    return;
+  }
+  try {
+    const provider = document.getElementById('t-provider').value;
+    const resp = await fetch('/admin/api/tokens', {
+      method: 'POST',
+      headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({name, provider}),
+    });
+    if (!resp.ok) {
+      const failure = await resp.json();
+      alert(failure.detail || 'Failed');
+    } else {
+      const created = await resp.json();
+      closeModal('create-modal');
+      document.getElementById('new-token').textContent = created.token;
+      openModal('show-modal');
+      loadTokens();
    }
-});
+  } catch (e) { alert(e.message); }
+}

-async function deleteToken(tokenId) {
-    if (!confirm('Are you sure you want to delete this token? This cannot be undone.')) {
-        return;
-    }
-    
-    try {
-        const response = await fetch(`/admin/api/tokens/${tokenId}`, {
-            method: 'DELETE'
-        });
-        
-        if (response.ok) {
-            loadTokens();
-        } else {
-            alert('Failed to delete token');
-        }
-    } catch (error) {
-        alert('Error: ' + error.message);
-    }
+// Delete the token with the given id after confirmation, then reload the
+// table. A rejected fetch (network failure) is reported with an alert rather
+// than surfacing as an unhandled promise rejection.
+async function delToken(id) {
+  if (!confirm('Delete this token? Clients using it will lose access immediately.')) return;
+  try {
+    const r = await fetch('/admin/api/tokens/'+id, {method:'DELETE'});
+    if (r.ok) loadTokens(); else alert('Failed to delete');
+  } catch (err) {
+    alert(err.message);
+  }
 }

 loadTokens();

--- a/codai/admin/templates/users.html
+++ b/codai/admin/templates/users.html
 {% extends "base.html" %}
-
-{% block title %}Users - CoderAI{% endblock %}
+{% block title %}Users — CoderAI{% endblock %}

 {% block content %}
 <div class="page-header">
+  <div>
    <h1>Users</h1>
-    <div class="header-actions">
-        <button class="btn btn-primary" onclick="showCreateUserModal()">Create User</button>
-    </div>
+    <p>{{ users|length }} account{{ 's' if users|length != 1 else '' }}</p>
+  </div>
+  <div class="header-actions">
+    <button class="btn btn-primary" onclick="openModal('add-modal')">Add user</button>
+  </div>
 </div>

-<div class="card">
-    <div class="table-responsive">
-        <table class="table">
-            <thead>
-                <tr>
-                    <th>Username</th>
-                    <th>Role</th>
-                    <th>Created</th>
-                    <th>Actions</th>
-                </tr>
-            </thead>
-            <tbody>
-                {% for user in users %}
-                <tr>
-                    <td>{{ user.username }}</td>
-                    <td>
-                        <span class="badge {% if user.role == 'admin' %}badge-primary{% else %}badge-secondary{% endif %}">
-                            {{ user.role }}
-                        </span>
-                    </td>
-                    <td>{{ user.created_at[:10] }}</td>
-                    <td>
-                        {% if user.username != username %}
-                        <button class="btn btn-danger btn-sm" onclick="deleteUser({{ user.id }}, '{{ user.username }}')">Delete</button>
-                        {% else %}
-                        <a href="/admin/change-password" class="btn btn-secondary btn-sm">Change Password</a>
-                        {% endif %}
-                    </td>
-                </tr>
-                {% endfor %}
-            </tbody>
-        </table>
-    </div>
+<div class="table-wrap">
+  <table>
+    <thead>
+      <tr><th>User</th><th>Role</th><th>Created</th><th></th></tr>
+    </thead>
+    <tbody>
+      {# One row per user; the signed-in user gets a "you" badge and a password link instead of Delete. #}
+      {% for user in users %}
+      <tr>
+        <td class="td-name">
+          {{ user.username }}
+          {% if user.username == username %}<span class="badge badge-user" style="margin-left:.375rem">you</span>{% endif %}
+        </td>
+        <td><span class="badge {% if user.role == 'admin' %}badge-admin{% else %}badge-user{% endif %}">{{ user.role }}</span></td>
+        <td class="mono small dim">{{ user.created_at[:10] }}</td>
+        <td style="text-align:right">
+          {% if user.username == username %}
+          <a href="/admin/change-password" class="btn btn-ghost btn-sm">Change password</a>
+          {% else %}
+          {# HTML-escaping does not protect a JS string inside an attribute (the browser decodes &#39; back to a quote before the script runs). tojson emits a JS literal with quotes/angle brackets as \u escapes; it contains double quotes, hence the single-quoted attribute. #}
+          <button class="btn btn-danger btn-sm" onclick='delUser({{ user.id }}, {{ user.username|tojson }})'>Delete</button>
+          {% endif %}
+        </td>
+      </tr>
+      {% else %}
+      <tr class="empty-row"><td colspan="4">No users found</td></tr>
+      {% endfor %}
+    </tbody>
+  </table>
 </div>

-<!-- Create User Modal -->
-<div id="create-user-modal" class="modal">
-    <div class="modal-content">
-        <div class="modal-header">
-            <h2>Create User</h2>
-            <button class="modal-close" onclick="hideCreateUserModal()">&times;</button>
-        </div>
-        <div class="modal-body">
-            <form id="create-user-form">
-                <div class="form-group">
-                    <label for="new-username">Username</label>
-                    <input type="text" id="new-username" class="form-control" required>
-                </div>
-                
-                <div class="form-group">
-                    <label for="new-password">Password</label>
-                    <input type="password" id="new-password" class="form-control" required minlength="8">
-                    <small class="form-text">Minimum 8 characters</small>
-                </div>
-                
-                <div class="form-group">
-                    <label for="new-role">Role</label>
-                    <select id="new-role" class="form-control">
-                        <option value="user">User</option>
-                        <option value="admin">Admin</option>
-                    </select>
-                </div>
-                
-                <div class="form-actions">
-                    <button type="submit" class="btn btn-primary">Create User</button>
-                    <button type="button" class="btn btn-secondary" onclick="hideCreateUserModal()">Cancel</button>
-                </div>
-            </form>
-        </div>
+<!-- Add-user dialog: opened with openModal('add-modal'), submitted by addUser(); errors are shown in #add-err. -->
+<div id="add-modal" class="modal">
+  <div class="modal-box">
+    <div class="modal-head">
+      <span class="modal-title">Add user</span>
+      <button type="button" class="modal-close" aria-label="Close" onclick="closeModal('add-modal')">×</button>
    </div>
+    <div class="modal-body">
+      <div id="add-err" class="alert alert-error" style="display:none"></div>
+      <div class="form-row">
+        <!-- `for` ties each label to its control so clicking the label focuses the field. -->
+        <label class="form-label" for="new-uname">Username</label>
+        <input type="text" id="new-uname" class="form-input" placeholder="username" autocomplete="off">
+      </div>
+      <div class="form-row">
+        <label class="form-label" for="new-pwd">Password</label>
+        <input type="password" id="new-pwd" class="form-input" placeholder="••••••••" autocomplete="new-password">
+        <span class="form-hint">Minimum 8 characters</span>
+      </div>
+      <div class="form-row">
+        <label class="form-label" for="new-role">Role</label>
+        <select id="new-role" class="form-input">
+          <option value="user">User</option>
+          <option value="admin">Admin</option>
+        </select>
+      </div>
+      <div class="form-actions">
+        <button type="button" class="btn btn-primary" onclick="addUser()">Create</button>
+        <button type="button" class="btn btn-ghost" onclick="closeModal('add-modal')">Cancel</button>
+      </div>
+    </div>
+  </div>
 </div>
 {% endblock %}

 {% block scripts %}
 <script>
-function showCreateUserModal() {
-    document.getElementById('create-user-modal').style.display = 'flex';
+// Show the modal whose element id is `id` by adding the `show` class.
+function openModal(id) {
+  document.getElementById(id).classList.add('show');
+}
+
+// Hide the modal whose element id is `id`, hide the add-user error banner and
+// clear the username and password inputs.
+function closeModal(id) {
+  const byId = (elementId) => document.getElementById(elementId);
+  byId(id).classList.remove('show');
+  byId('add-err').style.display = 'none';
+  for (const fieldId of ['new-uname', 'new-pwd']) {
+    byId(fieldId).value = '';
+  }
 }

-function hideCreateUserModal() {
-    document.getElementById('create-user-modal').style.display = 'none';
-    document.getElementById('create-user-form').reset();
+// Validate the add-user form and POST it to /admin/api/users. Validation and
+// server errors are shown in the #add-err banner; success reloads the page.
+async function addUser() {
+  const uname = document.getElementById('new-uname').value.trim();
+  const pwd = document.getElementById('new-pwd').value;
+  const errEl = document.getElementById('add-err');
+  const fail = (message) => {
+    errEl.textContent = message;
+    errEl.style.display = 'flex';
+  };
+  errEl.style.display = 'none';
+  if (!uname) {
+    fail('Username required');
+    return;
+  }
+  if (pwd.length < 8) {
+    fail('Password must be at least 8 characters');
+    return;
+  }
+  try {
+    const role = document.getElementById('new-role').value;
+    const resp = await fetch('/admin/api/users', {
+      method: 'POST',
+      headers: {'Content-Type':'application/json'},
+      body: JSON.stringify({username: uname, password: pwd, role: role}),
+    });
+    if (resp.ok) {
+      location.reload();
+    } else {
+      const failure = await resp.json();
+      fail(failure.detail || 'Failed');
+    }
+  } catch (e) {
+    fail(e.message);
+  }
 }

-document.getElementById('create-user-form').addEventListener('submit', async (e) => {
-    e.preventDefault();
-    
-    const username = document.getElementById('new-username').value;
-    const password = document.getElementById('new-password').value;
-    const role = document.getElementById('new-role').value;
-    
-    try {
-        const response = await fetch('/admin/api/users', {
-            method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
-            body: JSON.stringify({ username, password, role })
-        });
-        
-        if (response.ok) {
-            hideCreateUserModal();
-            location.reload();
-        } else {
-            const error = await response.json();
-            alert('Failed to create user: ' + (error.detail || 'Unknown error'));
-        }
-    } catch (error) {
-        alert('Error: ' + error.message);
-    }
-});
-
-async function deleteUser(userId, username) {
-    if (!confirm(`Are you sure you want to delete user "${username}"?`)) {
-        return;
-    }
-    
-    try {
-        const response = await fetch(`/admin/api/users/${userId}`, {
-            method: 'DELETE'
-        });
-        
-        if (response.ok) {
-            location.reload();
-        } else {
-            const error = await response.json();
-            alert('Failed to delete user: ' + (error.detail || 'Unknown error'));
-        }
-    } catch (error) {
-        alert('Error: ' + error.message);
-    }
+// Delete a user after confirmation and reload the page on success. On an
+// error response the server's `detail` is alerted, falling back to 'Failed'
+// when the body is not JSON; a rejected fetch (network failure) alerts its
+// message instead of becoming an unhandled promise rejection.
+async function delUser(id, name) {
+  if (!confirm('Delete user "' + name + '"?')) return;
+  try {
+    const r = await fetch('/admin/api/users/'+id, {method:'DELETE'});
+    if (r.ok) { location.reload(); return; }
+    const body = await r.json().catch(() => ({}));
+    alert(body.detail || 'Failed');
+  } catch (err) {
+    alert(err.message);
+  }
 }
 </script>
 {% endblock %}
--- a/codai/api/app.py
+++ b/codai/api/app.py
@@ -91,6 +91,16 @@ app.include_router(text_router)
 app.include_router(admin_router)


+@app.exception_handler(401)
+async def unauthorized_redirect(request: Request, exc: HTTPException):
+    """On 401, redirect clients that accept text/html to /login; others get JSON with the exception's headers (e.g. WWW-Authenticate) kept."""
+    accept = request.headers.get("accept", "")
+    if "text/html" in accept:
+        from fastapi.responses import RedirectResponse
+        return RedirectResponse(url="/login", status_code=302)
+    return JSONResponse(status_code=401, content={"detail": exc.detail}, headers=getattr(exc, "headers", None))
+
+
 @app.get("/v1/models", response_model=ModelList)
 async def list_models():
    """List available models."""

--- a/codai/api/images.py
+++ b/codai/api/images.py
@@ -476,41 +476,56 @@ async def _generate_with_sdcpp(sd_model, request, global_args, http_request=None
    }


-def _load_sdcpp_model(model_path: str, global_args):
+def _load_sdcpp_model(model_path: str, global_args, model_config: dict = None):
    """
    Try to load a model using stable-diffusion-cpp-python.
    
    Returns the loaded StableDiffusion model or None.
    """
    from stable_diffusion_cpp import StableDiffusion
-    
+
    # Check for --no-ram mode
    no_ram = getattr(global_args, 'no_ram', False) if global_args else False
-    
+
    print(f"Loading sd.cpp model from: {model_path}")
-    
+
    # Build sd.cpp constructor args from config
    kwargs = {
        'model_path': model_path,
+        'offload_params_to_cpu': False,  # Use GPU by default
+        'keep_clip_on_cpu': False,
+        'keep_control_net_on_cpu': False,
+        'keep_vae_on_cpu': False,
    }
-    
+
    # Add optional paths from CLI args
    if global_args:
        if hasattr(global_args, 'vae_path') and global_args.vae_path:
            kwargs['vae_path'] = global_args.vae_path
        if hasattr(global_args, 'llm_path') and global_args.llm_path:
            kwargs['lora_model_dir'] = global_args.llm_path
-    
-    # --no-ram mode: maximize GPU offloading for sd.cpp
+
+    # If backend is explicitly cpu, offload to CPU
+    backend = (model_config or {}).get('backend', 'auto') if model_config else 'auto'
+    if backend == 'cpu':
+        kwargs['offload_params_to_cpu'] = True
+        kwargs['keep_clip_on_cpu'] = True
+        kwargs['keep_vae_on_cpu'] = True
+
    if no_ram:
-        # stable-diffusion-cpp-python supports n_threads and gpu-related params
-        # Force full GPU offload by keeping all operations on GPU
-        kwargs['keep_clip_on_cpu'] = False  # Don't offload CLIP to CPU
-        kwargs['keep_control_net_cpu'] = False  # Don't offload ControlNet to CPU
-        kwargs['keep_vae_on_cpu'] = False  # Don't offload VAE to CPU
        print("--no-ram mode: sd.cpp maximizing GPU usage (no CPU offload for CLIP/VAE/ControlNet)")
-    
-    sd_model = StableDiffusion(**kwargs)
+
+    try:  # first attempt uses kwargs exactly as built above
+        sd_model = StableDiffusion(**kwargs)
+    except Exception as e:  # retry once with CPU offload, only for memory/CUDA-looking errors
+        if 'cpu' not in str(backend) and ('memory' in str(e).lower() or 'cuda' in str(e).lower() or 'out of' in str(e).lower()):
+            print(f"GPU load failed ({e}), retrying with CPU offload...")
+            kwargs['offload_params_to_cpu'] = True
+            kwargs['keep_clip_on_cpu'] = True
+            kwargs['keep_vae_on_cpu'] = True  # NOTE(review): keep_control_net_on_cpu is left unchanged here — confirm intended
+            sd_model = StableDiffusion(**kwargs)  # a failure of the retry propagates to the caller
+        else:
+            raise  # backend already names cpu, or the error text matched no keyword: do not mask it
    return sd_model


@@ -665,7 +680,8 @@ async def create_image_generation(request: ImageGenerationRequest, http_request:
            
            # Only use sd.cpp if we have a local file path
            if resolved_path and os.path.isfile(resolved_path):
-                sd_model = _load_sdcpp_model(resolved_path, global_args)
+                cfg = multi_model_manager.config.get(model_key) or multi_model_manager.config.get(model_name) or {}
+                sd_model = _load_sdcpp_model(resolved_path, global_args, model_config=cfg)
                
                if sd_model is not None:
                    # Cache the loaded model in the manager

--- a/codai/cli.py
+++ b/codai/cli.py
@@ -123,12 +123,13 @@ def setup_default_config(config_dir: Path):
    # Default auth.json with admin / admin
    from pathlib import Path
    import secrets
-    from argon2 import PasswordHasher
-    if hasattr(argon2, 'PasswordHasher'):
-        ph = argon2.PasswordHasher()
+    try:
+        from argon2 import PasswordHasher
+        ph = PasswordHasher()
        default_admin_hash = ph.hash("admin")
-    else:
-        default_admin_hash = "argon2id$v=19$m=65536,t=3,p=4$...admin_hash_placeholder"
+    except ImportError:
+        from codai.admin.auth import hash_password
+        default_admin_hash = hash_password("admin")
    
    default_auth = {
        "users": [{

--- a/codai/config.py
+++ b/codai/config.py
@@ -29,12 +29,54 @@ class BackendConfig:
 class ModelsConfig:
    """Models configuration."""
    default_load_mode: str = "ondemand"
+    hf_cache_dir: Optional[str] = None
+    gguf_cache_dir: Optional[str] = None


 @dataclass
 class OffloadConfig:
    """Offload configuration."""
    directory: str = "./offload"
+    strategy: str = "auto"
+    max_gpu_percent: Optional[float] = None
+    no_ram: bool = False
+    load_in_4bit: bool = False
+    load_in_8bit: bool = False
+    manual_ram_gb: Optional[float] = None
+    flash_attention: bool = False
+
+
+@dataclass
+class VulkanConfig:
+    """Vulkan backend configuration: the "vulkan" section of the config file, expanded as VulkanConfig(**section) on load, so section keys must match field names."""
+    n_gpu_layers: int = -1  # NOTE(review): -1 presumably means "offload all layers" — confirm against the backend
+    n_ctx: int = 2048
+    device_id: int = 0
+    single_gpu: bool = False
+
+
+@dataclass
+class ImageConfig:
+    """Image generation configuration: the "image" section of the config file, expanded as ImageConfig(**section) on load, so section keys must match field names."""
+    llm_path: Optional[str] = None
+    vae_path: Optional[str] = None
+    sample_method: str = "res_multistep"
+    steps: int = 4
+    width: int = 512
+    height: int = 512
+    cfg_scale: float = 1.0
+    precision: str = "f32"
+    cpu_offload: bool = False
+    seed: Optional[int] = None  # NOTE(review): None presumably means "pick a random seed" — confirm with the consumer
+    vae_tiling: bool = False
+    clip_on_cpu: bool = False
+
+
+@dataclass
+class WhisperConfig:
+    """Whisper ASR configuration: the "whisper" section of the config file, expanded as WhisperConfig(**section) on load, so section keys must match field names."""
+    server_path: Optional[str] = None
+    server_port: int = 8744


 @dataclass
@@ -45,6 +87,9 @@ class Config:
    backend: BackendConfig = field(default_factory=BackendConfig)
    models: ModelsConfig = field(default_factory=ModelsConfig)
    offload: OffloadConfig = field(default_factory=OffloadConfig)
+    vulkan: VulkanConfig = field(default_factory=VulkanConfig)
+    image: ImageConfig = field(default_factory=ImageConfig)
+    whisper: WhisperConfig = field(default_factory=WhisperConfig)
    system_prompt: Optional[str] = None
    tools_closer_prompt: bool = False
    grammar_guided: bool = False
@@ -140,7 +185,8 @@ class ConfigManager:
                ph = PasswordHasher()
                default_admin_hash = ph.hash("admin")
            except ImportError:
-                default_admin_hash = "argon2id$v=19$m=65536,t=3,p=4$...admin_hash_placeholder"
+                from codai.admin.auth import hash_password
+                default_admin_hash = hash_password("admin")
            
            default_auth = {
                "users": [{
@@ -182,6 +228,9 @@ class ConfigManager:
                backend=BackendConfig(**config_data.get("backend", {})),
                models=ModelsConfig(**config_data.get("models", {})),
                offload=OffloadConfig(**config_data.get("offload", {})),
+                vulkan=VulkanConfig(**config_data.get("vulkan", {})),
+                image=ImageConfig(**config_data.get("image", {})),
+                whisper=WhisperConfig(**config_data.get("whisper", {})),
                system_prompt=config_data.get("system_prompt"),
                tools_closer_prompt=config_data.get("tools_closer_prompt", False),
                grammar_guided=config_data.get("grammar_guided", False),
@@ -242,10 +291,43 @@ class ConfigManager:
                "tts_backend": self.config.backend.tts_backend
            },
            "models": {
-                "default_load_mode": self.config.models.default_load_mode
+                "default_load_mode": self.config.models.default_load_mode,
+                "hf_cache_dir": self.config.models.hf_cache_dir,
+                "gguf_cache_dir": self.config.models.gguf_cache_dir,
            },
            "offload": {
-                "directory": self.config.offload.directory
+                "directory": self.config.offload.directory,
+                "strategy": self.config.offload.strategy,
+                "max_gpu_percent": self.config.offload.max_gpu_percent,
+                "no_ram": self.config.offload.no_ram,
+                "load_in_4bit": self.config.offload.load_in_4bit,
+                "load_in_8bit": self.config.offload.load_in_8bit,
+                "manual_ram_gb": self.config.offload.manual_ram_gb,
+                "flash_attention": self.config.offload.flash_attention
+            },
+            "vulkan": {
+                "n_gpu_layers": self.config.vulkan.n_gpu_layers,
+                "n_ctx": self.config.vulkan.n_ctx,
+                "device_id": self.config.vulkan.device_id,
+                "single_gpu": self.config.vulkan.single_gpu
+            },
+            "image": {
+                "llm_path": self.config.image.llm_path,
+                "vae_path": self.config.image.vae_path,
+                "sample_method": self.config.image.sample_method,
+                "steps": self.config.image.steps,
+                "width": self.config.image.width,
+                "height": self.config.image.height,
+                "cfg_scale": self.config.image.cfg_scale,
+                "precision": self.config.image.precision,
+                "cpu_offload": self.config.image.cpu_offload,
+                "seed": self.config.image.seed,
+                "vae_tiling": self.config.image.vae_tiling,
+                "clip_on_cpu": self.config.image.clip_on_cpu
+            },
+            "whisper": {
+                "server_path": self.config.whisper.server_path,
+                "server_port": self.config.whisper.server_port
            },
            "system_prompt": self.config.system_prompt,
            "tools_closer_prompt": self.config.tools_closer_prompt,
@@ -255,7 +337,7 @@ class ConfigManager:
            "reasoning_options": self.config.reasoning_options,
            "parser": self.config.parser
        }
-        
+
        with open(self.config_path, 'w') as f:
            json.dump(config_dict, f, indent=2)
    

--- a/codai/main.py
+++ b/codai/main.py
@@ -5,7 +5,7 @@ import os
 # Import configuration from codai modules
 from codai.cli import parse_args
 from codai.config import ConfigManager
-from codai.admin.routes import init_session_manager
+from codai.admin.routes import init_session_manager, set_config_manager


 def main():
@@ -31,10 +31,18 @@ def main():
    config_dir = args.config
    config_mgr = ConfigManager(config_dir)
    config = config_mgr.load()
-    
-    # Initialize admin session manager
+
+    # Apply cache directory overrides from config before any cache module is used
+    if config.models.hf_cache_dir:
+        os.environ['HF_HOME'] = config.models.hf_cache_dir
+        os.environ['HUGGINGFACE_HUB_CACHE'] = config.models.hf_cache_dir
+    if config.models.gguf_cache_dir:
+        os.environ['CODERAI_CACHE_DIR'] = config.models.gguf_cache_dir
+    
+    # Initialize admin session manager and expose config to admin routes
    from pathlib import Path
    init_session_manager(Path(config_dir))
+    set_config_manager(config_mgr)
    
    # Handle early exit options (before heavy imports)
    if args.list_cached_models:
@@ -294,106 +302,128 @@ def main():
            kwargs['n_gpu_layers'] = model_cfg.get('n_gpu_layers', -1)
        return kwargs
    
-    # Load text models (main LLM)
+    # =========================================================================
+    # Register and optionally pre-load all configured models
+    # Models with load_mode == "load" are pre-loaded at startup.
+    # Models with load_mode == "on-request" (default) are loaded on demand.
+    # =========================================================================
+
+    def _model_id(m):
+        """Extract the identifier of a model config entry.
+
+        A plain string entry is returned unchanged. Any other entry is
+        treated as a mapping: its "path" value is used, falling back to
+        "id", and to an empty string when both are missing or empty.
+        """
+        if not isinstance(m, str):
+            return m.get("path") or m.get("id") or ""
+        return m
+
+    def _model_cfg(m, mtype):
+        """Build the per-model config dict for a config entry.
+
+        Non-dict entries (e.g. a bare model-name string) get an empty dict.
+        Dict entries start from build_kwargs_from_config(m, mtype) and then
+        carry over the "load_mode", "used_vram_gb" and "alias" keys whenever
+        the entry defines them.
+        """
+        if not isinstance(m, dict):
+            return {}
+        merged = build_kwargs_from_config(m, mtype)
+        for passthrough_key in ("load_mode", "used_vram_gb", "alias"):
+            if passthrough_key in m:
+                merged[passthrough_key] = m[passthrough_key]
+        return merged
+
+    # Text models
    text_models = models_config.get("text_models", [])
-    text_model_names = [m["id"] for m in text_models if m.get("enabled", True)]
-    
-    if text_model_names:
-        print(f"\nMain text model(s): {text_model_names}")
-        for idx, model_name in enumerate(text_models):
-            multi_model_manager.set_default_model(
-                model_name["id"],
-                config=build_kwargs_from_config(model_name, "text"),
-                backend_type=model_name.get("backend", "auto")
-            )
-    
-    # Load preload list
-    preload_list = models_config.get("preload", [])
-    loaded_list = models_config.get("loaded", [])
-    
-    # Determine which models to preload at startup
-    # loaded: models to load into VRAM (or CPU for loadswap) immediately
-    # preload: models to keep in CPU RAM for fast swapping
-    nopreload = False  # Config-based loading, no CLI preload skip
-    
-    # Pre-load models at startup based on config
-    if not nopreload and load_mode in ("loadall", "loadswap"):
-        all_startup_models = loaded_list + preload_list
-    elif not nopreload and load_mode == "ondemand":
-        all_startup_models = loaded_list[:1] if loaded_list else []
-    else:
-        all_startup_models = []
-    
-    # Pre-load process
+    # Also include legacy gguf_models entries (treated as text)
+    text_models = text_models + models_config.get("gguf_models", [])
+    text_model_names = [_model_id(m) for m in text_models if _model_id(m)]
+
    if text_model_names:
-        first_text = text_models[0]["id"] if text_models else None
-        
-        if not nopreload and load_mode == "ondemand" and first_text:
-            # Preload first model into VRAM
-            try:
-                print(f"Preloading first model into VRAM: {first_text}...")
-                mm = multi_model_manager._load_default_model()
-                if mm is not None and mm.backend is not None:
-                    multi_model_manager.active_in_vram = multi_model_manager.default_model
-                    print(f"Model loaded successfully: {first_text}")
-                else:
-                    print(f"Warning: Model {first_text} failed to load")
-            except Exception as e:
-                print(f"Warning: Failed to preload model: {e}")
-                print(f"Model will load on first request")
-    
-    # Load audio models (registered, load on first request)
+        print(f"\nText model(s): {text_model_names}")
+        for i, m in enumerate(text_models):
+            mid = _model_id(m)
+            if not mid:
+                continue
+            cfg = _model_cfg(m, "text")
+            if i == 0:
+                # Only the first text model becomes the default
+                multi_model_manager.set_default_model(
+                    mid, config=cfg,
+                    backend_type=m.get("backend", "auto") if isinstance(m, dict) else "auto"
+                )
+            else:
+                # Additional text models: register config only, no default override
+                multi_model_manager.config[mid] = cfg
+                multi_model_manager.model_backend_types[mid] = (
+                    m.get("backend", "auto") if isinstance(m, dict) else "auto"
+                )
+
+    # Audio models
    audio_models = models_config.get("audio_models", [])
-    for audio_m in audio_models:
-        if audio_m.get("enabled", True):
-            multi_model_manager.set_audio_model(
-                audio_m["id"],
-                config=build_kwargs_from_config(audio_m, "audio")
-            )
-    
-    # Load image models
+    for m in audio_models:
+        mid = _model_id(m)
+        if mid:
+            multi_model_manager.set_audio_model(mid, config=_model_cfg(m, "audio"))
+
+    # Image models
    image_models = models_config.get("image_models", [])
-    for img_m in image_models:
-        if img_m.get("enabled", True):
-            multi_model_manager.set_image_model(
-                img_m["id"],
-                config=build_kwargs_from_config(img_m, "image")
-            )
-    
-    # Load vision models
+    for m in image_models:
+        mid = _model_id(m)
+        if mid:
+            multi_model_manager.set_image_model(mid, config=_model_cfg(m, "image"))
+
+    # Vision models
    vision_models = models_config.get("vision_models", [])
-    for vis_m in vision_models:
-        if vis_m.get("enabled", True):
-            multi_model_manager.set_vision_model(
-                vis_m["id"],
-                config=build_kwargs_from_config(vis_m, "vision")
-            )
-    
-    # Load TTS model
-    tts_model = models_config.get("tts_models", [])
-    if tts_model:
-        for tts_m in tts_model:
-            if tts_m.get("enabled", True):
-                multi_model_manager.set_tts_model(tts_m["id"], {})
-    
+    for m in vision_models:
+        mid = _model_id(m)
+        if mid:
+            multi_model_manager.set_vision_model(mid, config=_model_cfg(m, "vision"))
+
+    # TTS models
+    tts_models = models_config.get("tts_models", [])
+    for m in tts_models:
+        mid = _model_id(m)
+        if mid:
+            multi_model_manager.set_tts_model(mid, config=_model_cfg(m, "tts") if isinstance(m, dict) else {})
+
    # Register aliases
    aliases = models_config.get("aliases", {})
    for alias, model in aliases.items():
        multi_model_manager.set_model_alias(alias, model)
+
+    # Pre-load models marked as load_mode == "load" across ALL types
+    all_model_entries = (
+        [("text", m) for m in text_models] +
+        [("audio", m) for m in audio_models] +
+        [("image", m) for m in image_models] +
+        [("vision", m) for m in vision_models] +
+        [("tts", m) for m in tts_models]
+    )
+    for mtype, m in all_model_entries:
+        mid = _model_id(m)
+        if not mid:
+            continue
+        per_load_mode = m.get("load_mode", "on-request") if isinstance(m, dict) else "on-request"
+        if per_load_mode != "load":
+            print(f"  '{mid}' — on-request (will load when needed)")
+            continue
+        print(f"  Pre-loading '{mid}' (load mode)...")
+        try:
+            if mtype == "text":
+                mm = multi_model_manager._load_model_by_name(mid)
+                if mm is not None and mm.backend is not None:
+                    multi_model_manager.active_in_vram = mid
+                    print(f"  Loaded: {mid}")
+                else:
+                    print(f"  Warning: {mid} failed to load")
+            # Non-text models are not loaded in this loop. Image models marked
+            # "load" are pre-loaded later in main(), once the image module's
+            # global args are set; audio/vision/tts models load on first request.
+            else:
+                print(f"  Note: pre-loading for {mtype} models happens on first request")
+        except Exception as e:
+            print(f"  Warning: failed to pre-load '{mid}': {e}")
+
+
    
    # Print startup summary
    print(f"\nBackend: {backend}")
-    print(f"Load mode: {load_mode}")
-    
    available_models = multi_model_manager.list_models()
-    print(f"\nAvailable models: {[m.id for m in available_models]}")
-    
-    # Register custom aliases from config
+    print(f"Available models: {[m.id for m in available_models]}")
    if aliases:
-        print(f"\nModel aliases:")
+        print("Model aliases:")
        for alias, target in aliases.items():
            print(f"  {alias} -> {target}")
-    
+
    # Set global args for backward compatibility with existing code
    class ArgsCompat:
        pass
@@ -438,10 +468,10 @@ def main():
    global_args.force_reasoning = config.reasoning_options
    global_args.model = text_model_names
    global_args.language_model = text_model_names
-    global_args.image_model = [m["id"] for m in image_models if m.get("enabled")]
-    global_args.audio_model = [m["id"] for m in audio_models if m.get("enabled")]
-    global_args.vision_model = [m["id"] for m in vision_models if m.get("enabled")]
-    global_args.tts_model = tts_model[0]["id"] if tts_model else None
+    global_args.image_model = [_model_id(m) for m in image_models]
+    global_args.audio_model = [_model_id(m) for m in audio_models]
+    global_args.vision_model = [_model_id(m) for m in vision_models]
+    global_args.tts_model = _model_id(tts_models[0]) if tts_models else None
    global_args.model_aliases = [(k, v) for k, v in aliases.items()]
    global_args.whisper_server = config.whisper.server_path
    global_args.whisper_server_port = config.whisper.server_port
@@ -458,86 +488,46 @@ def main():
    global_args.vulkan_list_devices = False
    global_args.loadall = False
    global_args.loadswap = False
-    global_args.nopreload = nopreload
-    
+    global_args.nopreload = False
+
    set_global_args(global_args)
    set_global_args_text(global_args)
    set_load_mode_app(load_mode)
-    
+
    # Set image module global args
    from codai.api.images import set_global_args as set_images_global_args
    set_images_global_args(global_args)
-    
-    # Vulkan list devices
-    if args.vulkan_list_devices:
-        print("\nListing Vulkan devices...")
+
+    # Pre-load image models marked as load_mode == "load"
+    for m in image_models:
+        mid = _model_id(m)
+        if not mid:
+            continue
+        per_load_mode = m.get("load_mode", "on-request") if isinstance(m, dict) else "on-request"
+        if per_load_mode != "load":
+            continue
+        model_key = f"image:{mid}"
+        if model_key in multi_model_manager.models:
+            continue
        try:
-            import subprocess
-            result = subprocess.run(['vulkaninfo', '--summary'], capture_output=True, text=True)
-            if result.returncode == 0:
-                print(result.stdout)
+            from codai.api.images import _load_diffusers_pipeline, _is_gguf_model, _load_sdcpp_model
+            print(f"Pre-loading image model '{mid}' (load mode)...")
+            if _is_gguf_model(mid):
+                resolved_path = multi_model_manager.load_model(mid)
+                if resolved_path and os.path.isfile(resolved_path):
+                    sd_model = _load_sdcpp_model(resolved_path, global_args)
+                    if sd_model:
+                        multi_model_manager.add_model(model_key, sd_model)
+                        print(f"  Image model loaded: {mid}")
            else:
-                print("Could not run vulkaninfo.")
+                pipeline = _load_diffusers_pipeline(mid, global_args)
+                if pipeline:
+                    multi_model_manager.add_model(model_key, pipeline)
+                    print(f"  Image model loaded: {mid}")
        except Exception as e:
-            print(f"Error: {e}")
-        sys.exit(0)
-    
-    # Startup: Preload configured models (non-text) for loadall/loadswap
-    if not nopreload and load_mode in ("loadall", "loadswap"):
-        first_loaded = multi_model_manager.active_in_vram is not None
-        
-        if image_models:
-            print(f"\n=== Pre-loading image model(s) ===")
-            for img_m in image_models:
-                if not img_m.get("enabled", True):
-                    continue
-                model_key = f"image:{img_m['id']}"
-                if model_key in multi_model_manager.models:
-                    continue
-                try:
-                    from codai.api.images import _load_diffusers_pipeline, _is_gguf_model, _load_sdcpp_model
-                    if load_mode == "loadall":
-                        print(f"Preloading image model into VRAM: {img_m['id']}...")
-                        if _is_gguf_model(img_m['id']):
-                            resolved_path = multi_model_manager.load_model(img_m['id'])
-                            if resolved_path and os.path.isfile(resolved_path):
-                                sd_model = _load_sdcpp_model(resolved_path, global_args)
-                                if sd_model:
-                                    multi_model_manager.add_model(model_key, sd_model)
-                                    print(f"Image model loaded (VRAM): {img_m['id']}")
-                        else:
-                            try:
-                                pipeline = _load_diffusers_pipeline(img_m['id'], global_args)
-                                if pipeline:
-                                    multi_model_manager.add_model(model_key, pipeline)
-                                    print(f"Image model loaded (VRAM): {img_m['id']}")
-                            except Exception as e:
-                                em = str(e).lower()
-                                if any(x in em for x in ['out of memory', 'oom', 'cuda error']):
-                                    print(f"VRAM full for {img_m['id']}, will load on demand")
-                                else:
-                                    print(f"Warning: {e}")
-                    elif load_mode == "loadswap" and not first_loaded:
-                        print(f"Preloading image model: {img_m['id']}...")
-                        if _is_gguf_model(img_m['id']):
-                            resolved_path = multi_model_manager.load_model(img_m['id'])
-                            if resolved_path and os.path.isfile(resolved_path):
-                                sd_model = _load_sdcpp_model(resolved_path, global_args)
-                                if sd_model:
-                                    multi_model_manager.add_model(model_key, sd_model)
-                                    first_loaded = True
-                                    print(f"Image model loaded: {img_m['id']}")
-                        else:
-                            try:
-                                pipeline = _load_diffusers_pipeline(img_m['id'], global_args)
-                                if pipeline:
-                                    multi_model_manager.add_model(model_key, pipeline)
-                                    first_loaded = True
-                                    print(f"Image model loaded: {img_m['id']}")
-                            except Exception as e:
-                                print(f"Warning: {e}")
-                except Exception as e:
-                    print(f"Warning: {e}")
+            print(f"  Warning: failed to pre-load image model '{mid}': {e}")
+
+
    
    # Start the server
    import uvicorn

--- a/codai/models/cache/__init__.py
+++ b/codai/models/cache/__init__.py
@@ -30,9 +30,11 @@ import time

 def get_model_cache_dir() -> str:
    """Get or create the model cache directory."""
-    # Use XDG_CACHE_HOME if set, otherwise use ~/.cache/coderai
-    cache_home = os.environ.get('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
-    cache_dir = os.path.join(cache_home, 'coderai', 'models')
+    # CODERAI_CACHE_DIR, when set and non-empty, replaces the XDG-based default.
+    # The value is user-supplied, so expand a leading "~" rather than creating
+    # a literal "~" directory relative to the current working directory.
+    override_dir = os.environ.get('CODERAI_CACHE_DIR')
+    if override_dir:
+        cache_dir = os.path.expanduser(override_dir)
+    else:
+        # Default: $XDG_CACHE_HOME/coderai/models, or ~/.cache/coderai/models.
+        cache_home = os.environ.get('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
+        cache_dir = os.path.join(cache_home, 'coderai', 'models')
    pathlib.Path(cache_dir).mkdir(parents=True, exist_ok=True)
    return cache_dir

@@ -43,20 +45,24 @@ def get_all_cache_dirs() -> dict:
    cache_home = os.environ.get('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))

    # Coderai GGUF cache
-    coderai_cache = os.path.join(cache_home, 'coderai', 'models')
+    coderai_cache = get_model_cache_dir()
    if os.path.exists(coderai_cache):
        caches['coderai'] = coderai_cache

-    # HuggingFace cache (for .safetensors, PyTorch models, etc.)
-    # Check both the main directory and the hub subdirectory
-    hf_cache = os.path.join(cache_home, 'huggingface')
-    hf_hub_cache = os.path.join(hf_cache, 'hub')
-    if os.path.exists(hf_hub_cache):
-        caches['huggingface'] = hf_hub_cache  # Use hub directory if it exists
-    elif os.path.exists(hf_cache):
-        caches['huggingface'] = hf_cache
-
-    # Local diffusers cache (often stored locally by apps)
+    # HuggingFace cache — respect HF_HOME override
+    hf_home = os.environ.get('HF_HOME') or os.environ.get('HUGGINGFACE_HUB_CACHE')
+    if hf_home:
+        hf_hub_cache = os.path.join(hf_home, 'hub') if not hf_home.endswith('hub') else hf_home
+        caches['huggingface'] = hf_hub_cache if os.path.exists(hf_hub_cache) else hf_home
+    else:
+        hf_cache = os.path.join(cache_home, 'huggingface')
+        hf_hub_cache = os.path.join(hf_cache, 'hub')
+        if os.path.exists(hf_hub_cache):
+            caches['huggingface'] = hf_hub_cache
+        elif os.path.exists(hf_cache):
+            caches['huggingface'] = hf_cache
+
+    # Local diffusers cache
    local_diffusers = os.path.expanduser('~/.cache/diffusers')
    if os.path.exists(local_diffusers):
        caches['diffusers'] = local_diffusers

--- a/codai/models/manager.py
+++ b/codai/models/manager.py
@@ -384,7 +384,8 @@ class MultiModelManager:
        self.tool_parser = ModelParserAdapter()
        self.current_model_key: Optional[str] = None
        self.load_mode: str = "ondemand"
-        self.active_in_vram: Optional[str] = None
+        self.active_in_vram: Optional[str] = None  # most-recently-used model key
+        self.models_in_vram: set = set()  # all models currently in VRAM
        self.model_aliases: Dict[str, str] = {}
        self.whisper_server: Optional[WhisperServerManager] = None
        self.model_backend_types: Dict[str, str] = {}
@@ -675,9 +676,7 @@ class MultiModelManager:
    def get_all_allowed_identifiers(self) -> set:
        """
        Return the set of all model names, aliases, and identifiers that are
-        valid for API requests.  This includes every identifier that
-        ``list_models()`` would return as well as the raw model paths/names
-        registered via the command line.
+        valid for API requests.
        """
        allowed = set()

@@ -719,6 +718,25 @@ class MultiModelManager:
        for alias in self.model_aliases:
            allowed.add(alias)

+        # Also include all models from config (covers configured-but-not-yet-loaded models)
+        try:
+            from codai.admin.routes import config_manager
+            if config_manager is not None:
+                md = config_manager.models_data
+                for cat in ("text_models", "image_models", "audio_models",
+                            "gguf_models", "tts_models", "vision_models"):
+                    for m in md.get(cat, []):
+                        mid = (m if isinstance(m, str) else
+                               m.get("alias") or m.get("path") or m.get("id") or "")
+                        raw = (m if isinstance(m, str) else m.get("path") or m.get("id") or "")
+                        for val in (mid, raw):
+                            if val:
+                                allowed.add(val)
+                                short = val.split("/")[-1] if "/" in val else val
+                                allowed.add(short)
+        except Exception:
+            pass
+
        return allowed

    def is_allowed_model(self, requested_or_resolved: str, model_type: str = None) -> bool:
@@ -1112,22 +1130,76 @@ class MultiModelManager:
        except Exception as e:
            print(f"  Warning during VRAM load of '{model_key}': {e}")

+    def _get_free_vram_gb(self) -> float:
+        """Return the free CUDA memory in GB, or 999.0 when it cannot be queried.
+
+        Memory is read with ``torch.cuda.mem_get_info()`` (called without a
+        device argument) and converted with decimal gigabytes (bytes / 1e9).
+        When torch cannot be imported, CUDA is unavailable, or the query
+        raises, the sentinel 999.0 is returned so callers treat VRAM as
+        sufficient.
+        """
+        try:
+            import torch
+            if torch.cuda.is_available():
+                # Only the free byte count is needed; the total is ignored.
+                free_bytes, _ = torch.cuda.mem_get_info()
+                return free_bytes / 1e9
+        except Exception:
+            # Best effort: any failure falls through to the "unknown" sentinel.
+            pass
+        return 999.0  # Unknown — assume enough
+
+    def _get_model_used_vram_gb(self, model_key: str) -> float:
+        """Return the configured used_vram_gb for a model, or 0 if unknown.
+
+        "Unknown" covers a model with no config entry, a missing or empty
+        ``used_vram_gb`` value, and a value that cannot be converted to a
+        float (e.g. free text typed into the config file).
+
+        Args:
+            model_key: Key of the model in ``self.config``.
+
+        Returns:
+            The configured VRAM requirement as a float, or 0.0.
+        """
+        cfg = self.config.get(model_key) or {}
+        try:
+            return float(cfg.get("used_vram_gb") or 0)
+        except (TypeError, ValueError):
+            # A malformed config value must not break model requests;
+            # treat it the same as "not configured".
+            return 0.0
+
+    def _evict_models_for_vram(self, needed_gb: float):
+        """Unload models until at least needed_gb of VRAM is reported free.
+
+        Models other than ``self.active_in_vram`` are evicted first, in the
+        iteration order of ``self.models``; the active model is evicted last
+        and only if free VRAM is still below ``needed_gb``. Does nothing when
+        ``needed_gb`` is not positive. Nothing is raised or returned if the
+        target is still not met after every model has been evicted.
+
+        NOTE(review): this was described as "LRU first", but no usage
+        ordering is visible in this method — confirm whether ``self.models``
+        is kept in least-recently-used order elsewhere.
+
+        Args:
+            needed_gb: Amount of free VRAM (as reported by
+                ``_get_free_vram_gb``) to reach before stopping.
+        """
+        if needed_gb <= 0:
+            return
+
+        # Drop `key` from self.models and self.models_in_vram, release the
+        # model object (cleanup() if it has one, otherwise .to('cpu')), then
+        # run the garbage collector and empty the CUDA cache when available.
+        def _evict_key(key):
+            model_obj = self.models.pop(key, None)
+            self.models_in_vram.discard(key)
+            if model_obj is not None:
+                try:
+                    if hasattr(model_obj, 'cleanup'):
+                        model_obj.cleanup()
+                    elif hasattr(model_obj, 'to'):
+                        model_obj.to('cpu')
+                except Exception as e:
+                    print(f"  Warning during eviction of '{key}': {e}")
+            gc.collect()
+            try:
+                import torch
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
+            except Exception:
+                # Best effort: torch missing or cache flush failed.
+                pass
+
+        # First pass: evict non-active models in self.models iteration order.
+        # Free VRAM is re-measured before every eviction so the loop stops as
+        # soon as the target is reached. The key list is copied because
+        # _evict_key mutates self.models.
+        for key in list(self.models.keys()):
+            if key == self.active_in_vram:
+                continue
+            if self._get_free_vram_gb() >= needed_gb:
+                break
+            print(f"On-request VRAM eviction: unloading '{key}' to free VRAM")
+            _evict_key(key)
+
+        # Second pass: evict active model if still not enough
+        # NOTE(review): self.current_model_key is not touched here — confirm
+        # it cannot keep pointing at an evicted model.
+        if self._get_free_vram_gb() < needed_gb and self.active_in_vram and self.active_in_vram in self.models:
+            print(f"On-request VRAM eviction: unloading active model '{self.active_in_vram}' to free VRAM")
+            _evict_key(self.active_in_vram)
+            self.active_in_vram = None
+
    def request_model(self, requested_model: str, model_type: str = None) -> Dict[str, Any]:
        """
        Central method for API modules to request a model.
        
-        Handles three load modes:
+        Handles per-model load modes:
        
-        **loadall**: All models are pre-loaded at startup. Just return the
-        already-loaded model. No VRAM management needed.
+        **load**: Model is pre-loaded at startup and stays in VRAM.
        
-        **loadswap**: All models stay loaded (in CPU RAM or VRAM). When a
-        different model is requested, the current VRAM model is moved to CPU
-        RAM and the requested model is moved from CPU RAM to VRAM.
+        **on-request**: Model is loaded when first needed. Before loading,
+        checks free VRAM against the model's used_vram_gb config. If not
+        enough VRAM, evicts other loaded models until there is enough, then
+        loads the model.
        
-        **ondemand** (default when no flag specified): Only one model in memory
-        at a time. When a different model is requested, the current model is
-        fully unloaded (deleted) and the new one is loaded from scratch.
+        Legacy global modes (ondemand/loadall/loadswap) are still supported
+        for backward compatibility.
        
        Args:
            requested_model: The model name/alias from the API request
@@ -1226,7 +1298,63 @@ class MultiModelManager:
        
        # Step 3: Check if already loaded in self.models
        existing_model = self.models.get(model_key)
-        
+
+        # =====================================================================
+        # PER-MODEL LOAD MODE: Check per-model config first.
+        # Per-model "load" = pre-loaded (treat as loadall for this model).
+        # Per-model "on-request" = load when needed with VRAM management.
+        # =====================================================================
+        per_model_cfg = self.config.get(model_key, {})
+        per_model_load_mode = per_model_cfg.get("load_mode")  # "load" | "on-request" | None
+
+        if per_model_load_mode == "on-request":
+            if existing_model is not None:
+                # Already loaded — just return it
+                self.current_model_key = model_key
+                self.active_in_vram = model_key
+                self.models_in_vram.add(model_key)
+                return {
+                    'model_key': model_key,
+                    'model_name': resolved_name,
+                    'model_object': existing_model,
+                    'config': per_model_cfg,
+                    'already_loaded': True,
+                }
+            # Not loaded — check VRAM and evict if needed
+            needed_gb = self._get_model_used_vram_gb(model_key)
+            if needed_gb > 0:
+                free_gb = self._get_free_vram_gb()
+                if free_gb < needed_gb:
+                    print(f"On-request: need {needed_gb:.1f} GB VRAM, have {free_gb:.1f} GB free — evicting models")
+                    self._evict_models_for_vram(needed_gb)
+            return {
+                'model_key': model_key,
+                'model_name': resolved_name,
+                'model_object': None,
+                'config': per_model_cfg,
+                'already_loaded': False,
+            }
+
+        if per_model_load_mode == "load":
+            # Pre-loaded model — just return it (or signal caller to load it)
+            if existing_model is not None:
+                self.current_model_key = model_key
+                self.active_in_vram = model_key
+                return {
+                    'model_key': model_key,
+                    'model_name': resolved_name,
+                    'model_object': existing_model,
+                    'config': per_model_cfg,
+                    'already_loaded': True,
+                }
+            return {
+                'model_key': model_key,
+                'model_name': resolved_name,
+                'model_object': None,
+                'config': per_model_cfg,
+                'already_loaded': False,
+            }
+
        # =====================================================================
        # LOADALL MODE: All models should be pre-loaded. Just return it.
        # =====================================================================
@@ -1443,6 +1571,7 @@ class MultiModelManager:
        # Reset tracking state
        self.current_model_key = None
        self.active_in_vram = None
+        self.models_in_vram = set()
        
        # Force garbage collection
        for _ in range(3):
@@ -1466,6 +1595,7 @@ class MultiModelManager:
        """Add a model (ModelManager, diffusers pipeline, sd.cpp model, etc.) for a specific key."""
        self.models[key] = manager
        self.active_in_vram = key
+        self.models_in_vram.add(key)
    
    def get_model(self, key: str) -> Optional[ModelManager]:
        """Get a model manager by key."""
@@ -1480,43 +1610,77 @@ class MultiModelManager:
        return None
    
    def list_models(self) -> List[ModelInfo]:
-        """List all available models."""
+        """List all available models (configured + runtime aliases).
+
+        Identifiers are collected in this order, skipping duplicates:
+        entries from the config manager's ``models_data`` (alias, raw
+        path/id, and the last "/"-separated component of each), the runtime
+        default model (only when nothing has been collected so far), the
+        registered audio/tts/image/vision models, and custom aliases.
+
+        Returns:
+            A list of ``ModelInfo`` objects, one per unique identifier.
+        """
        models = []
-        
-        # Add default model(s)
-        if self.default_model:
+        seen_ids: set = set()
+
+        # Append a ModelInfo for model_id unless that id was already listed.
+        def _add(model_id: str):
+            if model_id not in seen_ids:
+                seen_ids.add(model_id)
+                models.append(ModelInfo(id=model_id))
+
+        # --- Models from config (the authoritative source) ---
+        # config_manager is imported inside the function (presumably to avoid
+        # a circular import — confirm).
+        # NOTE(review): the except below covers the whole loop, so one
+        # malformed entry silently stops the remaining config models from
+        # being listed.
+        try:
+            from codai.admin.routes import config_manager
+            if config_manager is not None:
+                md = config_manager.models_data
+                for cat in ("text_models", "vision_models", "image_models",
+                            "audio_models", "tts_models", "gguf_models"):
+                    for m in md.get(cat, []):
+                        if isinstance(m, str):
+                            mid = m
+                        else:
+                            mid = m.get("alias") or m.get("path") or m.get("id") or ""
+                            # Also expose the raw path/id
+                            raw = m.get("path") or m.get("id") or ""
+                            if raw and raw != mid:
+                                _add(raw)
+                                # Short name
+                                short = raw.split("/")[-1] if "/" in raw else raw
+                                if short != raw:
+                                    _add(short)
+                        if mid:
+                            _add(mid)
+                            short = mid.split("/")[-1] if "/" in mid else mid
+                            if short != mid:
+                                _add(short)
+        except Exception:
+            pass
+
+        # --- Fallback: runtime default_model (if config_manager unavailable) ---
+        # NOTE(review): the `not models` guard means the default model's ids
+        # and the "default" entry are only listed when the config section
+        # above produced nothing — confirm this is intended.
+        if not models and self.default_model:
            model_id = self.default_model
            if not (model_id.startswith("http://") or model_id.startswith("https://")):
-                short_name = self.default_model.split("/")[-1] if "/" in self.default_model else self.default_model
-                if short_name != self.default_model:
-                    models.append(ModelInfo(id=short_name))
-                models.append(ModelInfo(id=model_id))
-                models.append(ModelInfo(id="default"))
-        
-        # Add aliases for first/default models
+                short_name = model_id.split("/")[-1] if "/" in model_id else model_id
+                if short_name != model_id:
+                    _add(short_name)
+                _add(model_id)
+                _add("default")
+
+        # --- Runtime-registered non-text models (image, audio, tts, vision) ---
        if self.audio_models:
-            models.append(ModelInfo(id="audio"))
+            _add("audio")
            for audio_id in self.audio_models:
-                models.append(ModelInfo(id=f"audio:{audio_id}"))
-        
+                _add(f"audio:{audio_id}")
+
        if self.tts_model:
-            models.append(ModelInfo(id="tts"))
-            models.append(ModelInfo(id=f"tts:{self.tts_model}"))
-        
+            _add("tts")
+            _add(f"tts:{self.tts_model}")
+
        if self.image_models:
-            models.append(ModelInfo(id="image"))
+            _add("image")
            for image_id in self.image_models:
-                models.append(ModelInfo(id=f"image:{image_id}"))
-        
+                _add(f"image:{image_id}")
+
        if self.vision_models:
-            models.append(ModelInfo(id="vision"))
+            _add("vision")
            for vision_id in self.vision_models:
-                models.append(ModelInfo(id=f"vision:{vision_id}"))
-        
-        # Add any custom aliases
+                _add(f"vision:{vision_id}")
+
+        # --- Custom aliases ---
        for alias in self.model_aliases:
-            models.append(ModelInfo(id=alias))
-        
+            _add(alias)
+
        return models



--- a/ifconfig
+++ b/ifconfig
--- a/requirements-nvidia.txt
+++ b/requirements-nvidia.txt
@@ -31,3 +31,8 @@ llama-cpp-python>=0.2.0
 # Requires specific CUDA versions and may need manual installation
 # Install with: pip install flash-attn --no-build-isolation
 # flash-attn>=2.5.0
+
+# Optional: fast path for linear attention models (RWKV, Mamba, etc.)
+causal-conv1d
+# flash-linear-attention requires CUDA and must be installed from source:
+# pip install git+https://github.com/fla-org/flash-linear-attention --no-build-isolation