Commit dd4dfff4 authored by Your Name's avatar Your Name

Add python-multipart to requirements, GGUF support for CUDA backend

- Add python-multipart to requirements.txt, requirements-nvidia.txt, requirements-vulkan.txt
- Add llama-cpp-python to requirements-nvidia.txt for GGUF support
- When using CUDA/nvidia backend with GGUF file, automatically use llama-cpp-python
parent c152ee28
......@@ -1808,11 +1808,14 @@ class ModelManager:
Args:
model_name: Model name or path
backend_type: 'nvidia', 'vulkan', or 'auto' to detect
backend_type: 'nvidia', 'vulkan', 'cuda', or 'auto' to detect
**kwargs: Additional arguments for the specific backend
"""
available = detect_available_backends()
# Check if model is a GGUF file
is_gguf = model_name.endswith('.gguf') or 'gguf' in model_name.lower()
# Determine backend
if backend_type == "auto":
if available.get('nvidia'):
......@@ -1826,6 +1829,11 @@ class ModelManager:
print("For Vulkan, install llama-cpp-python with Vulkan support.")
raise RuntimeError("No suitable backend found")
# If GGUF file and backend is nvidia/cuda, use llama-cpp-python (vulkan backend)
if is_gguf and backend_type in ("nvidia", "cuda"):
print(f"GGUF model detected, using llama-cpp-python ({backend_type} backend)")
backend_type = "vulkan" # Use llama-cpp-python for GGUF
self.backend_type = backend_type
# Create appropriate backend
......
......@@ -2,6 +2,8 @@
fastapi>=0.104.0
uvicorn[standard]>=0.24.0
pydantic>=2.5.0
python-multipart>=0.0.6 # for multipart form data parsing
requests>=2.31.0 # for HTTP requests
# ML dependencies (transformers-based for NVIDIA/CUDA)
transformers>=4.35.0
......@@ -22,6 +24,9 @@ tiktoken>=0.5.0
tokenizers>=0.15.0
protobuf>=3.20.0
# llama-cpp-python with CUDA support (for GGUF files on CUDA backend)
llama-cpp-python>=0.2.0
# Optional: Flash Attention 2 for faster inference on supported NVIDIA GPUs
# Requires specific CUDA versions and may need manual installation
# Install with: pip install flash-attn --no-build-isolation
......
......@@ -2,6 +2,8 @@
fastapi>=0.104.0
uvicorn[standard]>=0.24.0
pydantic>=2.5.0
python-multipart>=0.0.6 # for multipart form data parsing
requests>=2.31.0 # for HTTP requests
# llama-cpp-python is installed by build.sh with Vulkan support
# CMAKE_ARGS="-DGGML_VULKAN=ON" pip install llama-cpp-python --no-cache-dir
......
......@@ -5,6 +5,7 @@ pydantic>=2.5.0
# CLI dependencies
requests>=2.31.0 # for the coder CLI tool
python-multipart>=0.0.6 # for multipart form data parsing
# PyTorch - Uncomment the appropriate version for your system.
# IMPORTANT: Use quotes around version specifiers to prevent shell interpretation!
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment