Commit dd4dfff4 authored by Your Name

Add python-multipart to requirements, GGUF support for CUDA backend

- Add python-multipart to requirements.txt, requirements-nvidia.txt, requirements-vulkan.txt
- Add llama-cpp-python to requirements-nvidia.txt for GGUF support
- When using CUDA/nvidia backend with GGUF file, automatically use llama-cpp-python
parent c152ee28
...@@ -1808,11 +1808,14 @@ class ModelManager: ...@@ -1808,11 +1808,14 @@ class ModelManager:
Args: Args:
model_name: Model name or path model_name: Model name or path
backend_type: 'nvidia', 'vulkan', or 'auto' to detect backend_type: 'nvidia', 'vulkan', 'cuda', or 'auto' to detect
**kwargs: Additional arguments for the specific backend **kwargs: Additional arguments for the specific backend
""" """
available = detect_available_backends() available = detect_available_backends()
# Check if model is a GGUF file
is_gguf = model_name.endswith('.gguf') or 'gguf' in model_name.lower()
# Determine backend # Determine backend
if backend_type == "auto": if backend_type == "auto":
if available.get('nvidia'): if available.get('nvidia'):
...@@ -1826,6 +1829,11 @@ class ModelManager: ...@@ -1826,6 +1829,11 @@ class ModelManager:
print("For Vulkan, install llama-cpp-python with Vulkan support.") print("For Vulkan, install llama-cpp-python with Vulkan support.")
raise RuntimeError("No suitable backend found") raise RuntimeError("No suitable backend found")
# If GGUF file and backend is nvidia/cuda, use llama-cpp-python (vulkan backend)
if is_gguf and backend_type in ("nvidia", "cuda"):
print(f"GGUF model detected, using llama-cpp-python ({backend_type} backend)")
backend_type = "vulkan" # Use llama-cpp-python for GGUF
self.backend_type = backend_type self.backend_type = backend_type
# Create appropriate backend # Create appropriate backend
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
fastapi>=0.104.0 fastapi>=0.104.0
uvicorn[standard]>=0.24.0 uvicorn[standard]>=0.24.0
pydantic>=2.5.0 pydantic>=2.5.0
python-multipart>=0.0.6 # for multipart form data parsing
requests>=2.31.0 # for HTTP requests
# ML dependencies (transformers-based for NVIDIA/CUDA) # ML dependencies (transformers-based for NVIDIA/CUDA)
transformers>=4.35.0 transformers>=4.35.0
...@@ -22,6 +24,9 @@ tiktoken>=0.5.0 ...@@ -22,6 +24,9 @@ tiktoken>=0.5.0
tokenizers>=0.15.0 tokenizers>=0.15.0
protobuf>=3.20.0 protobuf>=3.20.0
# llama-cpp-python with CUDA support (for GGUF files on CUDA backend)
llama-cpp-python>=0.2.0
# Optional: Flash Attention 2 for faster inference on supported NVIDIA GPUs # Optional: Flash Attention 2 for faster inference on supported NVIDIA GPUs
# Requires specific CUDA versions and may need manual installation # Requires specific CUDA versions and may need manual installation
# Install with: pip install flash-attn --no-build-isolation # Install with: pip install flash-attn --no-build-isolation
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
fastapi>=0.104.0 fastapi>=0.104.0
uvicorn[standard]>=0.24.0 uvicorn[standard]>=0.24.0
pydantic>=2.5.0 pydantic>=2.5.0
python-multipart>=0.0.6 # for multipart form data parsing
requests>=2.31.0 # for HTTP requests
# llama-cpp-python is installed by build.sh with Vulkan support # llama-cpp-python is installed by build.sh with Vulkan support
# CMAKE_ARGS="-DGGML_VULKAN=ON" pip install llama-cpp-python --no-cache-dir # CMAKE_ARGS="-DGGML_VULKAN=ON" pip install llama-cpp-python --no-cache-dir
......
...@@ -5,6 +5,7 @@ pydantic>=2.5.0 ...@@ -5,6 +5,7 @@ pydantic>=2.5.0
# CLI dependencies # CLI dependencies
requests>=2.31.0 # for the coder CLI tool requests>=2.31.0 # for the coder CLI tool
python-multipart>=0.0.6 # for multipart form data parsing
# PyTorch - Uncomment the appropriate version for your system. # PyTorch - Uncomment the appropriate version for your system.
# IMPORTANT: Use quotes around version specifiers to prevent shell interpretation! # IMPORTANT: Use quotes around version specifiers to prevent shell interpretation!
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment