# FastAPI and server dependencies
fastapi>=0.104.0
uvicorn[standard]>=0.24.0
pydantic>=2.5.0
python-multipart>=0.0.6  # for multipart form data parsing
requests>=2.31.0  # for HTTP requests

# ML dependencies (transformers-based for NVIDIA/CUDA)
transformers>=4.35.0
accelerate>=0.24.0

# System resource detection
psutil>=5.9.0
# procname>=0.3.0  # optional - uncomment to set process name (requires libproc2-dev)

# Optional: Audio transcription dependencies
faster-whisper>=0.10.0  # For NVIDIA/CUDA whisper transcription
whispercpp>=1.0.0  # Alternative whisper library (works without PyTorch)

# Optional: quantization for NVIDIA GPUs (bitsandbytes) and tokenizer/serialization backends used by transformers
bitsandbytes>=0.41.0
sentencepiece>=0.1.99
tiktoken>=0.5.0
tokenizers>=0.15.0
protobuf>=3.20.0

# llama-cpp-python with CUDA support (for GGUF files on CUDA backend)
llama-cpp-python>=0.2.0

# Optional: Flash Attention 2 for faster inference on supported NVIDIA GPUs
# Requires specific CUDA versions and may need manual installation
# Install with: pip install flash-attn --no-build-isolation
# flash-attn>=2.5.0
