Commit 88479315 authored by Your Name

Remove vulkan from available backends when --backend nvidia is used with GGUF models

When the user explicitly passes --backend nvidia (or --backend cuda) with a
GGUF model, vulkan is now removed from the available backends list, since
llama-cpp-python will use CUDA instead of Vulkan.
parent 273ab8c8
......@@ -4809,14 +4809,15 @@ def main():
# Detect available backends
available = detect_available_backends()
# Check if any model is GGUF and backend is nvidia/cuda - in that case, vulkan uses CUDA
# If user explicitly requests nvidia/cuda backend with a GGUF model,
# remove vulkan from available since we'll use CUDA instead
if model_names:
first_model = model_names[0]
is_gguf_model = first_model.endswith('.gguf') or 'gguf' in first_model.lower()
if is_gguf_model and args.backend in ('nvidia', 'cuda'):
# When using nvidia/cuda backend with GGUF, vulkan actually uses CUDA
if 'vulkan' in available and available['vulkan']:
print("\nNote: GGUF model with nvidia backend - vulkan backend will use CUDA")
# When using nvidia/cuda backend with GGUF, vulkan uses CUDA, so remove it
if 'vulkan' in available:
del available['vulkan']
print("\nAvailable backends:")
for name, available_flag in available.items():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment