Commit ad4ec2a5 authored by Your Name

Show actual backend being used when CUDA is forced for GGUF models

- Show 'cuda (via llama-cpp-python)' when force_cuda is enabled
- Show original backend in GGUF detection message
parent c3a5417f
@@ -1929,7 +1929,7 @@ class ModelManager:
 original_backend = None
 if is_gguf and backend_type in ("nvidia", "cuda"):
 original_backend = backend_type
-print(f"GGUF model detected, using llama-cpp-python ({backend_type} backend)")
+print(f"GGUF model detected, using llama-cpp-python (original backend: {original_backend})")
 backend_type = "vulkan" # Use llama-cpp-python for GGUF
 self.backend_type = backend_type
@@ -5923,7 +5923,11 @@ def main():
 print(f"\nStarting server on http://{args.host}:{args.port}")
 print(f"API documentation available at http://{args.host}:{args.port}/docs")
 if model_manager.backend is not None:
-print(f"Using backend: {model_manager.backend_type}")
+# Show actual backend being used
+actual_backend = model_manager.backend_type
+if hasattr(model_manager.backend, 'force_cuda') and model_manager.backend.force_cuda:
+actual_backend = "cuda (via llama-cpp-python)"
+print(f"Using backend: {actual_backend}")
 # Print available models
 models = multi_model_manager.list_models()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment