Commit 015c6908 authored by Your Name

Add GGUF magic bytes validation

- Check whether the downloaded file is a valid GGUF file (magic bytes = 'GGUF')
- If it is not valid, show a clear error that the URL is wrong (the download returned an HTML page instead of the model)
- Explain that the URL must be a direct download link ending in .gguf
parent 611bfd8f
...@@ -4467,28 +4467,33 @@ def main(): ...@@ -4467,28 +4467,33 @@ def main():
n_ctx = 2048 n_ctx = 2048
print(f"Loading GGUF model from: {model_path}") print(f"Loading GGUF model from: {model_path}")
print(f"GGUF model file size: {os.path.getsize(model_path) / (1024*1024):.1f} MB") file_size = os.path.getsize(model_path)
print(f"File exists: {os.path.exists(model_path)}") print(f"GGUF model file size: {file_size / (1024*1024):.1f} MB")
# Try to verify it's a valid GGUF file # Verify it's a valid GGUF file (check magic bytes)
with open(model_path, 'rb') as f: with open(model_path, 'rb') as f:
magic = f.read(8) magic = f.read(8)
print(f"GGUF magic bytes: {magic.hex()}") print(f"File magic bytes: {magic}")
if magic != b'GGUF':
try: print(f"ERROR: File is NOT a valid GGUF! Expected 'GGUF', got: {magic}")
llama_model = Llama( print(f"This means the download returned an HTML error page instead of the model.")
model_path=model_path, print(f"The URL must be a DIRECT download link (ends with .gguf, not a model page)")
n_gpu_layers=n_gpu_layers, print(f"Example: https://huggingface.co/owner/repo/resolve/main/model.gguf")
n_ctx=n_ctx, print(f"Image model will load on first request")
verbose=True, # Enable verbose to see errors else:
) # Valid GGUF, try to load
multi_model_manager.add_model(model_key, llama_model) try:
print(f"GGUF image model loaded successfully: {original_model_name}") llama_model = Llama(
except Exception as llama_error: model_path=model_path,
print(f"llama.cpp load error: {llama_error}") n_gpu_layers=n_gpu_layers,
import traceback n_ctx=n_ctx,
traceback.print_exc() verbose=True,
print(f"Will try loading image model on first request instead") )
multi_model_manager.add_model(model_key, llama_model)
print(f"GGUF image model loaded successfully: {original_model_name}")
except Exception as llama_error:
print(f"llama.cpp load error: {llama_error}")
print(f"Will try loading image model on first request instead")
else: else:
print(f"Could not load GGUF image model: no valid model path") print(f"Could not load GGUF image model: no valid model path")
...@@ -4863,22 +4868,30 @@ def main(): ...@@ -4863,22 +4868,30 @@ def main():
n_ctx = 2048 n_ctx = 2048
print(f"Loading GGUF model from: {model_path}") print(f"Loading GGUF model from: {model_path}")
print(f"GGUF model file size: {os.path.getsize(model_path) / (1024*1024):.1f} MB") file_size = os.path.getsize(model_path)
print(f"GGUF model file size: {file_size / (1024*1024):.1f} MB")
try: # Verify it's a valid GGUF file (check magic bytes)
llama_model = Llama( with open(model_path, 'rb') as f:
model_path=model_path, magic = f.read(8)
n_gpu_layers=n_gpu_layers, print(f"File magic bytes: {magic}")
n_ctx=n_ctx, if magic != b'GGUF':
verbose=True, print(f"ERROR: File is NOT a valid GGUF! Expected 'GGUF', got: {magic}")
) print(f"The URL must be a DIRECT download link (ends with .gguf)")
multi_model_manager.add_model(model_key, llama_model) print(f"Image model will load on first request")
print(f"GGUF image model loaded successfully: {original_model_name}") else:
except Exception as llama_error: try:
print(f"llama.cpp load error: {llama_error}") llama_model = Llama(
import traceback model_path=model_path,
traceback.print_exc() n_gpu_layers=n_gpu_layers,
print(f"Will try loading image model on first request instead") n_ctx=n_ctx,
verbose=True,
)
multi_model_manager.add_model(model_key, llama_model)
print(f"GGUF image model loaded successfully: {original_model_name}")
except Exception as llama_error:
print(f"llama.cpp load error: {llama_error}")
print(f"Will try loading image model on first request instead")
else: else:
print(f"Could not load GGUF image model: no valid model path") print(f"Could not load GGUF image model: no valid model path")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment