Commit 9c681ed1 authored by Your Name

Auto-retry download when cached model file is corrupted

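If loading a cached GGUF model fails and the error message contains a
corruption indicator (invalid, corrupt, magic, header, file, open, read),
delete the corrupted cache file and, when the model was specified as an
http(s) URL, automatically re-download it and retry loading.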
parent 102a464b
...@@ -1542,6 +1542,69 @@ class VulkanBackend(ModelBackend): ...@@ -1542,6 +1542,69 @@ class VulkanBackend(ModelBackend):
print(f"DEBUG: Chat template: {self.chat_template}") print(f"DEBUG: Chat template: {self.chat_template}")
except Exception as e: except Exception as e:
backend_name = "CUDA" if self.force_cuda else "Vulkan" backend_name = "CUDA" if self.force_cuda else "Vulkan"
# Check if this might be a corrupted cache file
cache_dir = get_model_cache_dir()
is_cached = model_path and model_path.startswith(cache_dir) and os.path.exists(model_path)
if is_cached:
# Try to determine if it's a corruption error
error_str = str(e).lower()
corruption_indicators = ['invalid', 'corrupt', 'magic', 'header', 'file', 'open', 'read']
if any(indicator in error_str for indicator in corruption_indicators):
print(f"WARNING: Cached model appears corrupted: {e}")
print("Deleting corrupted cache and re-downloading...")
try:
os.remove(model_path)
print(f"Deleted: {model_path}")
# Re-download by setting model_path to None and re-calling the download logic
model_path = None
# Re-download from original URL
if model_name.startswith('http://') or model_name.startswith('https://'):
import requests
from huggingface_hub import hf_hub_download
import hashlib
cache_dir = get_model_cache_dir()
url_path = model_name.split('?')[0]
filename = os.path.basename(url_path)
if not filename.endswith('.gguf'):
filename = "model.gguf"
url_hash = hashlib.sha256(model_name.encode()).hexdigest()
cached_filename = f"{url_hash}_{filename}"
model_path = os.path.join(cache_dir, cached_filename)
print(f"Re-downloading model from URL: {model_name}")
response = requests.get(model_name, stream=True)
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
downloaded = 0
with open(model_path, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192*1024):
if chunk:
f.write(chunk)
downloaded += len(chunk)
if total_size > 0:
percent = (downloaded / total_size) * 100
print(f"Downloaded: {percent:.1f}%", end='\r')
print(f"\nRe-downloaded and cached to: {model_path}")
print(f"File size: {os.path.getsize(model_path) / 1e9:.2f} GB")
# Retry loading with new model_path
llama_kwargs['model_path'] = model_path
self.model = Llama(**llama_kwargs)
self.model_name = model_name
print(f"\nModel loaded successfully with {backend_name} after re-download!")
self._finalize_chat_template_detection()
print(f"DEBUG: Chat template: {self.chat_template}")
return
except Exception as redownload_error:
print(f"Failed to re-download model: {redownload_error}")
# Fall through to regular error handling
print(f"Error loading model with {backend_name}: {e}") print(f"Error loading model with {backend_name}: {e}")
if self.force_cuda: if self.force_cuda:
print("Make sure CUDA is available:") print("Make sure CUDA is available:")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment