Add better error handling for tokenizer/cache errors in base model loading

- Detect tokenizer parsing errors and provide helpful cache clearing instructions
- Add retry logic for corrupted cache files
- Improve error messages for component-only model loading
parent f0b663fa
......@@ -8420,8 +8420,38 @@ def main(args):
print(f" [DEBUG] Using fallback PipelineClass: {PipelineClass.__name__}")
# Load base pipeline with correct class
try:
    pipe = BasePipelineClass.from_pretrained(base_model, **pipe_kwargs)
    print(f" ✅ Base pipeline loaded with {BasePipelineClass.__name__}")
except Exception as base_load_e:
    # Heuristic: "tokenizer" / "spiece" / "parsing" in the message usually
    # indicates a corrupted or partially-downloaded tokenizer file in the
    # local Hugging Face cache rather than a real model problem.
    error_str = str(base_load_e)
    if "tokenizer" in error_str.lower() or "spiece" in error_str.lower() or "parsing" in error_str.lower():
        print(f" ⚠️ Tokenizer/cache error detected, trying to clear and retry...")
        if debug:
            print(f" [DEBUG] Base model load error: {base_load_e}")
        try:
            # Retry with force_download=True so the (possibly corrupted)
            # cached files are bypassed and fresh copies are fetched from
            # the Hub. The previous code passed force_download=False (the
            # default), which repeated the identical failing call; the
            # deprecated resume_download flag is dropped.
            pipe = BasePipelineClass.from_pretrained(
                base_model,
                **pipe_kwargs,
                force_download=True,
            )
            print(f" ✅ Base pipeline loaded on retry")
        except Exception as retry_e:
            if debug:
                print(f" [DEBUG] Retry also failed: {retry_e}")
            print(f" ⚠️ Could not load base model. Try clearing cache:")
            print(f" rm -rf ~/.cache/huggingface/hub/models--Lightricks--LTX-Video")
            # Surface the original failure, chained to the retry failure
            # so both tracebacks are visible to the user.
            raise base_load_e from retry_e
    else:
        # Not a cache issue — re-raise preserving the original traceback.
        raise
# Load the fine-tuned component
if class_name == "LTXVideoTransformer3DModel":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment