Improve model discovery: skip unfound models, add deep search for variants

- Skip models that are not found on HuggingFace instead of adding them with defaults
- Add deep search for model variants from known organizations
- Search organizations: Alpha-VLLM, stepvideo, hpcai-tech, tencent, rhymes-ai, THUDM, genmo, Wan-AI, stabilityai, black-forest-labs
- Remove non-existent models from the known_large_models list
- Better error handling for model validation

Improve model discovery: skip unfound models, add deep search for variants
- Skip models that are not found on HuggingFace instead of adding them with defaults
- Add deep search for model variants from known organizations
- Search organizations: Alpha-VLLM, stepvideo, hpcai-tech, tencent, rhymes-ai, THUDM, genmo, Wan-AI, stabilityai, black-forest-labs
- Remove non-existent models from the known_large_models list
- Better error handling for model validation
2fe62c6f · Stefy Lanza (nextime / spora ) · 4164da7e · 2fe62c6f
Commit 2fe62c6f authored Feb 24, 2026 by Stefy Lanza (nextime / spora )
Hide whitespace changes
Inline Side-by-side

Showing with 386 additions and 85 deletions

videogen videogen +386 -85

No files found.
--- a/videogen
+++ b/videogen
@@ -937,39 +937,33 @@ def update_all_models(hf_token=None):
        ("swinir", 20),
    ]
-    # Known large/huge models to explicitly include (without requiring validation)
+    # Known large/huge models to check and include if found on HuggingFace
-    # These are added regardless of HuggingFace search results
+    # These are validated before adding - models not found are skipped
    known_large_models = [
        # ═══════════════════════════════════════════════════════════════
        # 100GB+ models - Ultra High VRAM
        # ═══════════════════════════════════════════════════════════════
        ("Alpha-VLLM/Lumina-Next-SFT", "LuminaVideoPipeline", "~100 GB", "Lumina Next SFT - Ultra high quality T2I/T2V"),
-        ("Alpha-VLLM/Lumina-T2X", "LuminaVideoPipeline", "~100 GB", "Lumina T2X - Text to any"),
        # ═══════════════════════════════════════════════════════════════
        # 90-140GB models - Extreme VRAM
        # ═══════════════════════════════════════════════════════════════
-        ("stepvideo/Step-Video-T2V", "StepVideoPipeline", "~90-140 GB", "Step Video T2V - Extreme quality"),
        # ═══════════════════════════════════════════════════════════════
        # 45-65GB models - Very High VRAM
        # ═══════════════════════════════════════════════════════════════
        ("hpcai-tech/Open-Sora", "OpenSoraPipeline", "~45-65 GB", "Open Sora - Open source Sora alternative"),
-        ("hpcai-tech/OpenSora-STDiT-v2", "OpenSoraPipeline", "~45-65 GB", "OpenSora STDiT v2"),
        ("hpcai-tech/Open-Sora-1.2", "OpenSoraPipeline", "~45-65 GB", "OpenSora 1.2"),
-        ("hpcai-tech/Open-Sora-plan-v1.2.0", "OpenSoraPipeline", "~45-65 GB", "OpenSora Plan v1.2.0"),
        # ═══════════════════════════════════════════════════════════════
        # 40-55GB models - High VRAM
        # ═══════════════════════════════════════════════════════════════
        ("tencent/HunyuanVideo", "HunyuanDiTPipeline", "~40-55 GB", "Tencent HunyuanVideo"),
-        ("Tencent-Hunyuan/HunyuanVideo", "HunyuanDiTPipeline", "~40-55 GB", "Tencent HunyuanVideo (alt)"),
        # ═══════════════════════════════════════════════════════════════
        # 35-45GB models
        # ═══════════════════════════════════════════════════════════════
        ("rhymes-ai/Allegro", "AllegroPipeline", "~35-45 GB", "Allegro - High quality video gen"),
-        ("rhymes-ai/Allegro-Medium", "AllegroPipeline", "~35-45 GB", "Allegro Medium"),
        # ═══════════════════════════════════════════════════════════════
        # 20-30GB models
@@ -985,18 +979,46 @@ def update_all_models(hf_token=None):
        ("genmo/mochi", "MochiPipeline", "~18-22 GB", "Mochi - Latest version"),
    ]
+    # Additional model variants to search for (organization/model patterns)
+    additional_search_patterns = [
+        # Lumina variants
+        "Alpha-VLLM/Lumina",
+        "Alpha-VLLM/lumina",
+        "alpha-vllm/lumina",
+        # Step Video variants
+        "stepvideo/step",
+        "stepvideo/Step",
+        # OpenSora variants
+        "hpcai-tech/OpenSora",
+        "hpcai-tech/open-sora",
+        # Hunyuan variants
+        "tencent/Hunyuan",
+        "Tencent-Hunyuan/Hunyuan",
+        # Allegro variants
+        "rhymes-ai/Allegro",
+        # CogVideo variants
+        "THUDM/CogVideo",
+        # Mochi variants
+        "genmo/mochi",
+    ]
    all_models = {}
    seen_ids = set()
-    # First, add known large models explicitly (without requiring validation)
+    # First, validate and add known large models (only if found on HuggingFace)
-    print("\n📦 Adding known large/high-VRAM models...")
+    print("\n📦 Validating known large/high-VRAM models...")
    print("   (These models may require significant VRAM - 40GB to 140GB)")
    print()
    for model_id, default_pipeline_class, vram_est, description in known_large_models:
        if model_id in seen_ids:
            continue
-        seen_ids.add(model_id)
        # Generate name
        name = model_id.split("/")[-1].lower()
@@ -1010,31 +1032,27 @@ def update_all_models(hf_token=None):
            name = f"{base_name}_{counter}"
            counter += 1
-        # Try to validate and detect actual pipeline class
+        # Validate model exists on HuggingFace
        model_info = validate_hf_model(model_id, hf_token=hf_token)
-        detected_pipeline = None
-        if model_info:
+        if not model_info:
-            tags = model_info.get("tags", [])
+            # Model not found - skip it (don't add with defaults)
-            downloads = model_info.get("downloads", 0)
+            print(f"  ⏭️  Skipping {model_id} - not found on HuggingFace")
-            likes = model_info.get("likes", 0)
+            continue
-            is_i2v = any(t in tags for t in ["image-to-video", "i2v"]) or "i2v" in model_id.lower()
+        seen_ids.add(model_id)
-            # Try to detect actual pipeline class from model_index.json
+        tags = model_info.get("tags", [])
-            detected_pipeline = detect_pipeline_class(model_info)
+        downloads = model_info.get("downloads", 0)
-            if detected_pipeline:
+        likes = model_info.get("likes", 0)
-                pipeline_class = detected_pipeline
+        is_i2v = any(t in tags for t in ["image-to-video", "i2v"]) or "i2v" in model_id.lower()
-                print(f"  🔍 Detected pipeline: {pipeline_class} for {model_id}")
-            else:
+        # Try to detect actual pipeline class from model_index.json
-                pipeline_class = default_pipeline_class
+        detected_pipeline = detect_pipeline_class(model_info)
+        if detected_pipeline:
+            pipeline_class = detected_pipeline
+            print(f"  🔍 Detected pipeline: {pipeline_class} for {model_id}")
        else:
-            # Add anyway with defaults
-            tags = ["video", "text-to-video", "large-model"]
-            downloads = 0
-            likes = 0
-            is_i2v = "i2v" in model_id.lower()
            pipeline_class = default_pipeline_class
-            print(f"  ⚠️  Could not validate {model_id} - adding with defaults")
        # Build entry
        model_entry = {
@@ -1053,6 +1071,135 @@ def update_all_models(hf_token=None):
        all_models[name] = model_entry
        print(f"  ✅ {name}: {model_id} ({vram_est}) [{pipeline_class}]")
+    # Deep search for additional model variants from known organizations
+    print("\n🔍 Deep searching for model variants from known organizations...")
+    # Search for models from specific organizations
+    organization_searches = [
+        # Lumina models
+        ("Alpha-VLLM", 30),
+        ("alpha-vllm", 30),
+        # Step Video models
+        ("stepvideo", 20),
+        # OpenSora models
+        ("hpcai-tech", 30),
+        # Hunyuan models
+        ("tencent", 30),
+        ("Tencent-Hunyuan", 20),
+        # Allegro models
+        ("rhymes-ai", 20),
+        # CogVideo models
+        ("THUDM", 30),
+        # Mochi models
+        ("genmo", 20),
+        # Wan models
+        ("Wan-AI", 30),
+        ("wan", 20),
+        # Stability AI models
+        ("stabilityai", 40),
+        # Flux models
+        ("black-forest-labs", 20),
+        ("flux", 20),
+    ]
+    for org, limit in organization_searches:
+        print(f"\n🔍 Searching organization: '{org}' (limit: {limit})")
+        results = search_hf_models(org, limit=limit, hf_token=hf_token)
+        for m in results:
+            model_id = m["id"]
+            # Skip duplicates
+            if model_id in seen_ids:
+                continue
+            # Filter: include video models, NSFW models, OR models with known video pipeline classes
+            is_video_model = m["is_i2v"] or m["is_video"]
+            is_nsfw_model = m["is_nsfw"]
+            is_known_pipeline = m["pipeline_class"] in ["WanPipeline", "MochiPipeline", "CogVideoXPipeline",
+                                                        "StableVideoDiffusionPipeline", "I2VGenXLPipeline",
+                                                        "LTXVideoPipeline", "AnimateDiffPipeline",
+                                                        "TextToVideoSDPipeline", "TextToVideoZeroPipeline",
+                                                        "HotshotXLPipeline", "AllegroPipeline",
+                                                        "HunyuanDiTPipeline", "OpenSoraPipeline",
+                                                        "LuminaVideoPipeline", "StepVideoPipeline",
+                                                        "DiffusionPipeline", "FluxPipeline",
+                                                        "StableDiffusionXLPipeline", "StableDiffusion3Pipeline"]
+            if not (is_video_model or is_nsfw_model or is_known_pipeline):
+                continue
+            seen_ids.add(model_id)
+            # Generate model name
+            name = model_id.split("/")[-1].lower()
+            name = name.replace("-", "_").replace(".", "_")
+            name = re.sub(r'[^a-z0-9_]', '', name)
+            # Ensure unique name
+            base_name = name
+            counter = 1
+            while name in all_models:
+                name = f"{base_name}_{counter}"
+                counter += 1
+            # Use pipeline class from search results (already detected via detect_pipeline_class)
+            pipeline_class = m["pipeline_class"]
+            if pipeline_class == "Unknown":
+                # Fallback based on model type
+                if m["is_i2v"]:
+                    pipeline_class = "StableVideoDiffusionPipeline"
+                elif m["is_video"]:
+                    pipeline_class = "WanPipeline"
+                elif m["is_image"]:
+                    pipeline_class = "StableDiffusionXLPipeline"
+                else:
+                    pipeline_class = "DiffusionPipeline"
+            # Determine VRAM estimate from pipeline class
+            vram_est = PIPELINE_CLASS_MAP.get(pipeline_class, {}).get("default_vram", "~10-20 GB")
+            # Detect if LoRA
+            is_lora = "lora" in model_id.lower() or any(t in m.get("tags", []) for t in ["lora", "LoRA"])
+            base_model = None
+            if is_lora:
+                if "wan" in model_id.lower():
+                    base_model = "Wan-AI/Wan2.1-I2V-14B-Diffusers" if m["is_i2v"] else "Wan-AI/Wan2.1-T2V-14B-Diffusers"
+                elif "svd" in model_id.lower() or "stable-video" in model_id.lower():
+                    base_model = "stabilityai/stable-video-diffusion-img2vid-xt-1-1"
+            # Build model entry
+            model_entry = {
+                "id": model_id,
+                "vram": vram_est,
+                "class": pipeline_class,
+                "desc": f"{'[LoRA] ' if is_lora else ''}{model_id}",
+                "supports_i2v": m["is_i2v"],
+                "tags": m.get("tags", [])[:10],
+                "downloads": m.get("downloads", 0),
+                "likes": m.get("likes", 0),
+                "is_lora": is_lora,
+                "auto_added": True,
+                "pipeline_tag": m.get("pipeline_tag", ""),
+                "library_name": m.get("library_name", ""),
+            }
+            if base_model:
+                model_entry["base_model"] = base_model
+            all_models[name] = model_entry
+            print(f"  ✅ {name}: {model_id} [{pipeline_class}]")
    for query, limit in search_queries:
        print(f"\n🔍 Searching: '{query}' (limit: {limit})")
        results = search_hf_models(query, limit=limit, hf_token=hf_token)
@@ -2064,66 +2211,203 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
        return_all: If True, return all candidates sorted by score
    Returns: (model_name, model_info, reason) or [(model_name, model_info, reason), ...] if return_all=True
+    LoRA Support:
+        LoRA adapters are now considered alongside base models. When a LoRA is selected,
+        the returned info includes 'is_lora': True and 'base_model' for the main pipeline
+        to load the base model first, then apply the LoRA adapter.
    """
    candidates = []
    is_nsfw = gen_type.get("is_nsfw", False)
    gen_type_str = gen_type.get("type", "t2v")
    for name, info in models.items():
-        # Skip LoRA adapters (need base model)
+        is_lora = info.get("is_lora", False)
-        if info.get("is_lora"):
+        base_model_id = info.get("base_model")
-            continue
-        # Check VRAM compatibility
-        vram_est = parse_vram_estimate(info.get("vram", "~10 GB"))
-        if vram_est > vram_gb * 1.1:  # Allow 10% margin
-            continue
-        # Check model capabilities
+        # For LoRA adapters, check if we have a base model
-        caps = detect_model_type(info)
+        if is_lora:
+            if not base_model_id:
+                # Try to infer base model from LoRA name
+                lora_id = info.get("id", "").lower()
+                if "wan" in lora_id:
+                    if info.get("supports_i2v"):
+                        base_model_id = "Wan-AI/Wan2.1-I2V-14B-Diffusers"
+                    else:
+                        base_model_id = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
+                elif "svd" in lora_id or "stable-video" in lora_id:
+                    base_model_id = "stabilityai/stable-video-diffusion-img2vid-xt-1-1"
+                elif "sdxl" in lora_id:
+                    base_model_id = "stabilityai/stable-diffusion-xl-base-1.0"
+                elif "flux" in lora_id:
+                    base_model_id = "black-forest-labs/FLUX.1-dev"
+                else:
+                    # Skip LoRAs without a determinable base model
+                    continue
+            # Find the base model in our models dict to get its VRAM requirements
+            base_model_info = None
+            base_model_name = None
+            for m_name, m_info in models.items():
+                if m_info.get("id") == base_model_id:
+                    base_model_info = m_info
+                    base_model_name = m_name
+                    break
+            # If base model not in our database, create a minimal info dict
+            if not base_model_info:
+                # Estimate VRAM based on base model type
+                if "wan" in base_model_id.lower():
+                    base_vram_est = 24.0
+                elif "svd" in base_model_id.lower():
+                    base_vram_est = 16.0
+                elif "flux" in base_model_id.lower():
+                    base_vram_est = 24.0
+                elif "sdxl" in base_model_id.lower():
+                    base_vram_est = 12.0
+                else:
+                    base_vram_est = 16.0
+                base_model_info = {
+                    "id": base_model_id,
+                    "vram": f"~{base_vram_est:.0f} GB",
+                    "class": "WanPipeline" if "wan" in base_model_id.lower() else
+                              "StableVideoDiffusionPipeline" if "svd" in base_model_id.lower() else
+                              "FluxPipeline" if "flux" in base_model_id.lower() else
+                              "StableDiffusionXLPipeline",
+                    "supports_i2v": "i2v" in base_model_id.lower(),
+                }
+            # Check VRAM compatibility using base model requirements
+            # LoRAs add a small overhead (~1-2GB)
+            vram_est = parse_vram_estimate(base_model_info.get("vram", "~10 GB")) + 2
+            if vram_est > vram_gb * 1.1:  # Allow 10% margin
+                continue
+            # Get capabilities from base model
+            base_caps = detect_model_type(base_model_info)
+            # Score the LoRA
+            score = 0
+            reasons = []
+            # Type matching (based on base model)
+            if gen_type_str == "t2v" and base_caps["t2v"]:
+                score += 100
+                reasons.append("T2V capable (via base)")
+            elif gen_type_str == "i2v" and base_caps["i2v"]:
+                score += 100
+                reasons.append("I2V capable (via base)")
+            elif gen_type_str == "t2i" and base_caps["t2i"]:
+                score += 100
+                reasons.append("T2I capable (via base)")
+            elif gen_type_str == "i2i" and base_caps["i2i"]:
+                score += 100
+                reasons.append("I2I capable (via base)")
+            # LoRA-specific bonuses
+            lora_caps = detect_model_type(info)
+            # NSFW matching - LoRAs often specialize in NSFW
+            if is_nsfw:
+                if lora_caps["nsfw"]:
+                    score += 70  # Higher bonus for NSFW-specialized LoRAs
+                    reasons.append("NSFW-specialized LoRA")
+                elif base_caps["nsfw"]:
+                    score += 40
+                    reasons.append("NSFW-friendly base")
+                else:
+                    score -= 20
+                    reasons.append("May filter NSFW")
+            else:
+                # Non-NSFW content: slight penalty for NSFW LoRAs
+                if lora_caps["nsfw"]:
+                    score -= 10
+                    reasons.append("NSFW LoRA (may affect non-NSFW output)")
+            # LoRA quality bonus (LoRAs are often fine-tuned for specific styles)
+            if "realism" in name.lower() or "realistic" in name.lower():
+                score += 15
+                reasons.append("Realism-focused LoRA")
+            elif "style" in name.lower():
+                score += 10
+                reasons.append("Style LoRA")
+            # Quality vs speed
+            if prefer_quality:
+                # LoRAs often improve quality without much VRAM cost
+                score += 25  # Bonus for LoRA quality boost
+                score += min(vram_est, 30)
+            else:
+                score += max(0, 20 - vram_est)
+            # Popular/reliable LoRAs get bonus
+            downloads = info.get("downloads", 0)
+            if downloads > 1000:
+                score += 15
+                reasons.append(f"Popular LoRA ({downloads:,} downloads)")
+            # Store base model info for loading
+            lora_info = info.copy()
+            lora_info["_base_model_info"] = base_model_info
+            lora_info["_base_model_name"] = base_model_name
+            lora_info["_inferred_base_model"] = base_model_id
+            if score > 0:
+                candidates.append((name, lora_info, score, reasons))
-        # Score the model
-        score = 0
-        reasons = []
-        # Type matching
-        if gen_type_str == "t2v" and caps["t2v"]:
-            score += 100
-            reasons.append("T2V capable")
-        elif gen_type_str == "i2v" and caps["i2v"]:
-            score += 100
-            reasons.append("I2V capable")
-        elif gen_type_str == "t2i" and caps["t2i"]:
-            score += 100
-            reasons.append("T2I capable")
-        elif gen_type_str == "i2i" and caps["i2i"]:
-            score += 100
-            reasons.append("I2I capable")
-        # NSFW matching
-        if is_nsfw and caps["nsfw"]:
-            score += 50
-            reasons.append("NSFW-friendly")
-        elif is_nsfw and not caps["nsfw"]:
-            score -= 30
-            reasons.append("May filter NSFW")
-        # Quality vs speed
-        if prefer_quality:
-            # Prefer larger models for quality
-            score += min(vram_est, 30)
        else:
-            # Prefer smaller models for speed
+            # Non-LoRA model handling (original logic)
-            score += max(0, 20 - vram_est)
+            # Check VRAM compatibility
+            vram_est = parse_vram_estimate(info.get("vram", "~10 GB"))
-        # Popular/reliable models get bonus
+            if vram_est > vram_gb * 1.1:  # Allow 10% margin
-        downloads = info.get("downloads", 0)
+                continue
-        if downloads > 10000:
-            score += 20
+            # Check model capabilities
-            reasons.append(f"Popular ({downloads:,} downloads)")
+            caps = detect_model_type(info)
-        if score > 0:
+            # Score the model
-            candidates.append((name, info, score, reasons))
+            score = 0
+            reasons = []
+            # Type matching
+            if gen_type_str == "t2v" and caps["t2v"]:
+                score += 100
+                reasons.append("T2V capable")
+            elif gen_type_str == "i2v" and caps["i2v"]:
+                score += 100
+                reasons.append("I2V capable")
+            elif gen_type_str == "t2i" and caps["t2i"]:
+                score += 100
+                reasons.append("T2I capable")
+            elif gen_type_str == "i2i" and caps["i2i"]:
+                score += 100
+                reasons.append("I2I capable")
+            # NSFW matching
+            if is_nsfw and caps["nsfw"]:
+                score += 50
+                reasons.append("NSFW-friendly")
+            elif is_nsfw and not caps["nsfw"]:
+                score -= 30
+                reasons.append("May filter NSFW")
+            # Quality vs speed
+            if prefer_quality:
+                # Prefer larger models for quality
+                score += min(vram_est, 30)
+            else:
+                # Prefer smaller models for speed
+                score += max(0, 20 - vram_est)
+            # Popular/reliable models get bonus
+            downloads = info.get("downloads", 0)
+            if downloads > 10000:
+                score += 20
+                reasons.append(f"Popular ({downloads:,} downloads)")
+            if score > 0:
+                candidates.append((name, info, score, reasons))
    if not candidates:
        # Fallback: return first available model
@@ -2419,8 +2703,20 @@ def run_auto_mode(args, models):
        # Use the best candidate
        model_name, model_info, reason = all_candidates[0]
-        print(f"  ✅ Selected: {model_name}")
-        print(f"     {model_info.get('id', 'Unknown')}")
+        # Check if this is a LoRA adapter
+        is_lora = model_info.get("is_lora", False)
+        if is_lora:
+            print(f"  ✅ Selected LoRA: {model_name}")
+            print(f"     LoRA ID: {model_info.get('id', 'Unknown')}")
+            base_model_id = model_info.get("_inferred_base_model") or model_info.get("base_model")
+            if base_model_id:
+                print(f"     Base Model: {base_model_id}")
+            # Store base model info for main() to use
+            args._auto_lora_base_model = base_model_id
+        else:
+            print(f"  ✅ Selected: {model_name}")
+            print(f"     {model_info.get('id', 'Unknown')}")
        print(f"     {reason}")
        args.model = model_name
@@ -2435,6 +2731,11 @@ def run_auto_mode(args, models):
        print(f"  ✅ Using user-specified model: {model_name}")
        if model_info:
            print(f"     {model_info.get('id', 'Unknown')}")
+            # Check if user-specified model is a LoRA
+            if model_info.get("is_lora", False):
+                base_model_id = model_info.get("base_model")
+                if base_model_id:
+                    print(f"     Base Model: {base_model_id}")
    # Select image model for I2V (only if user didn't specify one)
    image_model_name = None