Improve model discovery: skip unfound models, add deep search for variants

- Skip models not found on HuggingFace instead of adding with defaults
- Add deep search for model variants from known organizations
- Search organizations: Alpha-VLLM, stepvideo, hpcai-tech, tencent,
  rhymes-ai, THUDM, genmo, Wan-AI, stabilityai, black-forest-labs
- Remove non-existent models from known_large_models list
- Better error handling for model validation
parent 4164da7e
Pipeline #226 canceled with stages
......@@ -937,39 +937,33 @@ def update_all_models(hf_token=None):
("swinir", 20),
]
# Known large/huge models to explicitly include (without requiring validation)
# These are added regardless of HuggingFace search results
# Known large/huge models to check and include if found on HuggingFace
# These are validated before adding - models not found are skipped
known_large_models = [
# ═══════════════════════════════════════════════════════════════
# 100GB+ models - Ultra High VRAM
# ═══════════════════════════════════════════════════════════════
("Alpha-VLLM/Lumina-Next-SFT", "LuminaVideoPipeline", "~100 GB", "Lumina Next SFT - Ultra high quality T2I/T2V"),
("Alpha-VLLM/Lumina-T2X", "LuminaVideoPipeline", "~100 GB", "Lumina T2X - Text to any"),
# ═══════════════════════════════════════════════════════════════
# 90-140GB models - Extreme VRAM
# ═══════════════════════════════════════════════════════════════
("stepvideo/Step-Video-T2V", "StepVideoPipeline", "~90-140 GB", "Step Video T2V - Extreme quality"),
# ═══════════════════════════════════════════════════════════════
# 45-65GB models - Very High VRAM
# ═══════════════════════════════════════════════════════════════
("hpcai-tech/Open-Sora", "OpenSoraPipeline", "~45-65 GB", "Open Sora - Open source Sora alternative"),
("hpcai-tech/OpenSora-STDiT-v2", "OpenSoraPipeline", "~45-65 GB", "OpenSora STDiT v2"),
("hpcai-tech/Open-Sora-1.2", "OpenSoraPipeline", "~45-65 GB", "OpenSora 1.2"),
("hpcai-tech/Open-Sora-plan-v1.2.0", "OpenSoraPipeline", "~45-65 GB", "OpenSora Plan v1.2.0"),
# ═══════════════════════════════════════════════════════════════
# 40-55GB models - High VRAM
# ═══════════════════════════════════════════════════════════════
("tencent/HunyuanVideo", "HunyuanDiTPipeline", "~40-55 GB", "Tencent HunyuanVideo"),
("Tencent-Hunyuan/HunyuanVideo", "HunyuanDiTPipeline", "~40-55 GB", "Tencent HunyuanVideo (alt)"),
# ═══════════════════════════════════════════════════════════════
# 35-45GB models
# ═══════════════════════════════════════════════════════════════
("rhymes-ai/Allegro", "AllegroPipeline", "~35-45 GB", "Allegro - High quality video gen"),
("rhymes-ai/Allegro-Medium", "AllegroPipeline", "~35-45 GB", "Allegro Medium"),
# ═══════════════════════════════════════════════════════════════
# 20-30GB models
......@@ -985,18 +979,46 @@ def update_all_models(hf_token=None):
("genmo/mochi", "MochiPipeline", "~18-22 GB", "Mochi - Latest version"),
]
# Additional model variants to search for (organization/model patterns)
# Each entry is an "org/name-prefix" string intended as a HuggingFace search
# query for close variants of the known large models above.
# NOTE(review): this list is not referenced anywhere in the visible hunks —
# the deep-search pass below iterates `organization_searches` instead.
# Possibly dead code; confirm before keeping.
# NOTE(review): HF Hub search appears to be case-insensitive, so the
# upper/lower-case duplicates below may be redundant — TODO confirm.
additional_search_patterns = [
# Lumina variants
"Alpha-VLLM/Lumina",
"Alpha-VLLM/lumina",
"alpha-vllm/lumina",
# Step Video variants
"stepvideo/step",
"stepvideo/Step",
# OpenSora variants
"hpcai-tech/OpenSora",
"hpcai-tech/open-sora",
# Hunyuan variants
"tencent/Hunyuan",
"Tencent-Hunyuan/Hunyuan",
# Allegro variants
"rhymes-ai/Allegro",
# CogVideo variants
"THUDM/CogVideo",
# Mochi variants
"genmo/mochi",
]
all_models = {}
seen_ids = set()
# First, add known large models explicitly (without requiring validation)
print("\n📦 Adding known large/high-VRAM models...")
# First, validate and add known large models (only if found on HuggingFace)
print("\n📦 Validating known large/high-VRAM models...")
print(" (These models may require significant VRAM - 40GB to 140GB)")
print()
for model_id, default_pipeline_class, vram_est, description in known_large_models:
if model_id in seen_ids:
continue
seen_ids.add(model_id)
# Generate name
name = model_id.split("/")[-1].lower()
......@@ -1010,11 +1032,15 @@ def update_all_models(hf_token=None):
name = f"{base_name}_{counter}"
counter += 1
# Try to validate and detect actual pipeline class
# Validate model exists on HuggingFace
model_info = validate_hf_model(model_id, hf_token=hf_token)
detected_pipeline = None
if model_info:
if not model_info:
# Model not found - skip it (don't add with defaults)
print(f" ⏭️ Skipping {model_id} - not found on HuggingFace")
continue
seen_ids.add(model_id)
tags = model_info.get("tags", [])
downloads = model_info.get("downloads", 0)
likes = model_info.get("likes", 0)
......@@ -1027,14 +1053,6 @@ def update_all_models(hf_token=None):
print(f" 🔍 Detected pipeline: {pipeline_class} for {model_id}")
else:
pipeline_class = default_pipeline_class
else:
# Add anyway with defaults
tags = ["video", "text-to-video", "large-model"]
downloads = 0
likes = 0
is_i2v = "i2v" in model_id.lower()
pipeline_class = default_pipeline_class
print(f" ⚠️ Could not validate {model_id} - adding with defaults")
# Build entry
model_entry = {
......@@ -1053,6 +1071,135 @@ def update_all_models(hf_token=None):
all_models[name] = model_entry
print(f" ✅ {name}: {model_id} ({vram_est}) [{pipeline_class}]")
# Deep search for additional model variants from known organizations
print("\n🔍 Deep searching for model variants from known organizations...")
# Search for models from specific organizations
# Each entry is (search-query string, max results to fetch).
# NOTE(review): the query is presumably free-text search, not an exact
# owner/organization filter — generic terms like "wan" and "flux" may match
# unrelated repositories; verify against search_hf_models's implementation.
organization_searches = [
# Lumina models
("Alpha-VLLM", 30),
("alpha-vllm", 30),
# Step Video models
("stepvideo", 20),
# OpenSora models
("hpcai-tech", 30),
# Hunyuan models
("tencent", 30),
("Tencent-Hunyuan", 20),
# Allegro models
("rhymes-ai", 20),
# CogVideo models
("THUDM", 30),
# Mochi models
("genmo", 20),
# Wan models
("Wan-AI", 30),
("wan", 20),
# Stability AI models
("stabilityai", 40),
# Flux models
("black-forest-labs", 20),
("flux", 20),
]
for org, limit in organization_searches:
print(f"\n🔍 Searching organization: '{org}' (limit: {limit})")
results = search_hf_models(org, limit=limit, hf_token=hf_token)
for m in results:
model_id = m["id"]
# Skip duplicates
# (seen_ids also contains the known large models added earlier,
# so this pass never re-adds one of those.)
if model_id in seen_ids:
continue
# Filter: include video models, NSFW models, OR models with known video pipeline classes
is_video_model = m["is_i2v"] or m["is_video"]
is_nsfw_model = m["is_nsfw"]
is_known_pipeline = m["pipeline_class"] in ["WanPipeline", "MochiPipeline", "CogVideoXPipeline",
"StableVideoDiffusionPipeline", "I2VGenXLPipeline",
"LTXVideoPipeline", "AnimateDiffPipeline",
"TextToVideoSDPipeline", "TextToVideoZeroPipeline",
"HotshotXLPipeline", "AllegroPipeline",
"HunyuanDiTPipeline", "OpenSoraPipeline",
"LuminaVideoPipeline", "StepVideoPipeline",
"DiffusionPipeline", "FluxPipeline",
"StableDiffusionXLPipeline", "StableDiffusion3Pipeline"]
if not (is_video_model or is_nsfw_model or is_known_pipeline):
continue
seen_ids.add(model_id)
# Generate model name
# Normalize the repo name to a lowercase snake_case identifier.
name = model_id.split("/")[-1].lower()
name = name.replace("-", "_").replace(".", "_")
name = re.sub(r'[^a-z0-9_]', '', name)
# Ensure unique name
# Append _1, _2, ... on collision with an already-registered model.
base_name = name
counter = 1
while name in all_models:
name = f"{base_name}_{counter}"
counter += 1
# Use pipeline class from search results (already detected via detect_pipeline_class)
pipeline_class = m["pipeline_class"]
if pipeline_class == "Unknown":
# Fallback based on model type
if m["is_i2v"]:
pipeline_class = "StableVideoDiffusionPipeline"
elif m["is_video"]:
pipeline_class = "WanPipeline"
elif m["is_image"]:
pipeline_class = "StableDiffusionXLPipeline"
else:
pipeline_class = "DiffusionPipeline"
# Determine VRAM estimate from pipeline class
vram_est = PIPELINE_CLASS_MAP.get(pipeline_class, {}).get("default_vram", "~10-20 GB")
# Detect if LoRA
# Tag check is exact element match, so only the literal tags
# "lora" / "LoRA" count; other casings rely on the id substring test.
is_lora = "lora" in model_id.lower() or any(t in m.get("tags", []) for t in ["lora", "LoRA"])
base_model = None
if is_lora:
# Heuristic base-model inference covers only Wan and SVD LoRAs here;
# any other LoRA is stored without a base_model key.
if "wan" in model_id.lower():
base_model = "Wan-AI/Wan2.1-I2V-14B-Diffusers" if m["is_i2v"] else "Wan-AI/Wan2.1-T2V-14B-Diffusers"
elif "svd" in model_id.lower() or "stable-video" in model_id.lower():
base_model = "stabilityai/stable-video-diffusion-img2vid-xt-1-1"
# Build model entry
model_entry = {
"id": model_id,
"vram": vram_est,
"class": pipeline_class,
"desc": f"{'[LoRA] ' if is_lora else ''}{model_id}",
"supports_i2v": m["is_i2v"],
# Only the first 10 tags are kept to bound entry size.
"tags": m.get("tags", [])[:10],
"downloads": m.get("downloads", 0),
"likes": m.get("likes", 0),
"is_lora": is_lora,
"auto_added": True,
"pipeline_tag": m.get("pipeline_tag", ""),
"library_name": m.get("library_name", ""),
}
if base_model:
model_entry["base_model"] = base_model
all_models[name] = model_entry
print(f" ✅ {name}: {model_id} [{pipeline_class}]")
for query, limit in search_queries:
print(f"\n🔍 Searching: '{query}' (limit: {limit})")
results = search_hf_models(query, limit=limit, hf_token=hf_token)
......@@ -2064,16 +2211,153 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
return_all: If True, return all candidates sorted by score
Returns: (model_name, model_info, reason) or [(model_name, model_info, reason), ...] if return_all=True
LoRA Support:
LoRA adapters are now considered alongside base models. When a LoRA is selected,
the returned info includes 'is_lora': True and 'base_model' for the main pipeline
to load the base model first, then apply the LoRA adapter.
"""
candidates = []
is_nsfw = gen_type.get("is_nsfw", False)
gen_type_str = gen_type.get("type", "t2v")
for name, info in models.items():
# Skip LoRA adapters (need base model)
if info.get("is_lora"):
# LoRA-aware candidate handling: a LoRA adapter is scored using its
# (possibly inferred) base model for VRAM and generation capabilities,
# plus LoRA-specific bonuses/penalties from the adapter entry itself.
is_lora = info.get("is_lora", False)
base_model_id = info.get("base_model")
# For LoRA adapters, check if we have a base model
if is_lora:
if not base_model_id:
# Try to infer base model from LoRA name
# NOTE(review): name-substring heuristic — e.g. any id containing
# "wan" maps to the Wan 2.1 base; confirm this matches real repo ids.
lora_id = info.get("id", "").lower()
if "wan" in lora_id:
if info.get("supports_i2v"):
base_model_id = "Wan-AI/Wan2.1-I2V-14B-Diffusers"
else:
base_model_id = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
elif "svd" in lora_id or "stable-video" in lora_id:
base_model_id = "stabilityai/stable-video-diffusion-img2vid-xt-1-1"
elif "sdxl" in lora_id:
base_model_id = "stabilityai/stable-diffusion-xl-base-1.0"
elif "flux" in lora_id:
base_model_id = "black-forest-labs/FLUX.1-dev"
else:
# Skip LoRAs without a determinable base model
continue
# Find the base model in our models dict to get its VRAM requirements
base_model_info = None
base_model_name = None
for m_name, m_info in models.items():
if m_info.get("id") == base_model_id:
base_model_info = m_info
base_model_name = m_name
break
# If base model not in our database, create a minimal info dict
if not base_model_info:
# Estimate VRAM based on base model type
# (hard-coded GB guesses per family; fall through to 16 GB)
if "wan" in base_model_id.lower():
base_vram_est = 24.0
elif "svd" in base_model_id.lower():
base_vram_est = 16.0
elif "flux" in base_model_id.lower():
base_vram_est = 24.0
elif "sdxl" in base_model_id.lower():
base_vram_est = 12.0
else:
base_vram_est = 16.0
base_model_info = {
"id": base_model_id,
"vram": f"~{base_vram_est:.0f} GB",
"class": "WanPipeline" if "wan" in base_model_id.lower() else
"StableVideoDiffusionPipeline" if "svd" in base_model_id.lower() else
"FluxPipeline" if "flux" in base_model_id.lower() else
"StableDiffusionXLPipeline",
"supports_i2v": "i2v" in base_model_id.lower(),
}
# Check VRAM compatibility using base model requirements
# LoRAs add a small overhead (~1-2GB)
# NOTE(review): flat +2 GB adapter overhead is an assumption — confirm.
vram_est = parse_vram_estimate(base_model_info.get("vram", "~10 GB")) + 2
if vram_est > vram_gb * 1.1: # Allow 10% margin
continue
# Get capabilities from base model
# (detect_model_type returns a dict read here with keys
# "t2v"/"i2v"/"t2i"/"i2i"/"nsfw")
base_caps = detect_model_type(base_model_info)
# Score the LoRA
score = 0
reasons = []
# Type matching (based on base model)
# No bonus at all if the base does not support the requested type.
if gen_type_str == "t2v" and base_caps["t2v"]:
score += 100
reasons.append("T2V capable (via base)")
elif gen_type_str == "i2v" and base_caps["i2v"]:
score += 100
reasons.append("I2V capable (via base)")
elif gen_type_str == "t2i" and base_caps["t2i"]:
score += 100
reasons.append("T2I capable (via base)")
elif gen_type_str == "i2i" and base_caps["i2i"]:
score += 100
reasons.append("I2I capable (via base)")
# LoRA-specific bonuses
lora_caps = detect_model_type(info)
# NSFW matching - LoRAs often specialize in NSFW
if is_nsfw:
if lora_caps["nsfw"]:
score += 70 # Higher bonus for NSFW-specialized LoRAs
reasons.append("NSFW-specialized LoRA")
elif base_caps["nsfw"]:
score += 40
reasons.append("NSFW-friendly base")
else:
score -= 20
reasons.append("May filter NSFW")
else:
# Non-NSFW content: slight penalty for NSFW LoRAs
if lora_caps["nsfw"]:
score -= 10
reasons.append("NSFW LoRA (may affect non-NSFW output)")
# LoRA quality bonus (LoRAs are often fine-tuned for specific styles)
if "realism" in name.lower() or "realistic" in name.lower():
score += 15
reasons.append("Realism-focused LoRA")
elif "style" in name.lower():
score += 10
reasons.append("Style LoRA")
# Quality vs speed
if prefer_quality:
# LoRAs often improve quality without much VRAM cost
score += 25 # Bonus for LoRA quality boost
# Larger base models score higher, capped at +30.
score += min(vram_est, 30)
else:
# Speed preference: reward smaller VRAM footprints instead.
score += max(0, 20 - vram_est)
# Popular/reliable LoRAs get bonus
downloads = info.get("downloads", 0)
if downloads > 1000:
score += 15
reasons.append(f"Popular LoRA ({downloads:,} downloads)")
# Store base model info for loading
# Copy so the shared models dict entry is not mutated; underscore
# keys carry the resolved base model through to the loader.
lora_info = info.copy()
lora_info["_base_model_info"] = base_model_info
lora_info["_base_model_name"] = base_model_name
lora_info["_inferred_base_model"] = base_model_id
# Only positive-scoring LoRAs become candidates.
if score > 0:
candidates.append((name, lora_info, score, reasons))
else:
# Non-LoRA model handling (original logic)
# Check VRAM compatibility
vram_est = parse_vram_estimate(info.get("vram", "~10 GB"))
if vram_est > vram_gb * 1.1: # Allow 10% margin
......@@ -2419,6 +2703,18 @@ def run_auto_mode(args, models):
# Use the best candidate
model_name, model_info, reason = all_candidates[0]
# Check if this is a LoRA adapter
is_lora = model_info.get("is_lora", False)
if is_lora:
print(f" ✅ Selected LoRA: {model_name}")
print(f" LoRA ID: {model_info.get('id', 'Unknown')}")
base_model_id = model_info.get("_inferred_base_model") or model_info.get("base_model")
if base_model_id:
print(f" Base Model: {base_model_id}")
# Store base model info for main() to use
args._auto_lora_base_model = base_model_id
else:
print(f" ✅ Selected: {model_name}")
print(f" {model_info.get('id', 'Unknown')}")
print(f" {reason}")
......@@ -2435,6 +2731,11 @@ def run_auto_mode(args, models):
print(f" ✅ Using user-specified model: {model_name}")
if model_info:
print(f" {model_info.get('id', 'Unknown')}")
# Check if user-specified model is a LoRA
if model_info.get("is_lora", False):
base_model_id = model_info.get("base_model")
if base_model_id:
print(f" Base Model: {base_model_id}")
# Select image model for I2V (only if user didn't specify one)
image_model_name = None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment