Improve model discovery: skip models not found on HuggingFace, add deep search for variants

- Skip models not found on HuggingFace instead of adding with defaults
- Add deep search for model variants from known organizations
- Search organizations: Alpha-VLLM, stepvideo, hpcai-tech, tencent,
  Tencent-Hunyuan, rhymes-ai, THUDM, genmo, Wan-AI, stabilityai,
  black-forest-labs (plus the generic "wan" and "flux" queries)
- Remove non-existent models from known_large_models list
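- Better error handling for model validation
- Consider LoRA adapters in select_best_model (scored via their base model)
  and report the selected LoRA's base model in auto mode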
parent 4164da7e
Pipeline #226 canceled with stages
...@@ -937,39 +937,33 @@ def update_all_models(hf_token=None): ...@@ -937,39 +937,33 @@ def update_all_models(hf_token=None):
("swinir", 20), ("swinir", 20),
] ]
# Known large/huge models to explicitly include (without requiring validation) # Known large/huge models to check and include if found on HuggingFace
# These are added regardless of HuggingFace search results # These are validated before adding - models not found are skipped
known_large_models = [ known_large_models = [
# ═══════════════════════════════════════════════════════════════ # ═══════════════════════════════════════════════════════════════
# 100GB+ models - Ultra High VRAM # 100GB+ models - Ultra High VRAM
# ═══════════════════════════════════════════════════════════════ # ═══════════════════════════════════════════════════════════════
("Alpha-VLLM/Lumina-Next-SFT", "LuminaVideoPipeline", "~100 GB", "Lumina Next SFT - Ultra high quality T2I/T2V"), ("Alpha-VLLM/Lumina-Next-SFT", "LuminaVideoPipeline", "~100 GB", "Lumina Next SFT - Ultra high quality T2I/T2V"),
("Alpha-VLLM/Lumina-T2X", "LuminaVideoPipeline", "~100 GB", "Lumina T2X - Text to any"),
# ═══════════════════════════════════════════════════════════════ # ═══════════════════════════════════════════════════════════════
# 90-140GB models - Extreme VRAM # 90-140GB models - Extreme VRAM
# ═══════════════════════════════════════════════════════════════ # ═══════════════════════════════════════════════════════════════
("stepvideo/Step-Video-T2V", "StepVideoPipeline", "~90-140 GB", "Step Video T2V - Extreme quality"),
# ═══════════════════════════════════════════════════════════════ # ═══════════════════════════════════════════════════════════════
# 45-65GB models - Very High VRAM # 45-65GB models - Very High VRAM
# ═══════════════════════════════════════════════════════════════ # ═══════════════════════════════════════════════════════════════
("hpcai-tech/Open-Sora", "OpenSoraPipeline", "~45-65 GB", "Open Sora - Open source Sora alternative"), ("hpcai-tech/Open-Sora", "OpenSoraPipeline", "~45-65 GB", "Open Sora - Open source Sora alternative"),
("hpcai-tech/OpenSora-STDiT-v2", "OpenSoraPipeline", "~45-65 GB", "OpenSora STDiT v2"),
("hpcai-tech/Open-Sora-1.2", "OpenSoraPipeline", "~45-65 GB", "OpenSora 1.2"), ("hpcai-tech/Open-Sora-1.2", "OpenSoraPipeline", "~45-65 GB", "OpenSora 1.2"),
("hpcai-tech/Open-Sora-plan-v1.2.0", "OpenSoraPipeline", "~45-65 GB", "OpenSora Plan v1.2.0"),
# ═══════════════════════════════════════════════════════════════ # ═══════════════════════════════════════════════════════════════
# 40-55GB models - High VRAM # 40-55GB models - High VRAM
# ═══════════════════════════════════════════════════════════════ # ═══════════════════════════════════════════════════════════════
("tencent/HunyuanVideo", "HunyuanDiTPipeline", "~40-55 GB", "Tencent HunyuanVideo"), ("tencent/HunyuanVideo", "HunyuanDiTPipeline", "~40-55 GB", "Tencent HunyuanVideo"),
("Tencent-Hunyuan/HunyuanVideo", "HunyuanDiTPipeline", "~40-55 GB", "Tencent HunyuanVideo (alt)"),
# ═══════════════════════════════════════════════════════════════ # ═══════════════════════════════════════════════════════════════
# 35-45GB models # 35-45GB models
# ═══════════════════════════════════════════════════════════════ # ═══════════════════════════════════════════════════════════════
("rhymes-ai/Allegro", "AllegroPipeline", "~35-45 GB", "Allegro - High quality video gen"), ("rhymes-ai/Allegro", "AllegroPipeline", "~35-45 GB", "Allegro - High quality video gen"),
("rhymes-ai/Allegro-Medium", "AllegroPipeline", "~35-45 GB", "Allegro Medium"),
# ═══════════════════════════════════════════════════════════════ # ═══════════════════════════════════════════════════════════════
# 20-30GB models # 20-30GB models
...@@ -985,18 +979,46 @@ def update_all_models(hf_token=None): ...@@ -985,18 +979,46 @@ def update_all_models(hf_token=None):
("genmo/mochi", "MochiPipeline", "~18-22 GB", "Mochi - Latest version"), ("genmo/mochi", "MochiPipeline", "~18-22 GB", "Mochi - Latest version"),
] ]
# Additional model variants to search for (organization/model patterns)
additional_search_patterns = [
# Lumina variants
"Alpha-VLLM/Lumina",
"Alpha-VLLM/lumina",
"alpha-vllm/lumina",
# Step Video variants
"stepvideo/step",
"stepvideo/Step",
# OpenSora variants
"hpcai-tech/OpenSora",
"hpcai-tech/open-sora",
# Hunyuan variants
"tencent/Hunyuan",
"Tencent-Hunyuan/Hunyuan",
# Allegro variants
"rhymes-ai/Allegro",
# CogVideo variants
"THUDM/CogVideo",
# Mochi variants
"genmo/mochi",
]
all_models = {} all_models = {}
seen_ids = set() seen_ids = set()
# First, add known large models explicitly (without requiring validation) # First, validate and add known large models (only if found on HuggingFace)
print("\n📦 Adding known large/high-VRAM models...") print("\n📦 Validating known large/high-VRAM models...")
print(" (These models may require significant VRAM - 40GB to 140GB)") print(" (These models may require significant VRAM - 40GB to 140GB)")
print() print()
for model_id, default_pipeline_class, vram_est, description in known_large_models: for model_id, default_pipeline_class, vram_est, description in known_large_models:
if model_id in seen_ids: if model_id in seen_ids:
continue continue
seen_ids.add(model_id)
# Generate name # Generate name
name = model_id.split("/")[-1].lower() name = model_id.split("/")[-1].lower()
...@@ -1010,31 +1032,27 @@ def update_all_models(hf_token=None): ...@@ -1010,31 +1032,27 @@ def update_all_models(hf_token=None):
name = f"{base_name}_{counter}" name = f"{base_name}_{counter}"
counter += 1 counter += 1
# Try to validate and detect actual pipeline class # Validate model exists on HuggingFace
model_info = validate_hf_model(model_id, hf_token=hf_token) model_info = validate_hf_model(model_id, hf_token=hf_token)
detected_pipeline = None
if model_info: if not model_info:
tags = model_info.get("tags", []) # Model not found - skip it (don't add with defaults)
downloads = model_info.get("downloads", 0) print(f" ⏭️ Skipping {model_id} - not found on HuggingFace")
likes = model_info.get("likes", 0) continue
is_i2v = any(t in tags for t in ["image-to-video", "i2v"]) or "i2v" in model_id.lower()
seen_ids.add(model_id)
# Try to detect actual pipeline class from model_index.json tags = model_info.get("tags", [])
detected_pipeline = detect_pipeline_class(model_info) downloads = model_info.get("downloads", 0)
if detected_pipeline: likes = model_info.get("likes", 0)
pipeline_class = detected_pipeline is_i2v = any(t in tags for t in ["image-to-video", "i2v"]) or "i2v" in model_id.lower()
print(f" 🔍 Detected pipeline: {pipeline_class} for {model_id}")
else: # Try to detect actual pipeline class from model_index.json
pipeline_class = default_pipeline_class detected_pipeline = detect_pipeline_class(model_info)
if detected_pipeline:
pipeline_class = detected_pipeline
print(f" 🔍 Detected pipeline: {pipeline_class} for {model_id}")
else: else:
# Add anyway with defaults
tags = ["video", "text-to-video", "large-model"]
downloads = 0
likes = 0
is_i2v = "i2v" in model_id.lower()
pipeline_class = default_pipeline_class pipeline_class = default_pipeline_class
print(f" ⚠️ Could not validate {model_id} - adding with defaults")
# Build entry # Build entry
model_entry = { model_entry = {
...@@ -1053,6 +1071,135 @@ def update_all_models(hf_token=None): ...@@ -1053,6 +1071,135 @@ def update_all_models(hf_token=None):
all_models[name] = model_entry all_models[name] = model_entry
print(f" ✅ {name}: {model_id} ({vram_est}) [{pipeline_class}]") print(f" ✅ {name}: {model_id} ({vram_est}) [{pipeline_class}]")
# Deep search for additional model variants from known organizations
print("\n🔍 Deep searching for model variants from known organizations...")
# Search for models from specific organizations
organization_searches = [
# Lumina models
("Alpha-VLLM", 30),
("alpha-vllm", 30),
# Step Video models
("stepvideo", 20),
# OpenSora models
("hpcai-tech", 30),
# Hunyuan models
("tencent", 30),
("Tencent-Hunyuan", 20),
# Allegro models
("rhymes-ai", 20),
# CogVideo models
("THUDM", 30),
# Mochi models
("genmo", 20),
# Wan models
("Wan-AI", 30),
("wan", 20),
# Stability AI models
("stabilityai", 40),
# Flux models
("black-forest-labs", 20),
("flux", 20),
]
for org, limit in organization_searches:
print(f"\n🔍 Searching organization: '{org}' (limit: {limit})")
results = search_hf_models(org, limit=limit, hf_token=hf_token)
for m in results:
model_id = m["id"]
# Skip duplicates
if model_id in seen_ids:
continue
# Filter: include video models, NSFW models, OR models with known video pipeline classes
is_video_model = m["is_i2v"] or m["is_video"]
is_nsfw_model = m["is_nsfw"]
is_known_pipeline = m["pipeline_class"] in ["WanPipeline", "MochiPipeline", "CogVideoXPipeline",
"StableVideoDiffusionPipeline", "I2VGenXLPipeline",
"LTXVideoPipeline", "AnimateDiffPipeline",
"TextToVideoSDPipeline", "TextToVideoZeroPipeline",
"HotshotXLPipeline", "AllegroPipeline",
"HunyuanDiTPipeline", "OpenSoraPipeline",
"LuminaVideoPipeline", "StepVideoPipeline",
"DiffusionPipeline", "FluxPipeline",
"StableDiffusionXLPipeline", "StableDiffusion3Pipeline"]
if not (is_video_model or is_nsfw_model or is_known_pipeline):
continue
seen_ids.add(model_id)
# Generate model name
name = model_id.split("/")[-1].lower()
name = name.replace("-", "_").replace(".", "_")
name = re.sub(r'[^a-z0-9_]', '', name)
# Ensure unique name
base_name = name
counter = 1
while name in all_models:
name = f"{base_name}_{counter}"
counter += 1
# Use pipeline class from search results (already detected via detect_pipeline_class)
pipeline_class = m["pipeline_class"]
if pipeline_class == "Unknown":
# Fallback based on model type
if m["is_i2v"]:
pipeline_class = "StableVideoDiffusionPipeline"
elif m["is_video"]:
pipeline_class = "WanPipeline"
elif m["is_image"]:
pipeline_class = "StableDiffusionXLPipeline"
else:
pipeline_class = "DiffusionPipeline"
# Determine VRAM estimate from pipeline class
vram_est = PIPELINE_CLASS_MAP.get(pipeline_class, {}).get("default_vram", "~10-20 GB")
# Detect if LoRA
is_lora = "lora" in model_id.lower() or any(t in m.get("tags", []) for t in ["lora", "LoRA"])
base_model = None
if is_lora:
if "wan" in model_id.lower():
base_model = "Wan-AI/Wan2.1-I2V-14B-Diffusers" if m["is_i2v"] else "Wan-AI/Wan2.1-T2V-14B-Diffusers"
elif "svd" in model_id.lower() or "stable-video" in model_id.lower():
base_model = "stabilityai/stable-video-diffusion-img2vid-xt-1-1"
# Build model entry
model_entry = {
"id": model_id,
"vram": vram_est,
"class": pipeline_class,
"desc": f"{'[LoRA] ' if is_lora else ''}{model_id}",
"supports_i2v": m["is_i2v"],
"tags": m.get("tags", [])[:10],
"downloads": m.get("downloads", 0),
"likes": m.get("likes", 0),
"is_lora": is_lora,
"auto_added": True,
"pipeline_tag": m.get("pipeline_tag", ""),
"library_name": m.get("library_name", ""),
}
if base_model:
model_entry["base_model"] = base_model
all_models[name] = model_entry
print(f" ✅ {name}: {model_id} [{pipeline_class}]")
for query, limit in search_queries: for query, limit in search_queries:
print(f"\n🔍 Searching: '{query}' (limit: {limit})") print(f"\n🔍 Searching: '{query}' (limit: {limit})")
results = search_hf_models(query, limit=limit, hf_token=hf_token) results = search_hf_models(query, limit=limit, hf_token=hf_token)
...@@ -2064,66 +2211,203 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_ ...@@ -2064,66 +2211,203 @@ def select_best_model(gen_type, models, vram_gb=24, prefer_quality=True, return_
return_all: If True, return all candidates sorted by score return_all: If True, return all candidates sorted by score
Returns: (model_name, model_info, reason) or [(model_name, model_info, reason), ...] if return_all=True Returns: (model_name, model_info, reason) or [(model_name, model_info, reason), ...] if return_all=True
LoRA Support:
LoRA adapters are now considered alongside base models. When a LoRA is selected,
the returned info includes 'is_lora': True and 'base_model' for the main pipeline
to load the base model first, then apply the LoRA adapter.
""" """
candidates = [] candidates = []
is_nsfw = gen_type.get("is_nsfw", False) is_nsfw = gen_type.get("is_nsfw", False)
gen_type_str = gen_type.get("type", "t2v") gen_type_str = gen_type.get("type", "t2v")
for name, info in models.items(): for name, info in models.items():
# Skip LoRA adapters (need base model) is_lora = info.get("is_lora", False)
if info.get("is_lora"): base_model_id = info.get("base_model")
continue
# Check VRAM compatibility
vram_est = parse_vram_estimate(info.get("vram", "~10 GB"))
if vram_est > vram_gb * 1.1: # Allow 10% margin
continue
# Check model capabilities # For LoRA adapters, check if we have a base model
caps = detect_model_type(info) if is_lora:
if not base_model_id:
# Try to infer base model from LoRA name
lora_id = info.get("id", "").lower()
if "wan" in lora_id:
if info.get("supports_i2v"):
base_model_id = "Wan-AI/Wan2.1-I2V-14B-Diffusers"
else:
base_model_id = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
elif "svd" in lora_id or "stable-video" in lora_id:
base_model_id = "stabilityai/stable-video-diffusion-img2vid-xt-1-1"
elif "sdxl" in lora_id:
base_model_id = "stabilityai/stable-diffusion-xl-base-1.0"
elif "flux" in lora_id:
base_model_id = "black-forest-labs/FLUX.1-dev"
else:
# Skip LoRAs without a determinable base model
continue
# Find the base model in our models dict to get its VRAM requirements
base_model_info = None
base_model_name = None
for m_name, m_info in models.items():
if m_info.get("id") == base_model_id:
base_model_info = m_info
base_model_name = m_name
break
# If base model not in our database, create a minimal info dict
if not base_model_info:
# Estimate VRAM based on base model type
if "wan" in base_model_id.lower():
base_vram_est = 24.0
elif "svd" in base_model_id.lower():
base_vram_est = 16.0
elif "flux" in base_model_id.lower():
base_vram_est = 24.0
elif "sdxl" in base_model_id.lower():
base_vram_est = 12.0
else:
base_vram_est = 16.0
base_model_info = {
"id": base_model_id,
"vram": f"~{base_vram_est:.0f} GB",
"class": "WanPipeline" if "wan" in base_model_id.lower() else
"StableVideoDiffusionPipeline" if "svd" in base_model_id.lower() else
"FluxPipeline" if "flux" in base_model_id.lower() else
"StableDiffusionXLPipeline",
"supports_i2v": "i2v" in base_model_id.lower(),
}
# Check VRAM compatibility using base model requirements
# LoRAs add a small overhead (~1-2GB)
vram_est = parse_vram_estimate(base_model_info.get("vram", "~10 GB")) + 2
if vram_est > vram_gb * 1.1: # Allow 10% margin
continue
# Get capabilities from base model
base_caps = detect_model_type(base_model_info)
# Score the LoRA
score = 0
reasons = []
# Type matching (based on base model)
if gen_type_str == "t2v" and base_caps["t2v"]:
score += 100
reasons.append("T2V capable (via base)")
elif gen_type_str == "i2v" and base_caps["i2v"]:
score += 100
reasons.append("I2V capable (via base)")
elif gen_type_str == "t2i" and base_caps["t2i"]:
score += 100
reasons.append("T2I capable (via base)")
elif gen_type_str == "i2i" and base_caps["i2i"]:
score += 100
reasons.append("I2I capable (via base)")
# LoRA-specific bonuses
lora_caps = detect_model_type(info)
# NSFW matching - LoRAs often specialize in NSFW
if is_nsfw:
if lora_caps["nsfw"]:
score += 70 # Higher bonus for NSFW-specialized LoRAs
reasons.append("NSFW-specialized LoRA")
elif base_caps["nsfw"]:
score += 40
reasons.append("NSFW-friendly base")
else:
score -= 20
reasons.append("May filter NSFW")
else:
# Non-NSFW content: slight penalty for NSFW LoRAs
if lora_caps["nsfw"]:
score -= 10
reasons.append("NSFW LoRA (may affect non-NSFW output)")
# LoRA quality bonus (LoRAs are often fine-tuned for specific styles)
if "realism" in name.lower() or "realistic" in name.lower():
score += 15
reasons.append("Realism-focused LoRA")
elif "style" in name.lower():
score += 10
reasons.append("Style LoRA")
# Quality vs speed
if prefer_quality:
# LoRAs often improve quality without much VRAM cost
score += 25 # Bonus for LoRA quality boost
score += min(vram_est, 30)
else:
score += max(0, 20 - vram_est)
# Popular/reliable LoRAs get bonus
downloads = info.get("downloads", 0)
if downloads > 1000:
score += 15
reasons.append(f"Popular LoRA ({downloads:,} downloads)")
# Store base model info for loading
lora_info = info.copy()
lora_info["_base_model_info"] = base_model_info
lora_info["_base_model_name"] = base_model_name
lora_info["_inferred_base_model"] = base_model_id
if score > 0:
candidates.append((name, lora_info, score, reasons))
# Score the model
score = 0
reasons = []
# Type matching
if gen_type_str == "t2v" and caps["t2v"]:
score += 100
reasons.append("T2V capable")
elif gen_type_str == "i2v" and caps["i2v"]:
score += 100
reasons.append("I2V capable")
elif gen_type_str == "t2i" and caps["t2i"]:
score += 100
reasons.append("T2I capable")
elif gen_type_str == "i2i" and caps["i2i"]:
score += 100
reasons.append("I2I capable")
# NSFW matching
if is_nsfw and caps["nsfw"]:
score += 50
reasons.append("NSFW-friendly")
elif is_nsfw and not caps["nsfw"]:
score -= 30
reasons.append("May filter NSFW")
# Quality vs speed
if prefer_quality:
# Prefer larger models for quality
score += min(vram_est, 30)
else: else:
# Prefer smaller models for speed # Non-LoRA model handling (original logic)
score += max(0, 20 - vram_est) # Check VRAM compatibility
vram_est = parse_vram_estimate(info.get("vram", "~10 GB"))
# Popular/reliable models get bonus if vram_est > vram_gb * 1.1: # Allow 10% margin
downloads = info.get("downloads", 0) continue
if downloads > 10000:
score += 20 # Check model capabilities
reasons.append(f"Popular ({downloads:,} downloads)") caps = detect_model_type(info)
if score > 0: # Score the model
candidates.append((name, info, score, reasons)) score = 0
reasons = []
# Type matching
if gen_type_str == "t2v" and caps["t2v"]:
score += 100
reasons.append("T2V capable")
elif gen_type_str == "i2v" and caps["i2v"]:
score += 100
reasons.append("I2V capable")
elif gen_type_str == "t2i" and caps["t2i"]:
score += 100
reasons.append("T2I capable")
elif gen_type_str == "i2i" and caps["i2i"]:
score += 100
reasons.append("I2I capable")
# NSFW matching
if is_nsfw and caps["nsfw"]:
score += 50
reasons.append("NSFW-friendly")
elif is_nsfw and not caps["nsfw"]:
score -= 30
reasons.append("May filter NSFW")
# Quality vs speed
if prefer_quality:
# Prefer larger models for quality
score += min(vram_est, 30)
else:
# Prefer smaller models for speed
score += max(0, 20 - vram_est)
# Popular/reliable models get bonus
downloads = info.get("downloads", 0)
if downloads > 10000:
score += 20
reasons.append(f"Popular ({downloads:,} downloads)")
if score > 0:
candidates.append((name, info, score, reasons))
if not candidates: if not candidates:
# Fallback: return first available model # Fallback: return first available model
...@@ -2419,8 +2703,20 @@ def run_auto_mode(args, models): ...@@ -2419,8 +2703,20 @@ def run_auto_mode(args, models):
# Use the best candidate # Use the best candidate
model_name, model_info, reason = all_candidates[0] model_name, model_info, reason = all_candidates[0]
print(f" ✅ Selected: {model_name}")
print(f" {model_info.get('id', 'Unknown')}") # Check if this is a LoRA adapter
is_lora = model_info.get("is_lora", False)
if is_lora:
print(f" ✅ Selected LoRA: {model_name}")
print(f" LoRA ID: {model_info.get('id', 'Unknown')}")
base_model_id = model_info.get("_inferred_base_model") or model_info.get("base_model")
if base_model_id:
print(f" Base Model: {base_model_id}")
# Store base model info for main() to use
args._auto_lora_base_model = base_model_id
else:
print(f" ✅ Selected: {model_name}")
print(f" {model_info.get('id', 'Unknown')}")
print(f" {reason}") print(f" {reason}")
args.model = model_name args.model = model_name
...@@ -2435,6 +2731,11 @@ def run_auto_mode(args, models): ...@@ -2435,6 +2731,11 @@ def run_auto_mode(args, models):
print(f" ✅ Using user-specified model: {model_name}") print(f" ✅ Using user-specified model: {model_name}")
if model_info: if model_info:
print(f" {model_info.get('id', 'Unknown')}") print(f" {model_info.get('id', 'Unknown')}")
# Check if user-specified model is a LoRA
if model_info.get("is_lora", False):
base_model_id = model_info.get("base_model")
if base_model_id:
print(f" Base Model: {base_model_id}")
# Select image model for I2V (only if user didn't specify one) # Select image model for I2V (only if user didn't specify one)
image_model_name = None image_model_name = None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment