Add auto-detection of model capabilities from Hugging Face

- Added detect_model_capabilities function that analyzes HF model tags and pipeline
- Auto-detects capabilities for Hugging Face models when adding via admin or --model
- Special handling for known models like Qwen VL, LLaVA, Whisper, etc.
- Updated UI with hints about auto-detection
parent 7d9685c9
......@@ -110,7 +110,8 @@
<div class="form-group">
<label for="capabilities">Capabilities</label>
<input type="text" id="capabilities" name="capabilities" placeholder="e.g., video to text, image to text, audio to text">
<input type="text" id="capabilities" name="capabilities" placeholder="Auto-detected for Hugging Face models, or enter manually">
<small style="color: #64748b;">For Hugging Face models, capabilities are auto-detected. For local models, enter manually.</small>
</div>
<div class="form-group">
......@@ -158,7 +159,8 @@
</div>
<div class="form-group">
<label for="editCapabilities">Capabilities</label>
<input type="text" id="editCapabilities" name="capabilities" placeholder="e.g., video to text, image to text, audio to text">
<input type="text" id="editCapabilities" name="capabilities" placeholder="Auto-detected for Hugging Face models, or enter manually">
<small style="color: #64748b;">For Hugging Face models, capabilities are auto-detected. For local models, enter manually.</small>
</div>
<div class="form-group">
<label for="editVram">VRAM Estimate (GB)</label>
......
......@@ -510,10 +510,15 @@ def add_model():
flash('Invalid model type', 'error')
return redirect(url_for('admin.models'))
# If Hugging Face, download the model
# If Hugging Face, download the model and detect capabilities
if model_type == 'huggingface':
try:
from .utils import download_huggingface_model
from .utils import download_huggingface_model, detect_model_capabilities
# Auto-detect capabilities if not provided
if not capabilities:
capabilities = detect_model_capabilities(path)
local_path = download_huggingface_model(path)
if local_path:
path = local_path
......
......@@ -2009,6 +2009,8 @@ def get_model_by_id(model_id: int) -> Optional[Dict[str, Any]]:
def ensure_model_exists(name: str, model_type: str, path: str, vram_estimate: int = 0, available: bool = True) -> None:
"""Ensure a model exists in the database, create if not."""
from .utils import detect_model_capabilities
conn = get_db_connection()
cursor = conn.cursor()
......@@ -2017,9 +2019,14 @@ def ensure_model_exists(name: str, model_type: str, path: str, vram_estimate: in
existing = cursor.fetchone()
if not existing:
# Auto-detect capabilities for Hugging Face models
capabilities = ''
if model_type == 'huggingface':
capabilities = detect_model_capabilities(path)
# Create the model
cursor.execute('INSERT INTO models (name, type, path, vram_estimate, available) VALUES (?, ?, ?, ?, ?)',
(name, model_type, path, vram_estimate, 1 if available else 0))
cursor.execute('INSERT INTO models (name, type, path, vram_estimate, available, capabilities) VALUES (?, ?, ?, ?, ?, ?)',
(name, model_type, path, vram_estimate, 1 if available else 0, capabilities))
conn.commit()
else:
# Update availability if it's not already available
......
......@@ -162,4 +162,73 @@ def download_huggingface_model(model_id: str) -> str:
return local_path
except Exception as e:
print(f"Failed to download model {model_id}: {e}")
return None
\ No newline at end of file
return None
# Hub tags (lower-cased) that imply each capability label.
_IMAGE_TAGS = frozenset(
    ['vision', 'image', 'ocr', 'object-detection', 'image-classification']
)
_VIDEO_TAGS = frozenset(['video', 'temporal', 'action-recognition'])
_AUDIO_TAGS = frozenset(
    ['audio', 'speech', 'asr', 'automatic-speech-recognition']
)
_TEXT_TAGS = frozenset(['text-generation', 'causal-lm', 'text2text-generation'])

# Hub pipeline tag (lower-cased) -> capability label.
_PIPELINE_CAPABILITIES = {
    'text-generation': 'text generation',
    'image-classification': 'image to text',
    'object-detection': 'image to text',
    'automatic-speech-recognition': 'audio to text',
    'text-to-image': 'text to image',
    'image-to-text': 'image to text',
}


def _capabilities_from_name(model_id: str) -> list:
    """Return the capabilities implied by a known model family name, or []."""
    name = model_id.lower()
    if 'qwen' in name and ('vl' in name or 'vision' in name):
        return ['video to text', 'image to text']
    if 'llava' in name:
        return ['image to text']
    if any(family in name for family in ('whisper', 'wav2vec', 'clap')):
        return ['audio to text']
    return []


def _capabilities_from_metadata(tags, pipeline_tag) -> list:
    """Return the capabilities implied by Hub tags and the pipeline tag."""
    tag_set = {tag.lower() for tag in (tags or [])}
    found = []

    if tag_set & _IMAGE_TAGS:
        found.append('image to text')
    # Video is only inferred for vision models that also carry a temporal tag.
    if 'vision' in tag_set and tag_set & _VIDEO_TAGS:
        found.append('video to text')
    if tag_set & _AUDIO_TAGS:
        found.append('audio to text')
    if tag_set & _TEXT_TAGS:
        found.append('text generation')

    if pipeline_tag:
        capability = _PIPELINE_CAPABILITIES.get(pipeline_tag.lower())
        if capability:
            found.append(capability)
    return found


def detect_model_capabilities(model_id: str) -> str:
    """Detect model capabilities from Hugging Face model info.

    Known model families (Qwen VL, LLaVA, Whisper, wav2vec, CLAP) are
    recognised from the model id alone. Their name-based result always
    replaced whatever the Hub metadata said, so the Hub is not queried for
    them; this also lets them be detected when ``huggingface_hub`` is not
    installed or the Hub is unreachable. Every other model is classified
    from its Hub tags and pipeline tag.

    Args:
        model_id: Hugging Face model identifier, e.g. ``"openai/whisper-small"``.

    Returns:
        A comma-separated, alphabetically sorted, de-duplicated list of
        capability labels (e.g. ``"image to text, video to text"``), or an
        empty string when the id is empty, nothing is detected, or the Hub
        lookup fails.
    """
    if not model_id:
        return ''

    capabilities = _capabilities_from_name(model_id)
    if not capabilities:
        try:
            # Imported lazily so the module loads without huggingface_hub.
            from huggingface_hub import HfApi

            model_info = HfApi().model_info(model_id)
            capabilities = _capabilities_from_metadata(
                model_info.tags,
                getattr(model_info, 'pipeline_tag', None),
            )
        except Exception as e:
            # Best-effort detection: callers treat '' as "unknown".
            print(f"Failed to detect capabilities for {model_id}: {e}")
            return ''

    # Remove duplicates and return in a stable order.
    return ', '.join(sorted(set(capabilities)))
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment