Add auto-detection of model capabilities from Hugging Face

- Added a detect_model_capabilities function that analyzes a Hugging Face model's tags and pipeline tag
- Auto-detects capabilities for Hugging Face models when adding via admin or --model
- Special handling for known models like Qwen VL, LLaVA, Whisper, etc.
- Updated UI with hints about auto-detection
parent 7d9685c9
...@@ -110,7 +110,8 @@ ...@@ -110,7 +110,8 @@
<div class="form-group"> <div class="form-group">
<label for="capabilities">Capabilities</label> <label for="capabilities">Capabilities</label>
<input type="text" id="capabilities" name="capabilities" placeholder="e.g., video to text, image to text, audio to text"> <input type="text" id="capabilities" name="capabilities" placeholder="Auto-detected for Hugging Face models, or enter manually">
<small style="color: #64748b;">For Hugging Face models, capabilities are auto-detected. For local models, enter manually.</small>
</div> </div>
<div class="form-group"> <div class="form-group">
...@@ -158,7 +159,8 @@ ...@@ -158,7 +159,8 @@
</div> </div>
<div class="form-group"> <div class="form-group">
<label for="editCapabilities">Capabilities</label> <label for="editCapabilities">Capabilities</label>
<input type="text" id="editCapabilities" name="capabilities" placeholder="e.g., video to text, image to text, audio to text"> <input type="text" id="editCapabilities" name="capabilities" placeholder="Auto-detected for Hugging Face models, or enter manually">
<small style="color: #64748b;">For Hugging Face models, capabilities are auto-detected. For local models, enter manually.</small>
</div> </div>
<div class="form-group"> <div class="form-group">
<label for="editVram">VRAM Estimate (GB)</label> <label for="editVram">VRAM Estimate (GB)</label>
......
...@@ -510,10 +510,15 @@ def add_model(): ...@@ -510,10 +510,15 @@ def add_model():
flash('Invalid model type', 'error') flash('Invalid model type', 'error')
return redirect(url_for('admin.models')) return redirect(url_for('admin.models'))
# If Hugging Face, download the model # If Hugging Face, download the model and detect capabilities
if model_type == 'huggingface': if model_type == 'huggingface':
try: try:
from .utils import download_huggingface_model from .utils import download_huggingface_model, detect_model_capabilities
# Auto-detect capabilities if not provided
if not capabilities:
capabilities = detect_model_capabilities(path)
local_path = download_huggingface_model(path) local_path = download_huggingface_model(path)
if local_path: if local_path:
path = local_path path = local_path
......
...@@ -2009,6 +2009,8 @@ def get_model_by_id(model_id: int) -> Optional[Dict[str, Any]]: ...@@ -2009,6 +2009,8 @@ def get_model_by_id(model_id: int) -> Optional[Dict[str, Any]]:
def ensure_model_exists(name: str, model_type: str, path: str, vram_estimate: int = 0, available: bool = True) -> None: def ensure_model_exists(name: str, model_type: str, path: str, vram_estimate: int = 0, available: bool = True) -> None:
"""Ensure a model exists in the database, create if not.""" """Ensure a model exists in the database, create if not."""
from .utils import detect_model_capabilities
conn = get_db_connection() conn = get_db_connection()
cursor = conn.cursor() cursor = conn.cursor()
...@@ -2017,9 +2019,14 @@ def ensure_model_exists(name: str, model_type: str, path: str, vram_estimate: in ...@@ -2017,9 +2019,14 @@ def ensure_model_exists(name: str, model_type: str, path: str, vram_estimate: in
existing = cursor.fetchone() existing = cursor.fetchone()
if not existing: if not existing:
# Auto-detect capabilities for Hugging Face models
capabilities = ''
if model_type == 'huggingface':
capabilities = detect_model_capabilities(path)
# Create the model # Create the model
cursor.execute('INSERT INTO models (name, type, path, vram_estimate, available) VALUES (?, ?, ?, ?, ?)', cursor.execute('INSERT INTO models (name, type, path, vram_estimate, available, capabilities) VALUES (?, ?, ?, ?, ?, ?)',
(name, model_type, path, vram_estimate, 1 if available else 0)) (name, model_type, path, vram_estimate, 1 if available else 0, capabilities))
conn.commit() conn.commit()
else: else:
# Update availability if it's not already available # Update availability if it's not already available
......
...@@ -162,4 +162,73 @@ def download_huggingface_model(model_id: str) -> str: ...@@ -162,4 +162,73 @@ def download_huggingface_model(model_id: str) -> str:
return local_path return local_path
except Exception as e: except Exception as e:
print(f"Failed to download model {model_id}: {e}") print(f"Failed to download model {model_id}: {e}")
return None return None
\ No newline at end of file
# Hugging Face tags (lower-cased) that imply each capability.
_IMAGE_TAGS = frozenset({'vision', 'image', 'ocr', 'object-detection', 'image-classification'})
_VIDEO_TAGS = frozenset({'video', 'temporal', 'action-recognition'})
_AUDIO_TAGS = frozenset({'audio', 'speech', 'asr', 'automatic-speech-recognition'})
_TEXT_GENERATION_TAGS = frozenset({'text-generation', 'causal-lm', 'text2text-generation'})

# Capability implied by the model's (lower-cased) pipeline tag.
_PIPELINE_CAPABILITIES = {
    'text-generation': 'text generation',
    'image-classification': 'image to text',
    'object-detection': 'image to text',
    'automatic-speech-recognition': 'audio to text',
    'text-to-image': 'text to image',
    'image-to-text': 'image to text',
}


def _capabilities_from_model_name(model_id_lower):
    """Return the fixed capability list for a known model family, or None."""
    if 'qwen' in model_id_lower and ('vl' in model_id_lower or 'vision' in model_id_lower):
        return ['video to text', 'image to text']
    if 'llava' in model_id_lower:
        return ['image to text']
    # Whisper, wav2vec and CLAP are all treated as audio-input models.
    if any(family in model_id_lower for family in ('whisper', 'wav2vec', 'clap')):
        return ['audio to text']
    return None


def detect_model_capabilities(model_id: str) -> str:
    """Detect model capabilities from Hugging Face model info.

    Known model families (Qwen VL/vision, LLaVA, Whisper, wav2vec, CLAP) are
    resolved from the model id alone; their fixed capability list always
    replaces anything derived from Hub metadata, so no Hub lookup is made for
    them and detection still works when the Hub is unreachable.

    For every other model the Hub is queried and the capabilities are derived
    from the model's tags and its pipeline tag.

    Args:
        model_id: Hugging Face model identifier, e.g. ``"openai/whisper-small"``.

    Returns:
        A comma-separated, alphabetically sorted, de-duplicated string of
        capabilities (e.g. ``"image to text, video to text"``), or ``''`` when
        nothing could be detected or the lookup failed.
    """
    # An empty or non-string id can match no family and cannot be looked up.
    if not isinstance(model_id, str) or not model_id:
        return ''

    known = _capabilities_from_model_name(model_id.lower())
    if known is not None:
        return ', '.join(sorted(set(known)))

    try:
        from huggingface_hub import HfApi

        model_info = HfApi().model_info(model_id)
        tags_lower = {tag.lower() for tag in (model_info.tags or [])}

        capabilities = set()
        if tags_lower & _IMAGE_TAGS:
            capabilities.add('image to text')
        # Video is only inferred for vision models that also carry a temporal tag.
        if 'vision' in tags_lower and tags_lower & _VIDEO_TAGS:
            capabilities.add('video to text')
        if tags_lower & _AUDIO_TAGS:
            capabilities.add('audio to text')
        if tags_lower & _TEXT_GENERATION_TAGS:
            capabilities.add('text generation')

        pipeline = getattr(model_info, 'pipeline_tag', None)
        if pipeline:
            pipeline_capability = _PIPELINE_CAPABILITIES.get(pipeline.lower())
            if pipeline_capability:
                capabilities.add(pipeline_capability)

        return ', '.join(sorted(capabilities))
    except Exception as e:
        # Best-effort: detection must never block adding a model, so any
        # failure (missing package, network error, unknown repo) yields ''.
        print(f"Failed to detect capabilities for {model_id}: {e}")
        return ''
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment