#!/usr/bin/env python3
"""
VideoGen MCP Server - Model Context Protocol wrapper for VideoGen

Copyleft © 2026 Stefy <stefy@nexlab.net>

This MCP server exposes VideoGen functionality to AI agents through the
Model Context Protocol, enabling seamless integration with Claude and other
MCP-compatible AI systems.

Installation:
    pip install mcp

Usage:
    python3 videogen_mcp_server.py

Or add to Claude Desktop config:
    {
        "mcpServers": {
            "videogen": {
                "command": "python3",
                "args": ["/path/to/videogen_mcp_server.py"]
            }
        }
    }
"""

import asyncio
import json
import os
import subprocess
import sys
from pathlib import Path
from typing import Any, Optional

from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import Tool, TextContent

# MCP server instance, registered under the name "videogen". Handlers in this
# module attach to it through its decorators (e.g. @server.list_tools()).
server = Server("videogen")

# Location of the videogen script: a file named "videogen" in the same
# directory as this module. run_videogen_command() runs it with the current
# Python interpreter and uses its parent directory as the working directory.
VIDEOGEN_PATH = Path(__file__).parent / "videogen"


def run_videogen_command(args: list, timeout: int = 3600):
    """Run the videogen script with *args* and capture its output.

    The script at ``VIDEOGEN_PATH`` is executed with the current Python
    interpreter (``sys.executable``), using the script's own directory as
    the working directory.

    Args:
        args: Command-line arguments appended after the script path.
        timeout: Maximum run time in seconds before the call is abandoned.

    Returns:
        A ``(output, returncode)`` tuple. ``output`` is the captured stdout
        followed by the captured stderr. If the command times out or cannot
        be run at all, ``output`` is an ``"Error: ..."`` message and
        ``returncode`` is 1.
    """
    # Unpacking accepts any sequence of arguments, not only a list.
    cmd = [sys.executable, str(VIDEOGEN_PATH), *args]

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # Without this, a single undecodable byte in the child's output
            # raises UnicodeDecodeError, which would discard all output and
            # report a failure even when the command itself succeeded.
            errors="replace",
            timeout=timeout,
            cwd=str(VIDEOGEN_PATH.parent),
        )
    except subprocess.TimeoutExpired:
        return "Error: Command timed out", 1
    except Exception as e:
        # Deliberately broad: callers expect an (output, code) tuple rather
        # than an exception when the process cannot be started.
        return f"Error: {str(e)}", 1

    return result.stdout + result.stderr, result.returncode


def parse_model_list(output: str) -> list:
    """Turn the textual model table into a list of model dictionaries.

    Header lines (starting with ``ID``), separator lines (starting with
    ``-``), blank lines and the ``Total shown`` / ``Use --model`` footer
    lines are ignored. A remaining line is accepted only when it has at
    least eight whitespace-separated columns and its first column is an
    integer; anything else is silently dropped.

    Each result has the keys ``id``, ``name``, ``vram``, ``i2v``, ``t2v``,
    ``t2i``, ``i2i`` and ``nsfw``; ``lora`` is added when a ninth column is
    present. Capability columns are ``True`` only for the exact text "Yes".
    """
    flag_keys = ("i2v", "t2v", "t2i", "i2i", "nsfw")
    footer_markers = ('Total shown', 'Use --model')
    parsed = []

    for row in output.strip().split('\n'):
        # Header, separator and empty rows carry no model data.
        if not row.strip() or row.startswith(('ID', '-')):
            continue
        if any(marker in row for marker in footer_markers):
            continue

        cells = row.split()
        if len(cells) < 8:
            continue

        try:
            model_id = int(cells[0])
        except ValueError:
            # First column is not a numeric ID, so this is not a model row.
            continue

        entry = {"id": model_id, "name": cells[1], "vram": cells[2]}
        for key, cell in zip(flag_keys, cells[3:8]):
            entry[key] = cell == "Yes"
        if len(cells) >= 9:
            entry["lora"] = cells[8] == "Yes"
        parsed.append(entry)

    return parsed


@server.list_tools()
async def list_tools() -> list:
    """List available VideoGen tools"""
    return [
        Tool(
            name="videogen_generate",
            description="Generate a video using VideoGen with automatic mode selection. This is the primary tool for video generation. It automatically detects the best generation type, selects the appropriate model, configures settings, and handles NSFW content detection.",
            inputSchema={
                "type": "object",
                "properties": {
                    "prompt": {
                        "type": "string",
                        "description": "The main prompt describing what to generate"
                    },
                    "output": {
                        "type": "string",
                        "description": "Output filename (without extension)",
                        "default": "output"
                    },
                    "model": {
                        "type": "string",
                        "description": "Specific model to use (optional, auto-selected if not provided)"
                    },
                    "length": {
                        "type": "number",
                        "description": "Video duration in seconds",
                        "default": 5.0
                    },
                    "width": {
                        "type": "integer",
                        "description": "Video width in pixels",
                        "default": 832
                    },
                    "height": {
                        "type": "integer",
                        "description": "Video height in pixels",
                        "default": 480
                    },
                    "fps": {
                        "type": "integer",
                        "description": "Frames per second",
                        "default": 15
                    },
                    "seed": {
                        "type": "integer",
                        "description": "Random seed for reproducibility (-1 for random)",
                        "default": -1
                    },
                    "auto": {
                        "type": "boolean",
                        "description": "Use automatic mode (recommended)",
                        "default": True
                    },
                    "no_filter": {
                        "type": "boolean",
                        "description": "Disable NSFW filter",
                        "default": False
                    }
                },
                "required": ["prompt"]
            }
        ),
        
        Tool(
            name="videogen_generate_video",
            description="Generate a video from text (Text-to-Video). Use this when you specifically want T2V generation without auto-detection.",
            inputSchema={
                "type": "object",
                "properties": {
                    "prompt": {
                        "type": "string",
                        "description": "Description of the video to generate"
                    },
                    "model": {
                        "type": "string",
                        "description": "Model name (use videogen_list_models to see options)"
                    },
                    "output": {
                        "type": "string",
                        "description": "Output filename",
                        "default": "output"
                    },
                    "length": {
                        "type": "number",
                        "description": "Duration in seconds",
                        "default": 5.0
                    },
                    "width": {
                        "type": "integer",
                        "default": 832
                    },
                    "height": {
                        "type": "integer",
                        "default": 480
                    },
                    "fps": {
                        "type": "integer",
                        "default": 15
                    },
                    "seed": {
                        "type": "integer",
                        "default": -1
                    }
                },
                "required": ["prompt", "model"]
            }
        ),
        
        Tool(
            name="videogen_generate_image",
            description="Generate an image from text (Text-to-Image). Use this for static image generation.",
            inputSchema={
                "type": "object",
                "properties": {
                    "prompt": {
                        "type": "string",
                        "description": "Description of the image to generate"
                    },
                    "model": {
                        "type": "string",
                        "description": "Model name (e.g., flux_dev, sdxl_base, pony_v6)"
                    },
                    "output": {
                        "type": "string",
                        "description": "Output filename (should end with .png or .jpg)",
                        "default": "output.png"
                    },
                    "width": {
                        "type": "integer",
                        "default": 1024
                    },
                    "height": {
                        "type": "integer",
                        "default": 1024
                    },
                    "steps": {
                        "type": "integer",
                        "description": "Inference steps",
                        "default": 30
                    },
                    "seed": {
                        "type": "integer",
                        "default": -1
                    }
                },
                "required": ["prompt", "model"]
            }
        ),
        
        Tool(
            name="videogen_animate_image",
            description="Animate an existing image (Image-to-Video). Use this to add motion to a static image.",
            inputSchema={
                "type": "object",
                "properties": {
                    "image": {
                        "type": "string",
                        "description": "Path to the input image file"
                    },
                    "prompt": {
                        "type": "string",
                        "description": "Description of the desired animation"
                    },
                    "model": {
                        "type": "string",
                        "description": "I2V model name (e.g., svd_xt_1.1, wan_14b_i2v)"
                    },
                    "output": {
                        "type": "string",
                        "default": "output"
                    },
                    "length": {
                        "type": "number",
                        "default": 5.0
                    },
                    "fps": {
                        "type": "integer",
                        "default": 15
                    }
                },
                "required": ["image", "prompt", "model"]
            }
        ),
        
        Tool(
            name="videogen_transform_image",
            description="Transform an existing image (Image-to-Image). Use this to modify or restyle an image.",
            inputSchema={
                "type": "object",
                "properties": {
                    "image": {
                        "type": "string",
                        "description": "Path to the input image file"
                    },
                    "prompt": {
                        "type": "string",
                        "description": "Description of the desired transformation"
                    },
                    "model": {
                        "type": "string",
                        "description": "Model name (e.g., flux_dev, sdxl_base)"
                    },
                    "output": {
                        "type": "string",
                        "default": "output.png"
                    },
                    "strength": {
                        "type": "number",
                        "description": "Transformation strength (0.0-1.0)",
                        "default": 0.75
                    }
                },
                "required": ["image", "prompt", "model"]
            }
        ),
        
        Tool(
            name="videogen_generate_with_audio",
            description="Generate a video with audio (TTS or music). Use this for videos with narration or background music.",
            inputSchema={
                "type": "object",
                "properties": {
                    "prompt": {
                        "type": "string",
                        "description": "Description of the video to generate"
                    },
                    "audio_type": {
                        "type": "string",
                        "enum": ["tts", "music"],
                        "description": "Type of audio: tts for speech, music for background music"
                    },
                    "audio_text": {
                        "type": "string",
                        "description": "Text for TTS or prompt for music generation"
                    },
                    "model": {
                        "type": "string",
                        "description": "Model name (optional, auto-selected if not provided)"
                    },
                    "output": {
                        "type": "string",
                        "default": "output"
                    },
                    "tts_voice": {
                        "type": "string",
                        "description": "TTS voice name (e.g., edge_male_us, edge_female_us)",
                        "default": "edge_female_us"
                    },
                    "sync_audio": {
                        "type": "boolean",
                        "description": "Sync audio duration to video",
                        "default": True
                    },
                    "lip_sync": {
                        "type": "boolean",
                        "description": "Apply lip sync (for TTS with I2V)",
                        "default": False
                    }
                },
                "required": ["prompt", "audio_type", "audio_text"]
            }
        ),
        
        Tool(
            name="videogen_list_models",
            description="List all available models. Use this to see what models are available for generation.",
            inputSchema={
                "type": "object",
                "properties": {
                    "filter": {
                        "type": "string",
                        "enum": ["all", "i2v", "t2v", "t2i", "v2v", "v2i", "3d", "tts", "audio", "low_vram", "high_vram", "huge_vram", "nsfw"],
                        "description": "Filter models by type or VRAM requirement",
                        "default": "all"
                    }
                }
            }
        ),
        
        Tool(
            name="videogen_video_to_video",
            description="Transform an existing video (Video-to-Video). Apply style transfer or filters to a video.",
            inputSchema={
                "type": "object",
                "properties": {
                    "video": {
                        "type": "string",
                        "description": "Path to the input video file"
                    },
                    "prompt": {
                        "type": "string",
                        "description": "Description of the desired transformation"
                    },
                    "output": {
                        "type": "string",
                        "default": "output"
                    },
                    "strength": {
                        "type": "number",
                        "description": "Transformation strength (0.0-1.0)",
                        "default": 0.75
                    },
                    "fps": {
                        "type": "integer",
                        "description": "Processing FPS",
                        "default": 15
                    }
                },
                "required": ["video", "prompt"]
            }
        ),
        
        Tool(
            name="videogen_apply_video_filter",
            description="Apply a filter to a video. Available filters: grayscale, sepia, blur, sharpen, contrast, saturation, speed, slow, reverse, fade_in, fade_out, denoise, stabilize.",
            inputSchema={
                "type": "object",
                "properties": {
                    "video": {
                        "type": "string",
                        "description": "Path to the input video file"
                    },
                    "filter": {
                        "type": "string",
                        "enum": ["grayscale", "sepia", "blur", "sharpen", "contrast", "saturation", "speed", "slow", "reverse", "fade_in", "fade_out", "denoise", "stabilize"],
                        "description": "Filter to apply"
                    },
                    "params": {
                        "type": "string",
                        "description": "Filter parameters (e.g., 'factor=2.0' for speed, 'radius=10' for blur)"
                    },
                    "output": {
                        "type": "string",
                        "default": "output"
                    }
                },
                "required": ["video", "filter"]
            }
        ),
        
        Tool(
            name="videogen_extract_frames",
            description="Extract frames from a video. Can extract a single frame, keyframes, or all frames.",
            inputSchema={
                "type": "object",
                "properties": {
                    "video": {
                        "type": "string",
                        "description": "Path to the input video file"
                    },
                    "mode": {
                        "type": "string",
                        "enum": ["single", "keyframes", "all"],
                        "description": "Extraction mode: single frame, keyframes, or all frames",
                        "default": "keyframes"
                    },
                    "timestamp": {
                        "type": "number",
                        "description": "Timestamp for single frame extraction (seconds)"
                    },
                    "frame_number": {
                        "type": "integer",
                        "description": "Frame number for single frame extraction"
                    },
                    "max_frames": {
                        "type": "integer",
                        "description": "Maximum frames to extract",
                        "default": 100
                    },
                    "output_dir": {
                        "type": "string",
                        "description": "Output directory for frames",
                        "default": "frames"
                    }
                },
                "required": ["video"]
            }
        ),
        
        Tool(
            name="videogen_create_collage",
            description="Create a collage/thumbnail grid from a video.",
            inputSchema={
                "type": "object",
                "properties": {
                    "video": {
                        "type": "string",
                        "description": "Path to the input video file"
                    },
                    "grid": {
                        "type": "string",
                        "description": "Grid size (e.g., '4x4', '3x3')",
                        "default": "4x4"
                    },
                    "method": {
                        "type": "string",
                        "enum": ["uniform", "keyframes", "random"],
                        "description": "Sampling method",
                        "default": "uniform"
                    },
                    "output": {
                        "type": "string",
                        "default": "collage.png"
                    }
                },
                "required": ["video"]
            }
        ),
        
        Tool(
            name="videogen_upscale_video",
            description="Upscale a video using AI upscaling models.",
            inputSchema={
                "type": "object",
                "properties": {
                    "video": {
                        "type": "string",
                        "description": "Path to the input video file"
                    },
                    "scale": {
                        "type": "number",
                        "description": "Upscale factor (2.0 or 4.0)",
                        "default": 2.0
                    },
                    "method": {
                        "type": "string",
                        "enum": ["ffmpeg", "esrgan", "real_esrgan", "swinir"],
                        "description": "Upscaling method",
                        "default": "ffmpeg"
                    },
                    "output": {
                        "type": "string",
                        "default": "output"
                    }
                },
                "required": ["video"]
            }
        ),
        
        Tool(
            name="videogen_convert_3d",
            description="Convert 2D video to 3D format (SBS, anaglyph, or VR 360).",
            inputSchema={
                "type": "object",
                "properties": {
                    "video": {
                        "type": "string",
                        "description": "Path to the input video file"
                    },
                    "format": {
                        "type": "string",
                        "enum": ["sbs", "anaglyph", "vr"],
                        "description": "3D output format: sbs (side-by-side), anaglyph (red/cyan), vr (360)"
                    },
                    "depth_method": {
                        "type": "string",
                        "enum": ["ai", "disparity", "shift"],
                        "description": "Depth estimation method",
                        "default": "shift"
                    },
                    "disparity_scale": {
                        "type": "number",
                        "description": "Disparity scale (0.5-2.0)",
                        "default": 1.0
                    },
                    "output": {
                        "type": "string",
                        "default": "output"
                    }
                },
                "required": ["video", "format"]
            }
        ),
        
        Tool(
            name="videogen_concat_videos",
            description="Concatenate multiple videos into one.",
            inputSchema={
                "type": "object",
                "properties": {
                    "videos": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "List of video file paths to concatenate"
                    },
                    "output": {
                        "type": "string",
                        "default": "output"
                    }
                },
                "required": ["videos"]
            }
        ),
        
        Tool(
            name="videogen_show_model",
            description="Show detailed information about a specific model.",
            inputSchema={
                "type": "object",
                "properties": {
                    "model": {
                        "type": "string",
                        "description": "Model ID (number) or model name"
                    }
                },
                "required": ["model"]
            }
        ),
        
        Tool(
            name="videogen_update_models",
            description="Update the model database from HuggingFace. Use this to get the latest available models. IMPORTANT: Run this before using videogen for the first time, or when you need new models.",
            inputSchema={
                "type": "object",
                "properties": {}
            }
        ),
        
        Tool(
            name="videogen_search_models",
            description="Search HuggingFace for models matching a query.",
            inputSchema={
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "Search query (e.g., 'video generation', 'nsfw', 'anime')"
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum number of results",
                        "default": 20
                    }
                },
                "required": ["query"]
            }
        ),
        
        Tool(
            name="videogen_add_model",
            description="Add a model from HuggingFace to the local database.",
            inputSchema={
                "type": "object",
                "properties": {
                    "model_id": {
                        "type": "string",
                        "description": "HuggingFace model ID (e.g., stabilityai/stable-video-diffusion-img2vid-xt-1.1)"
                    },
                    "name": {
                        "type": "string",
                        "description": "Short name for the model (optional, auto-generated if not provided)"
                    }
                },
                "required": ["model_id"]
            }
        ),
        
        Tool(
            name="videogen_disable_model",
            description="Disable a model from auto-selection.",
            inputSchema={
                "type": "object",
                "properties": {
                    "model": {
                        "type": "string",
                        "description": "Model ID (number), name, or HuggingFace ID to disable"
                    }
                },
                "required": ["model"]
            }
        ),
        
        Tool(
            name="videogen_enable_model",
            description="Enable a model for auto-selection.",
            inputSchema={
                "type": "object",
                "properties": {
                    "model": {
                        "type": "string",
                        "description": "Model ID (number), name, or HuggingFace ID to enable"
                    }
                },
                "required": ["model"]
            }
        ),
        
        Tool(
            name="videogen_list_cached_models",
            description="List locally cached HuggingFace models with their sizes.",
            inputSchema={
                "type": "object",
                "properties": {}
            }
        ),
        
        Tool(
            name="videogen_remove_cached_model",
            description="Remove a specific model from the local HuggingFace cache.",
            inputSchema={
                "type": "object",
                "properties": {
                    "model_id": {
                        "type": "string",
                        "description": "HuggingFace model ID to remove from cache (e.g., stabilityai/stable-video-diffusion-img2vid-xt-1.1)"
                    }
                },
                "required": ["model_id"]
            }
        ),
        
        Tool(
            name="videogen_clear_cache",
            description="Clear the entire local HuggingFace cache.",
            inputSchema={
                "type": "object",
                "properties": {}
            }
        ),
        
        Tool(
            name="videogen_allow_bigger_models",
            description="Allow models larger than available VRAM by using system RAM for offloading (implies sequential offload strategy).",
            inputSchema={
                "type": "object",
                "properties": {
                    "prompt": {
                        "type": "string",
                        "description": "Description of the video to generate"
                    },
                    "output": {
                        "type": "string",
                        "description": "Output filename",
                        "default": "output"
                    },
                    "model": {
                        "type": "string",
                        "description": "Specific model to use (optional, auto-selected if not provided)"
                    },
                    "length": {
                        "type": "number",
                        "description": "Video duration in seconds",
                        "default": 5.0
                    },
                    "width": {
                        "type": "integer",
                        "description": "Video width in pixels",
                        "default": 832
                    },
                    "height": {
                        "type": "integer",
                        "description": "Video height in pixels",
                        "default": 480
                    },
                    "fps": {
                        "type": "integer",
                        "description": "Frames per second",
                        "default": 15
                    },
                    "seed": {
                        "type": "integer",
                        "description": "Random seed for reproducibility (-1 for random)",
                        "default": -1
                    },
                    "auto": {
                        "type": "boolean",
                        "description": "Use automatic mode (recommended)",
                        "default": True
                    },
                    "no_filter": {
                        "type": "boolean",
                        "description": "Disable NSFW filter",
                        "default": False
                    }
                },
                "required": ["prompt"]
            }
        ),
        
        Tool(
            name="videogen_list_tts_voices",
            description="List all available TTS voices for audio generation.",
            inputSchema={
                "type": "object",
                "properties": {}
            }
        ),
        
        Tool(
            name="videogen_transcribe_video",
            description="Transcribe audio from a video using Whisper AI. Extracts spoken text with timestamps.",
            inputSchema={
                "type": "object",
                "properties": {
                    "video": {
                        "type": "string",
                        "description": "Path to the input video file"
                    },
                    "model_size": {
                        "type": "string",
                        "enum": ["tiny", "base", "small", "medium", "large"],
                        "description": "Whisper model size (larger = more accurate, slower)",
                        "default": "base"
                    },
                    "language": {
                        "type": "string",
                        "description": "Source language code (e.g., en, es, fr). Auto-detected if not specified."
                    }
                },
                "required": ["video"]
            }
        ),
        
        Tool(
            name="videogen_create_subtitles",
            description="Create SRT subtitles from video audio. Optionally translate to another language.",
            inputSchema={
                "type": "object",
                "properties": {
                    "video": {
                        "type": "string",
                        "description": "Path to the input video file"
                    },
                    "target_lang": {
                        "type": "string",
                        "description": "Target language code for translation (e.g., en, es, fr, de, zh, ja)"
                    },
                    "source_lang": {
                        "type": "string",
                        "description": "Source language code. Auto-detected if not specified."
                    },
                    "model_size": {
                        "type": "string",
                        "enum": ["tiny", "base", "small", "medium", "large"],
                        "default": "base"
                    },
                    "burn": {
                        "type": "boolean",
                        "description": "Burn subtitles into video",
                        "default": False
                    },
                    "output": {
                        "type": "string",
                        "description": "Output video path (if burning) or SRT file path",
                        "default": "output"
                    }
                },
                "required": ["video"]
            }
        ),
        
        Tool(
            name="videogen_dub_video",
            description="Translate and dub a video with voice preservation. Replaces original audio with translated speech while maintaining the original voice characteristics.",
            inputSchema={
                "type": "object",
                "properties": {
                    "video": {
                        "type": "string",
                        "description": "Path to the input video file"
                    },
                    "target_lang": {
                        "type": "string",
                        "description": "Target language code (e.g., en, es, fr, de, zh, ja)"
                    },
                    "source_lang": {
                        "type": "string",
                        "description": "Source language code. Auto-detected if not specified."
                    },
                    "voice_clone": {
                        "type": "boolean",
                        "description": "Use voice cloning to preserve original voice",
                        "default": True
                    },
                    "tts_voice": {
                        "type": "string",
                        "description": "TTS voice to use if not voice cloning (e.g., edge_female_us)"
                    },
                    "model_size": {
                        "type": "string",
                        "enum": ["tiny", "base", "small", "medium", "large"],
                        "default": "base"
                    },
                    "output": {
                        "type": "string",
                        "description": "Output video path",
                        "default": "output"
                    }
                },
                "required": ["video", "target_lang"]
            }
        ),
        
        Tool(
            name="videogen_translate_text",
            description="Translate text between languages using MarianMT models.",
            inputSchema={
                "type": "object",
                "properties": {
                    "text": {
                        "type": "string",
                        "description": "Text to translate"
                    },
                    "source_lang": {
                        "type": "string",
                        "description": "Source language code (e.g., en, es, fr)"
                    },
                    "target_lang": {
                        "type": "string",
                        "description": "Target language code (e.g., en, es, fr)"
                    }
                },
                "required": ["text", "source_lang", "target_lang"]
            }
        ),
        
        # Character Consistency Tools
        Tool(
            name="videogen_create_character",
            description="Create a character profile from reference images for consistent character generation across multiple images/videos.",
            inputSchema={
                "type": "object",
                "properties": {
                    "name": {
                        "type": "string",
                        "description": "Character name (alphanumeric, underscores, hyphens only)"
                    },
                    "reference_images": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "List of paths to reference images (1-5 images)"
                    },
                    "description": {
                        "type": "string",
                        "description": "Optional description of the character"
                    }
                },
                "required": ["name", "reference_images"]
            }
        ),
        
        Tool(
            name="videogen_list_characters",
            description="List all saved character profiles.",
            inputSchema={
                "type": "object",
                "properties": {}
            }
        ),
        
        Tool(
            name="videogen_show_character",
            description="Show details of a specific character profile.",
            inputSchema={
                "type": "object",
                "properties": {
                    "name": {
                        "type": "string",
                        "description": "Character profile name"
                    }
                },
                "required": ["name"]
            }
        ),
        
        Tool(
            name="videogen_delete_character",
            description="Delete a character profile.",
            inputSchema={
                "type": "object",
                "properties": {
                    "name": {
                        "type": "string",
                        "description": "Character profile name to delete"
                    }
                },
                "required": ["name"]
            }
        ),
        
        Tool(
            name="videogen_generate_with_character",
            description="Generate an image or video with a specific character using IP-Adapter and/or InstantID for consistency.",
            inputSchema={
                "type": "object",
                "properties": {
                    "prompt": {
                        "type": "string",
                        "description": "Description of what to generate with the character"
                    },
                    "character": {
                        "type": "string",
                        "description": "Character profile name to use"
                    },
                    "model": {
                        "type": "string",
                        "description": "Model name (e.g., flux_dev, sdxl_base)"
                    },
                    "output": {
                        "type": "string",
                        "default": "output"
                    },
                    "use_ipadapter": {
                        "type": "boolean",
                        "description": "Use IP-Adapter for character consistency",
                        "default": True
                    },
                    "use_instantid": {
                        "type": "boolean",
                        "description": "Use InstantID for face identity preservation",
                        "default": False
                    },
                    "ipadapter_scale": {
                        "type": "number",
                        "description": "IP-Adapter influence scale (0.0-1.0)",
                        "default": 0.8
                    },
                    "instantid_scale": {
                        "type": "number",
                        "description": "InstantID influence scale (0.0-1.0)",
                        "default": 0.8
                    },
                    "animate": {
                        "type": "boolean",
                        "description": "Generate video instead of image (I2V)",
                        "default": False
                    }
                },
                "required": ["prompt", "character", "model"]
            }
        ),
        
        Tool(
            name="videogen_generate_with_reference",
            description="Generate an image using reference images directly (without creating a character profile).",
            inputSchema={
                "type": "object",
                "properties": {
                    "prompt": {
                        "type": "string",
                        "description": "Description of what to generate"
                    },
                    "reference_images": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "List of paths to reference images"
                    },
                    "model": {
                        "type": "string",
                        "description": "Model name (e.g., flux_dev, sdxl_base)"
                    },
                    "output": {
                        "type": "string",
                        "default": "output"
                    },
                    "ipadapter_scale": {
                        "type": "number",
                        "description": "IP-Adapter influence scale (0.0-1.0)",
                        "default": 0.8
                    },
                    "use_instantid": {
                        "type": "boolean",
                        "description": "Use InstantID for face identity",
                        "default": False
                    }
                },
                "required": ["prompt", "reference_images", "model"]
            }
        ),
    ]


# Held while a videogen command runs so that invocations stay one at a time
# (as they effectively were when the blocking call stalled the event loop),
# while the loop itself remains free to service other protocol traffic.
_VIDEOGEN_LOCK = asyncio.Lock()

# "filter" value of videogen_list_models -> CLI flag it selects.
_MODEL_LIST_FILTER_FLAGS = {
    "i2v": "--i2v-only",
    "t2v": "--t2v-only",
    "t2i": "--t2i-only",
    "v2v": "--v2v-only",
    "v2i": "--v2i-only",
    "3d": "--3d-only",
    "tts": "--tts-only",
    "audio": "--audio-only",
    "low_vram": "--low-vram",
    "high_vram": "--high-vram",
    "huge_vram": "--huge-vram",
    "nsfw": "--nsfw-friendly",
}

# "format" value of videogen_convert_3d -> CLI flag it selects.
_CONVERT_3D_FLAGS = {
    "sbs": "--convert-3d-sbs",
    "anaglyph": "--convert-3d-anaglyph",
    "vr": "--convert-vr",
}

# Tools that take no arguments: tool name -> (CLI args, timeout or None).
_FIXED_COMMANDS = {
    "videogen_update_models": (["--update-models"], 600),
    "videogen_list_cached_models": (["--list-cached-models"], None),
    "videogen_clear_cache": (["--clear-cache"], None),
    "videogen_list_tts_voices": (["--tts-list"], None),
    "videogen_list_characters": (["--list-characters"], None),
}

# Tools that forward exactly one required argument: tool name -> (flag, key).
_SINGLE_VALUE_COMMANDS = {
    "videogen_show_model": ("--show-model", "model"),
    "videogen_disable_model": ("--disable-model", "model"),
    "videogen_enable_model": ("--enable-model", "model"),
    "videogen_remove_cached_model": ("--remove-cached-model", "model_id"),
    "videogen_show_character": ("--show-character", "name"),
    "videogen_delete_character": ("--delete-character", "name"),
}


def _seed_args(arguments: dict) -> list:
    """Return ["--seed", N] for a non-negative seed, else an empty list."""
    seed = arguments.get("seed")
    # A JSON null seed must be treated like "not given" instead of being
    # compared against 0 (which raises TypeError).
    if seed is not None and seed >= 0:
        return ["--seed", str(seed)]
    return []


def _value_args(arguments: dict, key: str, flag: str) -> list:
    """Return [flag, str(value)] when *key* is present, keeping explicit zeros."""
    value = arguments.get(key)
    if value is None:
        return []
    return [flag, str(value)]


def _generation_args(arguments: dict) -> list:
    """Build the options shared by videogen_generate and videogen_allow_bigger_models."""
    args = []
    if arguments.get("auto", True):
        args.append("--auto")
    if arguments.get("model"):
        args.extend(["--model", arguments["model"]])

    args.extend(["--prompt", arguments["prompt"]])
    args.extend(["--output", arguments.get("output", "output")])

    for key in ("length", "width", "height", "fps"):
        if arguments.get(key):
            args.extend([f"--{key}", str(arguments[key])])
    args.extend(_seed_args(arguments))
    if arguments.get("no_filter"):
        args.append("--no_filter")
    return args


def _build_command(name: str, arguments: dict) -> Optional[tuple]:
    """Translate a tool call into ``(cli_args, timeout)``.

    ``timeout`` is ``None`` when the tool relies on the default timeout of
    ``run_videogen_command``. Returns ``None`` for an unknown tool name.
    Raises ``KeyError`` when a required argument is missing.
    """
    if name in _FIXED_COMMANDS:
        fixed_args, timeout = _FIXED_COMMANDS[name]
        return list(fixed_args), timeout

    if name in _SINGLE_VALUE_COMMANDS:
        flag, key = _SINGLE_VALUE_COMMANDS[name]
        return [flag, arguments[key]], None

    if name == "videogen_generate":
        return _generation_args(arguments), None

    if name == "videogen_allow_bigger_models":
        return _generation_args(arguments) + ["--allow-bigger-models"], None

    if name == "videogen_generate_video":
        args = [
            "--model", arguments["model"],
            "--prompt", arguments["prompt"],
            "--output", arguments.get("output", "output"),
            "--length", str(arguments.get("length", 5.0)),
            "--width", str(arguments.get("width", 832)),
            "--height", str(arguments.get("height", 480)),
            "--fps", str(arguments.get("fps", 15)),
        ]
        return args + _seed_args(arguments), None

    if name == "videogen_generate_image":
        args = [
            "--model", arguments["model"],
            "--prompt", arguments["prompt"],
            "--output", arguments.get("output", "output.png"),
            "--width", str(arguments.get("width", 1024)),
            "--height", str(arguments.get("height", 1024)),
            "--image-steps", str(arguments.get("steps", 30)),
        ]
        return args + _seed_args(arguments), None

    if name == "videogen_animate_image":
        args = [
            "--model", arguments["model"],
            "--image", arguments["image"],
            "--prompt", arguments["prompt"],
            "--output", arguments.get("output", "output"),
            "--length", str(arguments.get("length", 5.0)),
            "--fps", str(arguments.get("fps", 15)),
        ]
        return args, None

    if name == "videogen_transform_image":
        args = [
            "--model", arguments["model"],
            "--image-to-image",
            "--image", arguments["image"],
            "--prompt", arguments["prompt"],
            "--output", arguments.get("output", "output.png"),
            "--strength", str(arguments.get("strength", 0.75)),
        ]
        return args, None

    if name == "videogen_generate_with_audio":
        # Without an explicit model the CLI is asked to pick one itself.
        if arguments.get("model"):
            args = ["--model", arguments["model"]]
        else:
            args = ["--auto"]
        args.extend([
            "--prompt", arguments["prompt"],
            "--output", arguments.get("output", "output"),
            "--generate_audio",
            "--audio_type", arguments["audio_type"],
            "--audio_text", arguments["audio_text"],
        ])
        if arguments.get("tts_voice"):
            args.extend(["--tts_voice", arguments["tts_voice"]])
        if arguments.get("sync_audio", True):
            args.append("--sync_audio")
        if arguments.get("lip_sync"):
            args.append("--lip_sync")
        return args, None

    if name == "videogen_list_models":
        args = ["--model-list"]
        filter_type = arguments.get("filter", "all")
        # "all" (or any unrecognised value) adds no filter flag.
        if isinstance(filter_type, str) and filter_type in _MODEL_LIST_FILTER_FLAGS:
            args.append(_MODEL_LIST_FILTER_FLAGS[filter_type])
        return args, None

    if name == "videogen_search_models":
        args = [
            "--search-models", arguments["query"],
            "--search-limit", str(arguments.get("limit", 20)),
        ]
        return args, None

    if name == "videogen_add_model":
        args = ["--add-model", arguments["model_id"]]
        if arguments.get("name"):
            args.extend(["--name", arguments["name"]])
        return args, None

    if name == "videogen_video_to_video":
        args = [
            "--video", arguments["video"],
            "--video-to-video",
            "--prompt", arguments["prompt"],
            "--output", arguments.get("output", "output"),
            "--v2v-strength", str(arguments.get("strength", 0.75)),
            "--v2v-fps", str(arguments.get("fps", 15)),
        ]
        return args, 3600

    if name == "videogen_apply_video_filter":
        args = [
            "--video", arguments["video"],
            "--video-filter", arguments["filter"],
            "--output", arguments.get("output", "output"),
        ]
        if arguments.get("params"):
            args.extend(["--filter-params", arguments["params"]])
        return args, 1800

    if name == "videogen_extract_frames":
        mode = arguments.get("mode", "keyframes")
        args = ["--video", arguments["video"]]
        if mode == "single":
            args.append("--extract-frame")
            # Timestamp 0 and frame 0 are legitimate requests, so only a
            # missing/null value suppresses the option.
            args.extend(_value_args(arguments, "timestamp", "--timestamp"))
            args.extend(_value_args(arguments, "frame_number", "--frame-number"))
        elif mode == "keyframes":
            args.append("--extract-keyframes")
            if arguments.get("max_frames"):
                args.extend(["--max-keyframes", str(arguments["max_frames"])])
        else:  # all
            args.append("--extract-frames")
            if arguments.get("max_frames"):
                args.extend(["--v2v-max-frames", str(arguments["max_frames"])])
        if arguments.get("output_dir"):
            args.extend(["--frames-dir", arguments["output_dir"]])
        return args, None

    if name == "videogen_create_collage":
        args = [
            "--video", arguments["video"],
            "--video-collage",
            "--collage-grid", arguments.get("grid", "4x4"),
            "--collage-method", arguments.get("method", "uniform"),
            "--output", arguments.get("output", "collage.png"),
        ]
        return args, None

    if name == "videogen_upscale_video":
        args = [
            "--video", arguments["video"],
            "--upscale-video",
            "--upscale-factor", str(arguments.get("scale", 2.0)),
            "--upscale-method", arguments.get("method", "ffmpeg"),
            "--output", arguments.get("output", "output"),
        ]
        return args, 3600

    if name == "videogen_convert_3d":
        format_type = arguments["format"]
        args = [
            "--video", arguments["video"],
            "--output", arguments.get("output", "output"),
        ]
        if isinstance(format_type, str) and format_type in _CONVERT_3D_FLAGS:
            args.append(_CONVERT_3D_FLAGS[format_type])
        if arguments.get("depth_method"):
            args.extend(["--depth-method", arguments["depth_method"]])
        if arguments.get("disparity_scale"):
            args.extend(["--disparity-scale", str(arguments["disparity_scale"])])
        return args, 3600

    if name == "videogen_concat_videos":
        videos = arguments["videos"]
        args = ["--concat-videos"] + videos + ["--output", arguments.get("output", "output")]
        return args, None

    if name == "videogen_transcribe_video":
        args = [
            "--video", arguments["video"],
            "--transcribe",
            "--whisper-model", arguments.get("model_size", "base"),
        ]
        if arguments.get("language"):
            args.extend(["--source-lang", arguments["language"]])
        return args, 1800

    if name == "videogen_create_subtitles":
        args = [
            "--video", arguments["video"],
            "--create-subtitles",
            "--whisper-model", arguments.get("model_size", "base"),
            "--output", arguments.get("output", "output"),
        ]
        if arguments.get("source_lang"):
            args.extend(["--source-lang", arguments["source_lang"]])
        if arguments.get("target_lang"):
            args.extend(["--target-lang", arguments["target_lang"], "--translate-subtitles"])
        if arguments.get("burn"):
            args.append("--burn-subtitles")
        return args, 1800

    if name == "videogen_dub_video":
        args = [
            "--video", arguments["video"],
            "--dub-video",
            "--target-lang", arguments["target_lang"],
            "--whisper-model", arguments.get("model_size", "base"),
            "--output", arguments.get("output", "output"),
        ]
        if arguments.get("source_lang"):
            args.extend(["--source-lang", arguments["source_lang"]])
        args.append("--voice-clone" if arguments.get("voice_clone", True) else "--no-voice-clone")
        if arguments.get("tts_voice"):
            args.extend(["--tts_voice", arguments["tts_voice"]])
        return args, 3600

    if name == "videogen_translate_text":
        # Plain text translation: no video involved, handled by the main script.
        args = [
            "--translate-text", arguments["text"],
            "--source-lang", arguments["source_lang"],
            "--target-lang", arguments["target_lang"],
        ]
        return args, None

    # Character Consistency Tools
    if name == "videogen_create_character":
        args = ["--create-character", arguments["name"]]
        for img in arguments["reference_images"][:5]:  # Max 5 images
            args.extend(["--character-images", img])
        if arguments.get("description"):
            args.extend(["--character-desc", arguments["description"]])
        return args, None

    if name == "videogen_generate_with_character":
        args = [
            "--model", arguments["model"],
            "--character", arguments["character"],
            "--prompt", arguments["prompt"],
            "--output", arguments.get("output", "output"),
        ]
        # The scales are documented as 0.0-1.0, so an explicit 0.0 must be
        # forwarded rather than treated as "not given".
        if arguments.get("use_ipadapter", True):
            args.append("--ipadapter")
            args.extend(_value_args(arguments, "ipadapter_scale", "--ipadapter-scale"))
        if arguments.get("use_instantid", False):
            args.append("--instantid")
            args.extend(_value_args(arguments, "instantid_scale", "--instantid-scale"))
        if arguments.get("animate", False):
            args.append("--image_to_video")
        return args, None

    if name == "videogen_generate_with_reference":
        args = [
            "--model", arguments["model"],
            "--prompt", arguments["prompt"],
            "--output", arguments.get("output", "output"),
        ]
        for img in arguments["reference_images"]:
            args.extend(["--reference-images", img])
        # IP-Adapter is always enabled for direct reference-image generation.
        args.append("--ipadapter")
        args.extend(_value_args(arguments, "ipadapter_scale", "--ipadapter-scale"))
        if arguments.get("use_instantid", False):
            args.append("--instantid")
        return args, None

    return None


@server.call_tool()
async def call_tool(name: str, arguments: dict) -> list:
    """Handle a tool call by running the matching videogen command.

    Args:
        name: Tool name as advertised by the server (``videogen_*``).
        arguments: Tool arguments supplied by the client; ``None`` is
            treated as an empty mapping.

    Returns:
        A single-element list holding a ``TextContent`` with the combined
        command output. When the command reports a non-zero exit code, a
        trailing status line is added so the failure is visible to the caller.
        For an unknown tool name the text is ``"Unknown tool: <name>"``.

    Raises:
        KeyError: If a required argument for the tool is missing.
    """
    command = _build_command(name, arguments or {})
    if command is None:
        return [TextContent(type="text", text=f"Unknown tool: {name}")]

    args, timeout = command
    # Only pass a timeout when the tool specifies one, so the helper's own
    # default keeps applying everywhere else.
    run_kwargs = {} if timeout is None else {"timeout": timeout}

    # The command can run for a long time; executing it in a worker thread
    # keeps the event loop responsive instead of blocking it on the subprocess.
    async with _VIDEOGEN_LOCK:
        output, code = await asyncio.to_thread(run_videogen_command, args, **run_kwargs)

    if code != 0:
        status = f"[videogen exited with code {code}]"
        output = f"{output.rstrip()}\n{status}" if output.strip() else status

    return [TextContent(type="text", text=output)]


async def main():
    """Run the VideoGen MCP server over the stdio transport."""
    async with stdio_server() as streams:
        incoming, outgoing = streams
        init_options = server.create_initialization_options()
        await server.run(incoming, outgoing, init_options)


# Script entry point: when executed directly, run the async main() coroutine
# to completion on a fresh asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())