#!/usr/bin/env python3
"""
VideoGen MCP Server - Model Context Protocol wrapper for VideoGen

Copyleft © 2026 Stefy <stefy@nexlab.net>

This MCP server exposes VideoGen functionality to AI agents through the
Model Context Protocol, enabling seamless integration with Claude and other
MCP-compatible AI systems.

Installation:
    pip install mcp

Usage:
    python3 videogen_mcp_server.py

Or add to Claude Desktop config:
    {
        "mcpServers": {
            "videogen": {
                "command": "python3",
                "args": ["/path/to/videogen_mcp_server.py"]
            }
        }
    }
"""

import asyncio
import json
import os
import subprocess
import sys
from pathlib import Path
from typing import Any, Optional

from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import Tool, TextContent

# MCP server instance; the tool handlers below register on it via decorators
server = Server("videogen")

# Path to the videogen script, located in the same directory as this file
VIDEOGEN_PATH = Path(__file__).parent / "videogen"


def run_videogen_command(args: list, timeout: int = 3600):
    """Run the videogen script with *args* and collect its output.

    The script at ``VIDEOGEN_PATH`` is launched with the current Python
    interpreter, using the script's directory as the working directory.

    Args:
        args: Command-line arguments appended after the script path.
        timeout: Maximum run time in seconds.

    Returns:
        A ``(output, returncode)`` tuple. ``output`` is the captured stdout
        followed by the captured stderr. If the command times out or cannot
        be run at all, ``output`` is an ``"Error: ..."`` message and
        ``returncode`` is ``1``.
    """
    cmd = [sys.executable, str(VIDEOGEN_PATH), *args]

    try:
        result = subprocess.run(
            cmd,
            # This server talks MCP over its own stdin/stdout. The child must
            # not inherit stdin, or it could consume protocol messages or
            # block waiting for input.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            # Keep the output readable even if the child emits bytes that do
            # not decode in the current locale, instead of failing the call.
            errors="replace",
            timeout=timeout,
            cwd=str(VIDEOGEN_PATH.parent),
        )
    except subprocess.TimeoutExpired:
        return "Error: Command timed out", 1
    except Exception as e:
        # Deliberately broad: any launch failure (missing script, bad
        # argument type, ...) is reported as text instead of crashing.
        return f"Error: {e}", 1

    return result.stdout + result.stderr, result.returncode


def parse_model_list(output: str) -> list:
    """Turn the textual model table into a list of dictionaries.

    Header, separator, blank and footer lines are ignored, as are rows with
    fewer than eight whitespace-separated columns or a non-integer first
    column. Each remaining row yields a dict with ``id``, ``name``, ``vram``
    and the boolean flags ``i2v``, ``t2v``, ``t2i``, ``i2i``, ``nsfw``; a
    ninth column, when present, is stored as the boolean ``lora``.
    """
    flag_columns = ("i2v", "t2v", "t2i", "i2i", "nsfw")
    footer_markers = ('Total shown', 'Use --model')
    parsed = []

    for row in output.strip().split('\n'):
        # Skip blank lines, the "ID ..." header and the dashed separator.
        if not row.strip() or row.startswith(('ID', '-')):
            continue
        if any(marker in row for marker in footer_markers):
            continue

        fields = row.split()
        if len(fields) < 8:
            continue

        try:
            model_id = int(fields[0])
        except ValueError:
            # Not a data row: the first column must be the numeric model id.
            continue

        entry = {"id": model_id, "name": fields[1], "vram": fields[2]}
        # Columns 3..7 are Yes/No capability flags, in this fixed order.
        for offset, key in enumerate(flag_columns, start=3):
            entry[key] = fields[offset] == "Yes"
        if len(fields) >= 9:
            entry["lora"] = fields[8] == "Yes"
        parsed.append(entry)

    return parsed


def _prop(kind: str, **extras) -> dict:
    """Build one JSON Schema property: ``type`` first, then *extras* in call order."""
    return {"type": kind, **extras}


def _object_schema(properties: dict, required=None) -> dict:
    """Build an object schema; the ``required`` key is omitted when not given."""
    schema = {"type": "object", "properties": properties}
    if required is not None:
        schema["required"] = required
    return schema


@server.list_tools()
async def list_tools() -> list:
    """Describe every VideoGen tool offered by this server.

    Returns a list of ``Tool`` objects, each pairing a tool name and
    description with the JSON Schema of the arguments it accepts.
    """
    # One row per tool: (name, description, argument properties, required keys).
    catalog = [
        (
            "videogen_generate",
            "Generate a video using VideoGen with automatic mode selection. This is the primary tool for video generation. It automatically detects the best generation type, selects the appropriate model, configures settings, and handles NSFW content detection.",
            {
                "prompt": _prop("string", description="The main prompt describing what to generate"),
                "output": _prop("string", description="Output filename (without extension)", default="output"),
                "model": _prop("string", description="Specific model to use (optional, auto-selected if not provided)"),
                "length": _prop("number", description="Video duration in seconds", default=5.0),
                "width": _prop("integer", description="Video width in pixels", default=832),
                "height": _prop("integer", description="Video height in pixels", default=480),
                "fps": _prop("integer", description="Frames per second", default=15),
                "seed": _prop("integer", description="Random seed for reproducibility (-1 for random)", default=-1),
                "auto": _prop("boolean", description="Use automatic mode (recommended)", default=True),
                "no_filter": _prop("boolean", description="Disable NSFW filter", default=False),
            },
            ["prompt"],
        ),
        (
            "videogen_generate_video",
            "Generate a video from text (Text-to-Video). Use this when you specifically want T2V generation without auto-detection.",
            {
                "prompt": _prop("string", description="Description of the video to generate"),
                "model": _prop("string", description="Model name (use videogen_list_models to see options)"),
                "output": _prop("string", description="Output filename", default="output"),
                "length": _prop("number", description="Duration in seconds", default=5.0),
                "width": _prop("integer", default=832),
                "height": _prop("integer", default=480),
                "fps": _prop("integer", default=15),
                "seed": _prop("integer", default=-1),
            },
            ["prompt", "model"],
        ),
        (
            "videogen_generate_image",
            "Generate an image from text (Text-to-Image). Use this for static image generation.",
            {
                "prompt": _prop("string", description="Description of the image to generate"),
                "model": _prop("string", description="Model name (e.g., flux_dev, sdxl_base, pony_v6)"),
                "output": _prop("string", description="Output filename (should end with .png or .jpg)", default="output.png"),
                "width": _prop("integer", default=1024),
                "height": _prop("integer", default=1024),
                "steps": _prop("integer", description="Inference steps", default=30),
                "seed": _prop("integer", default=-1),
            },
            ["prompt", "model"],
        ),
        (
            "videogen_animate_image",
            "Animate an existing image (Image-to-Video). Use this to add motion to a static image.",
            {
                "image": _prop("string", description="Path to the input image file"),
                "prompt": _prop("string", description="Description of the desired animation"),
                "model": _prop("string", description="I2V model name (e.g., svd_xt_1.1, wan_14b_i2v)"),
                "output": _prop("string", default="output"),
                "length": _prop("number", default=5.0),
                "fps": _prop("integer", default=15),
            },
            ["image", "prompt", "model"],
        ),
        (
            "videogen_transform_image",
            "Transform an existing image (Image-to-Image). Use this to modify or restyle an image.",
            {
                "image": _prop("string", description="Path to the input image file"),
                "prompt": _prop("string", description="Description of the desired transformation"),
                "model": _prop("string", description="Model name (e.g., flux_dev, sdxl_base)"),
                "output": _prop("string", default="output.png"),
                "strength": _prop("number", description="Transformation strength (0.0-1.0)", default=0.75),
            },
            ["image", "prompt", "model"],
        ),
        (
            "videogen_generate_with_audio",
            "Generate a video with audio (TTS or music). Use this for videos with narration or background music.",
            {
                "prompt": _prop("string", description="Description of the video to generate"),
                "audio_type": _prop(
                    "string",
                    enum=["tts", "music"],
                    description="Type of audio: tts for speech, music for background music",
                ),
                "audio_text": _prop("string", description="Text for TTS or prompt for music generation"),
                "model": _prop("string", description="Model name (optional, auto-selected if not provided)"),
                "output": _prop("string", default="output"),
                "tts_voice": _prop(
                    "string",
                    description="TTS voice name (e.g., edge_male_us, edge_female_us)",
                    default="edge_female_us",
                ),
                "sync_audio": _prop("boolean", description="Sync audio duration to video", default=True),
                "lip_sync": _prop("boolean", description="Apply lip sync (for TTS with I2V)", default=False),
            },
            ["prompt", "audio_type", "audio_text"],
        ),
        (
            "videogen_list_models",
            "List all available models. Use this to see what models are available for generation.",
            {
                "filter": _prop(
                    "string",
                    enum=["all", "i2v", "t2v", "low_vram", "high_vram", "huge_vram", "nsfw"],
                    description="Filter models by type or VRAM requirement",
                    default="all",
                ),
            },
            None,
        ),
        (
            "videogen_show_model",
            "Show detailed information about a specific model.",
            {
                "model": _prop("string", description="Model ID (number) or model name"),
            },
            ["model"],
        ),
        (
            "videogen_update_models",
            "Update the model database from HuggingFace. Use this to get the latest available models. IMPORTANT: Run this before using videogen for the first time, or when you need new models.",
            {},
            None,
        ),
        (
            "videogen_search_models",
            "Search HuggingFace for models matching a query.",
            {
                "query": _prop("string", description="Search query (e.g., 'video generation', 'nsfw', 'anime')"),
                "limit": _prop("integer", description="Maximum number of results", default=20),
            },
            ["query"],
        ),
        (
            "videogen_add_model",
            "Add a model from HuggingFace to the local database.",
            {
                "model_id": _prop(
                    "string",
                    description="HuggingFace model ID (e.g., stabilityai/stable-video-diffusion-img2vid-xt-1.1)",
                ),
                "name": _prop(
                    "string",
                    description="Short name for the model (optional, auto-generated if not provided)",
                ),
            },
            ["model_id"],
        ),
        (
            "videogen_list_tts_voices",
            "List all available TTS voices for audio generation.",
            {},
            None,
        ),
    ]

    return [
        Tool(
            name=tool_name,
            description=summary,
            inputSchema=_object_schema(fields, required),
        )
        for tool_name, summary, fields, required in catalog
    ]


# Tools whose subprocess timeout differs from run_videogen_command's default.
_TOOL_TIMEOUTS = {"videogen_update_models": 600}

# CLI flag added for each ``filter`` value of videogen_list_models.
# "all" (or any unrecognised value) adds no flag.
_MODEL_LIST_FLAGS = {
    "i2v": "--i2v-only",
    "t2v": "--t2v-only",
    "low_vram": "--low-vram",
    "high_vram": "--high-vram",
    "huge_vram": "--huge-vram",
    "nsfw": "--nsfw-friendly",
}

# Lets only one videogen process run at a time even when a client issues
# several tool calls in parallel (runs are presumably resource-heavy).
_videogen_lock = asyncio.Lock()


def _required_arg(arguments: dict, key: str) -> str:
    """Return required argument *key* as a string; raise ValueError if absent."""
    value = arguments.get(key)
    if value is None:
        raise ValueError(f"Missing required argument: {key}")
    return str(value)


def _optional_arg(arguments: dict, key: str, default) -> str:
    """Return argument *key* as a string, using *default* when absent or null."""
    value = arguments.get(key)
    return str(default if value is None else value)


def _seed_args(arguments: dict) -> list:
    """Return ``["--seed", N]`` for a non-negative seed, otherwise ``[]``."""
    seed = arguments.get("seed")
    if seed is None or seed < 0:
        return []
    return ["--seed", str(seed)]


def _build_videogen_args(name: str, arguments: dict):
    """Translate one tool call into the videogen command-line arguments.

    Returns the argument list, or ``None`` when *name* is not a known tool.
    Raises ValueError when a required argument is missing.
    """
    def req(key):
        return _required_arg(arguments, key)

    def opt(key, default):
        return _optional_arg(arguments, key, default)

    if name == "videogen_generate":
        args = []
        if arguments.get("auto", True):
            args.append("--auto")
        if arguments.get("model"):
            args += ["--model", str(arguments["model"])]
        args += ["--prompt", req("prompt"), "--output", opt("output", "output")]
        # Forward only the options the caller set to a truthy value.
        for key in ("length", "width", "height", "fps"):
            if arguments.get(key):
                args += [f"--{key}", str(arguments[key])]
        args += _seed_args(arguments)
        if arguments.get("no_filter"):
            args.append("--no_filter")
        return args

    if name == "videogen_generate_video":
        return [
            "--model", req("model"),
            "--prompt", req("prompt"),
            "--output", opt("output", "output"),
            "--length", opt("length", 5.0),
            "--width", opt("width", 832),
            "--height", opt("height", 480),
            "--fps", opt("fps", 15),
            *_seed_args(arguments),
        ]

    if name == "videogen_generate_image":
        return [
            "--model", req("model"),
            "--prompt", req("prompt"),
            "--output", opt("output", "output.png"),
            "--width", opt("width", 1024),
            "--height", opt("height", 1024),
            "--image-steps", opt("steps", 30),
            *_seed_args(arguments),
        ]

    if name == "videogen_animate_image":
        return [
            "--model", req("model"),
            "--image", req("image"),
            "--prompt", req("prompt"),
            "--output", opt("output", "output"),
            "--length", opt("length", 5.0),
            "--fps", opt("fps", 15),
        ]

    if name == "videogen_transform_image":
        return [
            "--model", req("model"),
            "--image-to-image",
            "--image", req("image"),
            "--prompt", req("prompt"),
            "--output", opt("output", "output.png"),
            "--strength", opt("strength", 0.75),
        ]

    if name == "videogen_generate_with_audio":
        # Without an explicit model, let videogen pick one automatically.
        if arguments.get("model"):
            args = ["--model", str(arguments["model"])]
        else:
            args = ["--auto"]
        args += [
            "--prompt", req("prompt"),
            "--output", opt("output", "output"),
            "--generate_audio",
            "--audio_type", req("audio_type"),
            "--audio_text", req("audio_text"),
        ]
        if arguments.get("tts_voice"):
            args += ["--tts_voice", str(arguments["tts_voice"])]
        if arguments.get("sync_audio", True):
            args.append("--sync_audio")
        if arguments.get("lip_sync"):
            args.append("--lip_sync")
        return args

    if name == "videogen_list_models":
        args = ["--model-list"]
        flag = _MODEL_LIST_FLAGS.get(str(arguments.get("filter", "all")))
        if flag is not None:
            args.append(flag)
        return args

    if name == "videogen_show_model":
        # The model may be given as a numeric ID; argv entries must be strings.
        return ["--show-model", req("model")]

    if name == "videogen_update_models":
        return ["--update-models"]

    if name == "videogen_search_models":
        return [
            "--search-models", req("query"),
            "--search-limit", opt("limit", 20),
        ]

    if name == "videogen_add_model":
        args = ["--add-model", req("model_id")]
        if arguments.get("name"):
            args += ["--name", str(arguments["name"])]
        return args

    if name == "videogen_list_tts_voices":
        return ["--tts-list"]

    return None


@server.call_tool()
async def call_tool(name: str, arguments: dict) -> list:
    """Handle a tool call by running the matching videogen command.

    Args:
        name: Name of the tool to invoke.
        arguments: Tool arguments as supplied by the client.

    Returns:
        A one-element list holding a ``TextContent`` with the command's
        combined output. When the command reports a non-zero status, a
        trailing ``[videogen command failed with status N]`` line is added
        so the failure is visible to the caller. For an unknown tool name
        the text is ``"Unknown tool: <name>"``.

    Raises:
        ValueError: If a required argument is missing.
    """
    args = _build_videogen_args(name, arguments or {})
    if args is None:
        return [TextContent(type="text", text=f"Unknown tool: {name}")]

    run_kwargs = {}
    if name in _TOOL_TIMEOUTS:
        run_kwargs["timeout"] = _TOOL_TIMEOUTS[name]

    # run_videogen_command blocks until the child exits (possibly for a very
    # long time). Run it in a worker thread so the event loop, and with it
    # the rest of the MCP session, stays responsive.
    async with _videogen_lock:
        output, code = await asyncio.to_thread(
            run_videogen_command, args, **run_kwargs
        )

    if code != 0:
        note = f"[videogen command failed with status {code}]"
        output = f"{output.rstrip()}\n{note}" if output.strip() else note

    return [TextContent(type="text", text=output)]


async def main():
    """Serve MCP requests over stdin/stdout until the session ends."""
    async with stdio_server() as (incoming, outgoing):
        init_options = server.create_initialization_options()
        await server.run(incoming, outgoing, init_options)


# Start the stdio server only when this file is executed as a script
if __name__ == "__main__":
    asyncio.run(main())