import pandas as pd
import logging
import re
import uuid
from datetime import datetime
from typing import Dict, List, Tuple, Optional
from app import db
from app.models import Match, MatchOutcome
from app.utils.logging import log_file_operation, log_database_operation

logger = logging.getLogger(__name__)

class FixtureParser:
    """Parse CSV/XLSX fixture files with intelligent column detection.

    The parser reads a tabular fixture file into a pandas DataFrame, maps the
    file's headers onto the required fields listed in ``REQUIRED_COLUMNS``,
    treats the remaining mostly-numeric columns as match outcomes, and can
    persist the resulting match dictionaries through the ``Match`` model.
    """

    # Accepted header spellings for every required field. Candidates and file
    # headers are both passed through normalize_column_name() before they are
    # compared, so case, punctuation and repeated whitespace do not matter.
    REQUIRED_COLUMNS = {
        'match_number': ['match #', 'match_number', 'match no', 'match_no', 'match'],
        'fighter1': ['fighter1 (township)', 'fighter1', 'fighter 1', 'fighter_1'],
        'fighter2': ['fighter2 (township)', 'fighter2', 'fighter 2', 'fighter_2'],
        'venue': ['venue (kampala township)', 'venue', 'location', 'kampala township']
    }

    def __init__(self):
        """Set the supported file extensions and the CSV encodings to try."""
        self.supported_formats = ['.csv', '.xlsx', '.xls']
        # Encodings are tried in order. latin-1 / iso-8859-1 decode *any* byte
        # sequence, so they must come after cp1252 or cp1252 is never reached.
        self.encoding_options = ['utf-8', 'cp1252', 'latin-1', 'iso-8859-1']

    def detect_file_format(self, file_path: str) -> str:
        """
        Detect file format from extension

        Args:
            file_path: Path to the file

        Returns:
            str: File format ('csv', 'xlsx', 'xls'); anything that is not an
                Excel extension is reported as 'csv'
        """
        # rpartition lets us tell "name.xlsx" apart from a bare "xlsx" with no
        # extension at all, which must not be treated as an Excel file.
        _, dot, extension = file_path.lower().rpartition('.')
        if dot and extension in ('xlsx', 'xls'):
            return extension
        return 'csv'

    def read_file(self, file_path: str) -> Optional[pd.DataFrame]:
        """
        Read file into pandas DataFrame with format detection

        Args:
            file_path: Path to the file

        Returns:
            pd.DataFrame or None: Parsed data or None if failed
        """
        try:
            file_format = self.detect_file_format(file_path)

            if file_format == 'csv':
                return self._read_csv(file_path)

            if file_format in ('xlsx', 'xls'):
                return self._read_excel(file_path, file_format)

            # Defensive: detect_file_format currently never returns anything else.
            logger.error("Unsupported file format: %s", file_format)
            return None

        except Exception as e:
            logger.error("File read error: %s", e)
            return None

    def _read_csv(self, file_path: str) -> Optional[pd.DataFrame]:
        """Read a CSV, trying each configured encoding before a lossy fallback."""
        for encoding in self.encoding_options:
            try:
                df = pd.read_csv(file_path, encoding=encoding)
            except UnicodeDecodeError:
                # Wrong encoding guess - silently move on to the next one.
                continue
            except Exception as e:
                logger.error("CSV read error with %s: %s", encoding, e)
                continue
            logger.info("Successfully read CSV with encoding: %s", encoding)
            return df

        # Every configured encoding failed: decode as UTF-8 and substitute
        # undecodable bytes. The pandas keyword is ``encoding_errors``;
        # ``errors`` is not accepted by read_csv and raised TypeError.
        try:
            df = pd.read_csv(file_path, encoding='utf-8', encoding_errors='replace')
        except Exception as e:
            logger.error("Final CSV read attempt failed: %s", e)
            return None
        logger.warning("Read CSV with character replacement due to encoding issues")
        return df

    def _read_excel(self, file_path: str, file_format: str) -> Optional[pd.DataFrame]:
        """Read an Excel workbook with the engine matching its extension."""
        engine = 'openpyxl' if file_format == 'xlsx' else 'xlrd'
        try:
            df = pd.read_excel(file_path, engine=engine)
        except Exception as e:
            logger.error("Excel read error: %s", e)
            return None
        logger.info("Successfully read %s file", file_format.upper())
        return df

    def normalize_column_name(self, column_name: str) -> str:
        """
        Normalize column name for comparison

        Lowercases the name, drops every character that is not a word
        character, whitespace or a parenthesis, then collapses whitespace.

        Args:
            column_name: Original column name

        Returns:
            str: Normalized column name ('' for a missing/NaN name)
        """
        if pd.isna(column_name):
            return ''

        normalized = str(column_name).lower()

        # Strip punctuation BEFORE collapsing whitespace; otherwise removing a
        # character such as '#' or '-' leaves a trailing or doubled space
        # behind ("match #" -> "match ", "match - no" -> "match  no").
        normalized = re.sub(r'[^\w\s()]', '', normalized)
        normalized = re.sub(r'\s+', ' ', normalized).strip()

        return normalized

    def detect_required_columns(self, df: pd.DataFrame) -> Dict[str, str]:
        """
        Detect required columns in the DataFrame

        Exact (normalized) header matches are resolved for every field first.
        Fields still unresolved then fall back to a partial match on the first
        word of each accepted spelling, considering only columns that no other
        field has claimed, so one column is never assigned to two fields.

        Args:
            df: Input DataFrame

        Returns:
            dict: Mapping of required field to actual column name; fields
                that could not be located are absent from the mapping
        """
        normalized_columns = {self.normalize_column_name(col): col for col in df.columns}

        found = {}
        claimed = set()  # normalized names already assigned to a field
        unresolved = []

        # Pass 1: exact matches on the normalized header.
        for field, possible_names in self.REQUIRED_COLUMNS.items():
            for possible_name in possible_names:
                key = self.normalize_column_name(possible_name)
                if key in normalized_columns:
                    found[field] = normalized_columns[key]
                    claimed.add(key)
                    break
            else:
                unresolved.append(field)

        # Pass 2: partial matches, restricted to unclaimed columns.
        for field in unresolved:
            logger.warning("Required column not found for field: %s", field)
            tokens = [name.split()[0] for name in self.REQUIRED_COLUMNS[field]]
            for col_name, original in normalized_columns.items():
                if col_name in claimed:
                    continue
                if any(token in col_name for token in tokens):
                    found[field] = original
                    claimed.add(col_name)
                    logger.info("Found partial match for %s: %s", field, col_name)
                    break

        # Return the mapping in REQUIRED_COLUMNS order regardless of which
        # pass resolved each field.
        return {field: found[field] for field in self.REQUIRED_COLUMNS if field in found}

    def detect_outcome_columns(self, df: pd.DataFrame, required_columns: Dict[str, str]) -> List[str]:
        """
        Detect optional outcome columns (numeric columns not in required set)

        Args:
            df: Input DataFrame
            required_columns: Already detected required columns

        Returns:
            list: List of outcome column names
        """
        outcome_columns = []
        required_column_names = set(required_columns.values())

        for column in df.columns:
            if column in required_column_names:
                continue

            try:
                # Values that cannot be parsed as numbers become NaN.
                numeric_series = pd.to_numeric(df[column], errors='coerce')
                numeric_count = numeric_series.count()
                total_non_null = df[column].count()
            except Exception:
                # Best effort: a column that cannot be inspected is not an outcome.
                continue

            # A column qualifies when at least 50% of its non-null values are numeric.
            if total_non_null > 0 and (numeric_count / total_non_null) >= 0.5:
                outcome_columns.append(column)
                logger.info("Detected outcome column: %s", column)

        return outcome_columns

    def validate_required_data(self, df: pd.DataFrame, column_mapping: Dict[str, str]) -> Tuple[bool, List[str]]:
        """
        Validate that all required columns are present and have data

        Args:
            df: Input DataFrame
            column_mapping: Mapping of required fields to column names

        Returns:
            tuple: (is_valid, list_of_errors)
        """
        # Every required field must be mapped before the data is inspected.
        errors = [
            f"Required field '{field}' not found in file"
            for field in self.REQUIRED_COLUMNS
            if field not in column_mapping
        ]
        if errors:
            return False, errors

        for field, column_name in column_mapping.items():
            if column_name not in df.columns:
                errors.append(f"Column '{column_name}' not found in DataFrame")
                continue

            column = df[column_name]

            null_count = column.isnull().sum()
            if null_count > 0:
                errors.append(f"Column '{column_name}' has {null_count} empty values")

            # Match numbers must be whole numbers.
            if field == 'match_number':
                try:
                    numeric_values = pd.to_numeric(column, errors='coerce')

                    # Empty cells were reported above; count only values that
                    # are present but not parseable as a number.
                    invalid_count = numeric_values.isnull().sum() - null_count
                    if invalid_count > 0:
                        errors.append(f"Match number column has {invalid_count} non-numeric values")

                    # Fractional (or infinite) values would otherwise be
                    # silently truncated or dropped when converted with int().
                    non_integer_count = (numeric_values.dropna() % 1 != 0).sum()
                    if non_integer_count > 0:
                        errors.append(f"Match number column has {non_integer_count} non-integer values")
                except Exception as e:
                    errors.append(f"Match number validation failed: {str(e)}")

        return not errors, errors

    def _report_parse_failure(self, filename: str, user_id: int, error_msg: str) -> Tuple[bool, str, List[Dict]]:
        """Log a FIXTURE_PARSE_FAILED operation and build the failure result."""
        log_file_operation('FIXTURE_PARSE_FAILED', filename, user_id=user_id,
                           status='FAILED', error_message=error_msg)
        return False, error_msg, []

    def _build_match_record(self, row, column_mapping: Dict[str, str], outcome_columns: List[str],
                            filename: str, user_id: int) -> Dict:
        """Convert one DataFrame row into a match dictionary with its outcomes."""
        match_data = {
            'match_number': int(pd.to_numeric(row[column_mapping['match_number']])),
            'fighter1_township': str(row[column_mapping['fighter1']]).strip(),
            'fighter2_township': str(row[column_mapping['fighter2']]).strip(),
            'venue_kampala_township': str(row[column_mapping['venue']]).strip(),
            'filename': filename,
            'created_by': user_id,
            'outcomes': {}
        }

        # Outcomes are optional per row: non-numeric or empty cells are skipped.
        for outcome_col in outcome_columns:
            try:
                value = pd.to_numeric(row[outcome_col], errors='coerce')
                if not pd.isna(value):
                    match_data['outcomes'][outcome_col] = float(value)
            except Exception as e:
                logger.warning("Failed to parse outcome %s for match %s: %s",
                               outcome_col, match_data['match_number'], e)

        return match_data

    def parse_fixture_file(self, file_path: str, filename: str, user_id: int) -> Tuple[bool, Optional[str], List[Dict]]:
        """
        Parse fixture file and extract match data

        Args:
            file_path: Path to the fixture file
            filename: Original filename
            user_id: ID of user uploading the file

        Returns:
            tuple: (success, error_message, parsed_matches); error_message is
                None on success and parsed_matches is empty on failure
        """
        try:
            log_file_operation('FIXTURE_PARSE_START', filename, user_id=user_id)

            df = self.read_file(file_path)
            if df is None:
                return self._report_parse_failure(filename, user_id, "Failed to read fixture file")

            logger.info("Read %s rows from fixture file", len(df))

            # Remove completely empty rows
            df = df.dropna(how='all')
            if len(df) == 0:
                return self._report_parse_failure(filename, user_id, "No data found in fixture file")

            column_mapping = self.detect_required_columns(df)

            is_valid, validation_errors = self.validate_required_data(df, column_mapping)
            if not is_valid:
                error_msg = f"Validation failed: {'; '.join(validation_errors)}"
                return self._report_parse_failure(filename, user_id, error_msg)

            outcome_columns = self.detect_outcome_columns(df, column_mapping)

            parsed_matches = []
            for index, row in df.iterrows():
                # A row that cannot be converted is logged and skipped so the
                # remaining rows are still parsed.
                try:
                    parsed_matches.append(
                        self._build_match_record(row, column_mapping, outcome_columns, filename, user_id)
                    )
                except Exception as e:
                    logger.error("Failed to parse row %s: %s", index, e)

            if not parsed_matches:
                return self._report_parse_failure(filename, user_id, "No valid matches found in fixture file")

            log_file_operation('FIXTURE_PARSE_SUCCESS', filename, user_id=user_id,
                               extra_data={
                                   'matches_parsed': len(parsed_matches),
                                   'outcome_columns': outcome_columns
                               })

            logger.info("Successfully parsed %s matches from %s", len(parsed_matches), filename)
            return True, None, parsed_matches

        except Exception as e:
            error_msg = f"Fixture parsing failed: {str(e)}"
            logger.error(error_msg)
            log_file_operation('FIXTURE_PARSE_ERROR', filename, user_id=user_id,
                               status='ERROR', error_message=error_msg)
            return False, error_msg, []

    def save_matches_to_database(self, parsed_matches: List[Dict], file_sha1sum: str) -> Tuple[bool, Optional[str], List[int]]:
        """
        Save parsed matches to database

        Each match is written inside its own savepoint, so a failure on one
        match undoes only that match. Matches whose match_number already
        exists are skipped. All surviving matches are committed together.

        Args:
            parsed_matches: List of parsed match data
            file_sha1sum: SHA1 checksum of the fixture file

        Returns:
            tuple: (success, error_message, list_of_match_ids); error_message
                is None on success
        """
        try:
            saved_match_ids = []

            # Generate a single fixture_id for all matches from this file upload
            fixture_id = str(uuid.uuid4())
            logger.info("Generated fixture_id %s for %s matches", fixture_id, len(parsed_matches))

            for match_data in parsed_matches:
                try:
                    # A SAVEPOINT per match: rolling back the whole session
                    # here would also discard earlier matches that were only
                    # flushed, while their ids were still reported as saved.
                    with db.session.begin_nested():
                        existing_match = Match.query.filter_by(match_number=match_data['match_number']).first()
                        if existing_match:
                            logger.warning("Match number %s already exists, skipping",
                                           match_data['match_number'])
                            continue

                        # Create match record with shared fixture_id
                        match = Match(
                            match_number=match_data['match_number'],
                            fighter1_township=match_data['fighter1_township'],
                            fighter2_township=match_data['fighter2_township'],
                            venue_kampala_township=match_data['venue_kampala_township'],
                            filename=match_data['filename'],
                            file_sha1sum=file_sha1sum,
                            fixture_id=fixture_id,
                            created_by=match_data['created_by']
                        )

                        db.session.add(match)
                        db.session.flush()  # Get the ID without committing

                        for outcome_name, outcome_value in match_data['outcomes'].items():
                            outcome = match.add_outcome(outcome_name, outcome_value)
                            db.session.add(outcome)

                        # Flush the outcomes now so their errors surface
                        # inside this match's savepoint.
                        db.session.flush()
                        match_id = match.id

                        log_database_operation('CREATE', 'matches', match_id,
                                               user_id=match_data['created_by'])

                    # Record the id only once the savepoint was released.
                    saved_match_ids.append(match_id)

                except Exception as e:
                    logger.error("Failed to save match %s: %s",
                                 match_data.get('match_number', 'unknown'), e)
                    continue

            # Commit all changes
            db.session.commit()

            logger.info("Successfully saved %s matches to database", len(saved_match_ids))
            return True, None, saved_match_ids

        except Exception as e:
            db.session.rollback()
            error_msg = f"Database save failed: {str(e)}"
            logger.error(error_msg)
            return False, error_msg, []

    def get_parsing_statistics(self) -> Dict:
        """
        Get fixture parsing statistics

        Returns:
            dict: Counts of matches (total, active, and per zip_upload_status),
                outcomes and distinct fixture filenames; empty dict if any
                query fails
        """
        try:
            stats = {
                'total_matches': Match.query.count(),
                'active_matches': Match.query.filter_by(active_status=True).count(),
                'pending_zip_uploads': Match.query.filter_by(zip_upload_status='pending').count(),
                'completed_uploads': Match.query.filter_by(zip_upload_status='completed').count(),
                'failed_uploads': Match.query.filter_by(zip_upload_status='failed').count(),
                'total_outcomes': MatchOutcome.query.count()
            }

            # Number of distinct fixture filenames that produced matches
            stats['unique_fixture_files'] = db.session.query(Match.filename).distinct().count()

            return stats

        except Exception as e:
            logger.error("Statistics calculation error: %s", e)
            return {}

# Global fixture parser instance: created once when this module is imported
# and handed out by get_fixture_parser() below.
fixture_parser = FixtureParser()

def get_fixture_parser() -> FixtureParser:
    """Return the module-level ``fixture_parser`` instance.

    The same object is returned on every call; no new parser is created here.
    """
    return fixture_parser