"""
Common processing utilities for data anonymization
Shared functions and data structures
"""

import json
import re
from dataclasses import dataclass
from typing import Dict, List, Any, Union, Optional

from pydantic import BaseModel

@dataclass
class ProcessResult:
    """Result of content processing.

    Plain dataclass container. None of the four fields has a default, so all
    of them must be supplied when an instance is constructed.
    """
    # Processed payload; typed as Any, so its shape is not constrained here.
    data: Any
    # String-to-string replacement mapping.
    # NOTE(review): the direction (original -> placeholder or the reverse) is
    # not visible in this file; confirm against the code that builds it.
    mapping: Dict[str, str]
    # Presumably the names of the fields whose values were replaced; verify
    # against the producers of this result.
    replaced_fields: List[str]
    processed_info: Dict[str, Any]  # Additional processing information

class NeutralizationAttribute(BaseModel):
    """Single attribute describing a replacement mapping.

    Pydantic model with two required string fields and one optional label.
    """
    # Presumably the source text that was replaced; confirm with the caller.
    original: str
    # Presumably the placeholder that stands in for ``original``.
    placeholder: str
    # Optional label, None when not provided.
    # NOTE(review): looks like the name of the pattern that produced the
    # match; the camelCase name suggests it mirrors an API field. Confirm.
    patternType: Optional[str] = None

class NeutralizationResult(BaseModel):
    """Unified result for all content types, suitable for API responses.

    Pydantic model; all four fields are required.
    """
    # The content rendered as a single string after neutralization.
    neutralized_text: str
    # String-to-string replacement mapping.
    # NOTE(review): the direction (original -> placeholder or the reverse) is
    # not visible in this file; confirm against the code that builds it.
    mapping: Dict[str, str]
    # One NeutralizationAttribute entry per described replacement.
    attributes: List[NeutralizationAttribute]
    # Free-form additional processing information.
    processed_info: Dict[str, Any]

class CommonUtils:
    """Common utility functions for data processing.

    Every helper is a static method; the class acts as a namespace and holds
    the precompiled patterns and constants the helpers share.
    """

    # "key: value" line: exactly one colon with at least one non-colon
    # character on each side.
    _KEY_VALUE_LINE_RE = re.compile(r'^\s*[^:]+:\s*[^:]+$')
    # Two tab-separated cells: exactly one tab after optional leading
    # whitespace.
    _TAB_PAIR_LINE_RE = re.compile(r'^\s*[^\t]+\t[^\t]+$')
    # Placeholder "[type.id]"; group 1 is the type, group 2 the id. Used with
    # fullmatch(), so no anchors are needed.
    _PLACEHOLDER_RE = re.compile(r'\[([a-z]+)\.([a-f0-9-]+)\]')
    # Content must be longer than this before a NUL byte marks it as binary.
    _BINARY_MIN_LENGTH = 100

    @staticmethod
    def normalize_whitespace(text: str) -> str:
        """
        Normalize whitespace in text

        Every run of whitespace (spaces, tabs, CR, LF, ...) is collapsed to a
        single space and leading/trailing whitespace is removed, so the
        result is always a single line.

        Args:
            text: Text to normalize

        Returns:
            str: Normalized text
        """
        # No separate CR/LF normalization step is needed: "\s+" already
        # turns every line break into a space.
        return re.sub(r'\s+', ' ', text).strip()

    @staticmethod
    def is_table_line(line: str) -> bool:
        """
        Check if a line represents a table row

        A line qualifies when it is either a "key: value" pair containing
        exactly one colon, or two cells separated by exactly one tab.

        Args:
            line: Line to check

        Returns:
            bool: True if line is a table row
        """
        return bool(
            CommonUtils._KEY_VALUE_LINE_RE.match(line)
            or CommonUtils._TAB_PAIR_LINE_RE.match(line)
        )

    @staticmethod
    def _parses_as_json(text: str) -> bool:
        """Return True when ``text`` is a complete, valid JSON document."""
        try:
            json.loads(text)
        except (ValueError, RecursionError):
            # JSONDecodeError is a ValueError; pathological nesting raises
            # RecursionError. Either way the text is not usable as JSON.
            return False
        return True

    @staticmethod
    def detect_content_type(content: str) -> str:
        """
        Detect the type of content based on its structure

        Checks run in this order: binary, JSON, XML, CSV, then text.

        Args:
            content: Content to analyze

        Returns:
            str: Content type ('csv', 'json', 'xml', 'text', 'binary')
        """
        text = content.strip()

        # Binary goes first: a NUL byte rules out JSON, XML and CSV, and
        # checking it last let binary data that happened to contain commas
        # or brackets be reported as a text format.
        if len(text) > CommonUtils._BINARY_MIN_LENGTH and '\x00' in text:
            return 'binary'

        # JSON: the outer brackets are only a cheap pre-filter. Bracketed
        # prose, or this module's own "[type.id]" placeholders, would pass
        # it, so the candidate is confirmed by parsing.
        bracketed = (
            (text.startswith('{') and text.endswith('}'))
            or (text.startswith('[') and text.endswith(']'))
        )
        if bracketed and CommonUtils._parses_as_json(text):
            return 'json'

        # XML: heuristic only (angle brackets at both ends); the document is
        # deliberately not parsed here.
        if text.startswith('<') and text.endswith('>'):
            return 'xml'

        # CSV: multi-line content whose first (up to three) lines all
        # contain a comma. maxsplit=3 avoids splitting the whole content.
        if ',' in text and '\n' in text:
            head = text.split('\n', 3)[:3]
            if all(',' in row for row in head):
                return 'csv'

        # Default to text
        return 'text'

    @staticmethod
    def merge_mappings(*mappings: Dict[str, str]) -> Dict[str, str]:
        """
        Merge multiple mapping dictionaries

        Later mappings win when the same key appears more than once. The
        inputs are not modified; ``None`` or empty arguments are skipped.

        Args:
            *mappings: Mapping dictionaries to merge

        Returns:
            Dict[str, str]: Merged mapping dictionary
        """
        merged: Dict[str, str] = {}
        for mapping in mappings:
            if mapping:
                merged.update(mapping)
        return merged

    @staticmethod
    def create_placeholder(placeholder_type: str, placeholder_id: str) -> str:
        """
        Create a placeholder string in the format [type.uuid]

        No validation is performed here. The result only passes
        validate_placeholder() when the type is lowercase ASCII letters and
        the id consists of lowercase hex digits and hyphens.

        Args:
            placeholder_type: Type of placeholder (email, phone, name, etc.)
            placeholder_id: Unique identifier for the placeholder

        Returns:
            str: Formatted placeholder string
        """
        return f"[{placeholder_type}.{placeholder_id}]"

    @staticmethod
    def validate_placeholder(placeholder: str) -> bool:
        """
        Validate if a string is a valid placeholder

        The whole string must match ``[type.id]`` where type is one or more
        lowercase ASCII letters and id is one or more lowercase hex digits or
        hyphens. Surrounding text, including a trailing newline, makes it
        invalid. Non-string input is reported as invalid.

        Args:
            placeholder: String to validate

        Returns:
            bool: True if valid placeholder
        """
        return CommonUtils.extract_placeholder_info(placeholder) is not None

    @staticmethod
    def extract_placeholder_info(placeholder: str) -> Optional[tuple]:
        """
        Extract type and ID from a placeholder

        Applies the same rules as validate_placeholder().

        Args:
            placeholder: Placeholder string

        Returns:
            Optional[tuple]: (type, id) or None if invalid
        """
        if not isinstance(placeholder, str):
            return None
        # fullmatch instead of match + "$": "$" also matches just before a
        # trailing newline, which would accept "[type.id]\n".
        found = CommonUtils._PLACEHOLDER_RE.fullmatch(placeholder)
        if found is None:
            return None
        return found.group(1), found.group(2)