# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Text processing module for data anonymization
Handles plain text processing without header information
"""

from typing import Dict, List, Any
from dataclasses import dataclass
from .subParseString import StringParser

@dataclass
class PlainText:
    """Represents a section of plain (non-tabular) text"""
    # The text carried by this section
    content: str
    # Tag naming where the text came from, e.g. 'txt', 'docx', 'text_plain'
    source_type: str

class TextProcessor:
    """Handles plain text processing for anonymization"""
    
    def __init__(self, NamesToParse: List[str] = None):
        """
        Initialize the text processor
        
        Args:
            NamesToParse: List of names to parse and replace
        """
        self.string_parser = StringParser(NamesToParse)
    
    def _extractTablesFromText(self, content: str) -> tuple:
        """
        Extract tables and plain text from content
        
        Args:
            content: Content to process
            
        Returns:
            Tuple of (list of tables, list of plain text sections)
        """
        # For now, process the entire content as plain text
        # This can be extended later to detect table-like structures
        tables = []
        plainTexts = [PlainText(content=content, source_type='text_plain')]
        
        return tables, plainTexts
    
    def _anonymizePlainText(self, text: PlainText) -> PlainText:
        """
        Anonymize plain text content
        
        Args:
            text: PlainText object to anonymize
            
        Returns:
            PlainText: Anonymized text
        """
        # Use the string parser to process the content
        anonymizedContent = self.string_parser.processString(text.content)
        
        return PlainText(content=anonymizedContent, source_type=text.source_type)
    
    def processTextContent(self, content: str) -> tuple:
        """
        Process text content and return anonymized data
        
        Args:
            content: Text content to process
            
        Returns:
            Tuple of (anonymized_content, mapping, replaced_fields, processed_info)
        """
        # Extract tables and plain text sections
        tables, plainTexts = self._extractTablesFromText(content)
        
        # Process plain text sections
        anonymizedTexts = [self._anonymizePlainText(text) for text in plainTexts]
        
        # Combine all processed content
        result = content
        for text, anonymizedText in zip(plainTexts, anonymizedTexts):
            if text.content != anonymizedText.content:
                result = result.replace(text.content, anonymizedText.content)
        
        # Get processing information
        processedInfo = {
            'type': 'text',
            'tables': ([{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
                       if tables else [])
        }
        
        return result, self.string_parser.getMapping(), [], processedInfo
    
    def getMapping(self) -> Dict[str, str]:
        """
        Get the current mapping of original values to placeholders
        
        Returns:
            Dict[str, str]: Mapping dictionary
        """
        return self.string_parser.getMapping()
    
    def clearMapping(self):
        """Clear the current mapping"""
        self.string_parser.clearMapping()