""" DSGVO-konformer Daten-Neutralisierer für KI-Agentensysteme Unterstützt TXT, JSON, CSV, Excel und Word-Dateien Mehrsprachig: DE, EN, FR, IT """ import logging from typing import Dict, List, Any # Import all necessary classes and functions from modules.neutralizer.subProcessCommon import ProcessResult, CommonUtils from modules.neutralizer.subProcessText import TextProcessor, PlainText from modules.neutralizer.subProcessList import ListProcessor, TableData from modules.neutralizer.subProcessBinary import BinaryProcessor, BinaryData from modules.neutralizer.subParseString import StringParser from modules.neutralizer.subPatterns import Pattern, HeaderPatterns, DataPatterns, TextTablePatterns # Configure logging logger = logging.getLogger(__name__) # Export all classes and functions for external use __all__ = [ 'DataAnonymizer', 'ProcessResult', 'CommonUtils', 'TextProcessor', 'PlainText', 'ListProcessor', 'TableData', 'BinaryProcessor', 'BinaryData', 'StringParser', 'Pattern', 'HeaderPatterns', 'DataPatterns', 'TextTablePatterns' ] class DataAnonymizer: """Hauptklasse für die Datenanonymisierung""" def __init__(self, names_to_parse: List[str] = None): """Initialize the anonymizer with specialized processors Args: names_to_parse: List of names to parse and replace (case-insensitive) """ self.names_to_parse = names_to_parse or [] # Initialize specialized processors self.text_processor = TextProcessor(names_to_parse) self.list_processor = ListProcessor(names_to_parse) self.binary_processor = BinaryProcessor() # Common utilities self.common_utils = CommonUtils() def process_content(self, content: str, content_type: str = None) -> ProcessResult: """ Process content and return anonymized data Args: content: Content to process content_type: Type of content ('csv', 'json', 'xml', 'text', 'binary') If None, will auto-detect Returns: ProcessResult: Contains anonymized data, mapping, replaced fields and processing info """ try: # Auto-detect content type if not provided if content_type is None: content_type = self.common_utils.detect_content_type(content) # Check if content is binary data if self.binary_processor.is_binary_content(content): return self.binary_processor.process_binary_content(content) # Route to appropriate processor based on content type if content_type in ['csv', 'json', 'xml']: if content_type == 'csv': result, mapping, replaced_fields, processed_info = self.list_processor.process_csv_content(content) elif content_type == 'json': result, mapping, replaced_fields, processed_info = self.list_processor.process_json_content(content) else: # xml result, mapping, replaced_fields, processed_info = self.list_processor.process_xml_content(content) return ProcessResult(result, mapping, replaced_fields, processed_info) else: # Handle as text result, mapping, replaced_fields, processed_info = self.text_processor.process_text_content(content) return ProcessResult(result, mapping, replaced_fields, processed_info) except Exception as e: logger.error(f"Error processing content: {str(e)}") return ProcessResult(None, {}, [], {'type': 'error', 'error': str(e)}) def get_mapping(self) -> Dict[str, str]: """ Get the combined mapping from all processors Returns: Dict[str, str]: Combined mapping dictionary """ text_mapping = self.text_processor.get_mapping() list_mapping = self.list_processor.get_mapping() return self.common_utils.merge_mappings(text_mapping, list_mapping) def clear_mapping(self): """Clear the mapping in all processors""" self.text_processor.clear_mapping() self.list_processor.clear_mapping()