""" DSGVO-konformer Daten-Neutralisierer für KI-Agentensysteme Unterstützt TXT, JSON, CSV, Excel und Word-Dateien Mehrsprachig: DE, EN, FR, IT """ import logging from typing import Dict, List, Any # Import all necessary classes and functions from modules.services.serviceNeutralization.subProcessCommon import ProcessResult, CommonUtils from modules.services.serviceNeutralization.subProcessText import TextProcessor, PlainText from modules.services.serviceNeutralization.subProcessList import ListProcessor, TableData from modules.services.serviceNeutralization.subProcessBinary import BinaryProcessor, BinaryData from modules.services.serviceNeutralization.subParseString import StringParser from modules.services.serviceNeutralization.subPatterns import Pattern, HeaderPatterns, DataPatterns, TextTablePatterns # Configure logging logger = logging.getLogger(__name__) # Export all classes and functions for external use __all__ = [ 'DataAnonymizer', 'ProcessResult', 'CommonUtils', 'TextProcessor', 'PlainText', 'ListProcessor', 'TableData', 'BinaryProcessor', 'BinaryData', 'StringParser', 'Pattern', 'HeaderPatterns', 'DataPatterns', 'TextTablePatterns' ] class DataAnonymizer: """Hauptklasse für die Datenanonymisierung""" def __init__(self, names_to_parse: List[str] = None): """Initialize the anonymizer with specialized processors Args: names_to_parse: List of names to parse and replace (case-insensitive) """ self.names_to_parse = names_to_parse or [] # Initialize specialized processors self.text_processor = TextProcessor(names_to_parse) self.list_processor = ListProcessor(names_to_parse) self.binary_processor = BinaryProcessor() # Common utilities self.common_utils = CommonUtils() def process_content(self, content: str, content_type: str = None) -> ProcessResult: """ Process content and return anonymized data Args: content: Content to process content_type: Type of content ('csv', 'json', 'xml', 'text', 'binary') If None, will auto-detect Returns: ProcessResult: Contains anonymized data, mapping, replaced fields and processing info """ try: # Auto-detect content type if not provided if content_type is None: content_type = self.common_utils.detect_content_type(content) # Check if content is binary data if self.binary_processor.is_binary_content(content): return self.binary_processor.process_binary_content(content) # Route to appropriate processor based on content type if content_type in ['csv', 'json', 'xml']: if content_type == 'csv': result, mapping, replaced_fields, processed_info = self.list_processor.process_csv_content(content) elif content_type == 'json': result, mapping, replaced_fields, processed_info = self.list_processor.process_json_content(content) else: # xml result, mapping, replaced_fields, processed_info = self.list_processor.process_xml_content(content) return ProcessResult(result, mapping, replaced_fields, processed_info) else: # Handle as text result, mapping, replaced_fields, processed_info = self.text_processor.process_text_content(content) return ProcessResult(result, mapping, replaced_fields, processed_info) except Exception as e: logger.error(f"Error processing content: {str(e)}") return ProcessResult(None, {}, [], {'type': 'error', 'error': str(e)}) def get_mapping(self) -> Dict[str, str]: """ Get the combined mapping from all processors Returns: Dict[str, str]: Combined mapping dictionary """ text_mapping = self.text_processor.get_mapping() list_mapping = self.list_processor.get_mapping() return self.common_utils.merge_mappings(text_mapping, list_mapping) def clear_mapping(self): """Clear the mapping in all processors""" self.text_processor.clear_mapping() self.list_processor.clear_mapping()