gateway/modules/services/serviceNeutralization/neutralizer.py
2025-09-22 23:34:47 +02:00

112 lines
No EOL
4.4 KiB
Python

"""
DSGVO-konformer Daten-Neutralisierer für KI-Agentensysteme
Unterstützt TXT, JSON, CSV, Excel und Word-Dateien
Mehrsprachig: DE, EN, FR, IT
"""
import logging
from typing import Dict, List, Any
# Import all necessary classes and functions
from modules.services.serviceNeutralization.subProcessCommon import ProcessResult, CommonUtils
from modules.services.serviceNeutralization.subProcessText import TextProcessor, PlainText
from modules.services.serviceNeutralization.subProcessList import ListProcessor, TableData
from modules.services.serviceNeutralization.subProcessBinary import BinaryProcessor, BinaryData
from modules.services.serviceNeutralization.subParseString import StringParser
from modules.services.serviceNeutralization.subPatterns import Pattern, HeaderPatterns, DataPatterns, TextTablePatterns
# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Public API: every class re-exported from this module for external use
__all__ = [
    "DataAnonymizer",
    "ProcessResult",
    "CommonUtils",
    "TextProcessor",
    "PlainText",
    "ListProcessor",
    "TableData",
    "BinaryProcessor",
    "BinaryData",
    "StringParser",
    "Pattern",
    "HeaderPatterns",
    "DataPatterns",
    "TextTablePatterns",
]
class DataAnonymizer:
    """Main entry point for data anonymization.

    Owns one ``TextProcessor``, one ``ListProcessor`` and one
    ``BinaryProcessor`` and routes each piece of content to the matching one.
    """

    def __init__(self, names_to_parse: List[str] = None):
        """Initialize the anonymizer with its specialized processors.

        Args:
            names_to_parse: List of names to parse and replace
                (case-insensitive). ``None`` is stored on this instance as
                an empty list.
        """
        self.names_to_parse = names_to_parse or []

        # NOTE(review): the processors receive the caller's raw argument
        # (possibly None), not the normalized list stored above -- confirm
        # that both constructors accept None.
        self.text_processor = TextProcessor(names_to_parse)
        self.list_processor = ListProcessor(names_to_parse)
        self.binary_processor = BinaryProcessor()

        # Shared helpers (content-type detection, mapping merge)
        self.common_utils = CommonUtils()

    def process_content(self, content: str, content_type: str = None) -> "ProcessResult":
        """Anonymize ``content`` and return the outcome.

        Args:
            content: Content to process.
            content_type: ``'csv'``, ``'json'`` or ``'xml'`` selects the
                corresponding list-processor routine; any other value is
                processed as plain text. If ``None``, the type is
                auto-detected via ``common_utils.detect_content_type``.

        Returns:
            ProcessResult: Built from the four values returned by the
            selected processor (anonymized data, mapping, replaced fields,
            processing info). Content that the binary processor reports as
            binary is delegated to it regardless of ``content_type`` and its
            return value is passed through unchanged. If any exception is
            raised, the result carries ``None`` data, an empty mapping, an
            empty list and ``{'type': 'error', 'error': <message>}``.
        """
        try:
            # Auto-detect the content type only when the caller gave none
            if content_type is None:
                content_type = self.common_utils.detect_content_type(content)

            # Binary content short-circuits the type-based routing below
            if self.binary_processor.is_binary_content(content):
                return self.binary_processor.process_binary_content(content)

            # Pick the handler lazily so only the selected method is looked up
            if content_type == 'csv':
                handler = self.list_processor.process_csv_content
            elif content_type == 'json':
                handler = self.list_processor.process_json_content
            elif content_type == 'xml':
                handler = self.list_processor.process_xml_content
            else:
                # Everything else is handled as text
                handler = self.text_processor.process_text_content

            result, mapping, replaced_fields, processed_info = handler(content)
            return ProcessResult(result, mapping, replaced_fields, processed_info)
        except Exception as e:
            # logger.exception logs at ERROR level and keeps the traceback;
            # the message text is the same as before.
            logger.exception("Error processing content: %s", e)
            return ProcessResult(None, {}, [], {'type': 'error', 'error': str(e)})

    def get_mapping(self) -> Dict[str, str]:
        """Get the combined mapping from the text and list processors.

        Returns:
            Dict[str, str]: Result of ``common_utils.merge_mappings`` applied
            to the text processor's mapping and the list processor's mapping,
            in that order.
        """
        text_mapping = self.text_processor.get_mapping()
        list_mapping = self.list_processor.get_mapping()
        return self.common_utils.merge_mappings(text_mapping, list_mapping)

    def clear_mapping(self):
        """Clear the mapping in the text processor and the list processor."""
        self.text_processor.clear_mapping()
        self.list_processor.clear_mapping()