neutralize pdf files and improve pattern recognition

This commit is contained in:
Ida Dittrich 2026-02-23 11:18:28 +01:00
parent 7163397fd3
commit 5120fbc503
11 changed files with 984 additions and 86 deletions

View file

@ -83,14 +83,16 @@ class InterfaceFeatureNeutralizer:
self.featureInstanceId = featureInstanceId
def getNeutralizationConfig(self) -> Optional[DataNeutraliserConfig]:
"""Get the data neutralization configuration for the current user's mandate"""
"""Get the data neutralization configuration for the current user's mandate and instance"""
try:
# Use RBAC filtering
record_filter = {"mandateId": self.mandateId}
if self.featureInstanceId:
record_filter["featureInstanceId"] = self.featureInstanceId
filteredConfigs = getRecordsetWithRBAC(
self.db,
DataNeutraliserConfig,
self.currentUser,
recordFilter={"mandateId": self.mandateId},
recordFilter=record_filter,
mandateId=self.mandateId
)
@ -130,6 +132,8 @@ class InterfaceFeatureNeutralizer:
# Create new config
configData["mandateId"] = self.mandateId
configData["userId"] = self.userId
if self.featureInstanceId:
configData["featureInstanceId"] = self.featureInstanceId
newConfig = DataNeutraliserConfig(**configData)
createdRecord = self.db.recordCreate(DataNeutraliserConfig, newConfig)
@ -200,13 +204,44 @@ class InterfaceFeatureNeutralizer:
DataNeutralizerAttributes,
recordFilter={"mandateId": self.mandateId, "id": attributeId}
)
if attributes:
return attributes[0]
return None
if not attributes:
return None
attr = attributes[0]
return {k: v for k, v in attr.items() if not k.startswith("_")}
except Exception as e:
logger.error(f"Error getting attribute by ID: {str(e)}")
return None
def createAttribute(
self,
attributeId: str,
originalText: str,
patternType: str,
fileId: Optional[str] = None
) -> Optional[DataNeutralizerAttributes]:
"""Create a neutralization attribute for placeholder resolution."""
try:
mandate_id = self.mandateId or ""
feature_instance_id = self.featureInstanceId or ""
if not self.userId:
logger.warning("Cannot create attribute: missing userId")
return None
attr = DataNeutralizerAttributes(
id=attributeId,
mandateId=self.mandateId,
featureInstanceId=self.featureInstanceId,
userId=self.userId,
originalText=originalText,
fileId=fileId,
patternType=patternType,
)
created = self.db.recordCreate(DataNeutralizerAttributes, attr.model_dump())
return DataNeutralizerAttributes(**{k: v for k, v in created.items() if not k.startswith("_")})
except Exception as e:
logger.error(f"Error creating attribute: {str(e)}")
return None
def getInterface(currentUser: Optional[User] = None, mandateId: Optional[str] = None, featureInstanceId: Optional[str] = None) -> InterfaceFeatureNeutralizer:
"""

View file

@ -21,17 +21,7 @@ UI_OBJECTS = [
"objectKey": "ui.feature.neutralization.playground",
"label": {"en": "Playground", "de": "Spielwiese", "fr": "Bac à sable"},
"meta": {"area": "playground"}
},
{
"objectKey": "ui.feature.neutralization.config",
"label": {"en": "Configuration", "de": "Konfiguration", "fr": "Configuration"},
"meta": {"area": "config"}
},
{
"objectKey": "ui.feature.neutralization.attributes",
"label": {"en": "Attributes", "de": "Attribute", "fr": "Attributs"},
"meta": {"area": "attributes"}
},
}
]
# Resource Objects for RBAC catalog
@ -130,9 +120,106 @@ def registerFeature(catalogService) -> bool:
meta=resObj.get("meta")
)
# Sync template roles to database
_syncTemplateRolesToDb()
logger.info(f"Feature '{FEATURE_CODE}' registered {len(UI_OBJECTS)} UI objects and {len(RESOURCE_OBJECTS)} resource objects")
return True
except Exception as e:
logger.error(f"Failed to register feature '{FEATURE_CODE}': {e}")
return False
def _syncTemplateRolesToDb() -> int:
"""
Sync template roles and their AccessRules to the database.
Creates global template roles (mandateId=None) if they don't exist.
Returns:
Number of roles created
"""
try:
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.datamodels.datamodelRbac import Role, AccessRule, AccessRuleContext
rootInterface = getRootInterface()
existingRoles = rootInterface.getRolesByFeatureCode(FEATURE_CODE)
templateRoles = [r for r in existingRoles if r.mandateId is None]
existingRoleLabels = {r.roleLabel: str(r.id) for r in templateRoles}
createdCount = 0
for roleTemplate in TEMPLATE_ROLES:
roleLabel = roleTemplate["roleLabel"]
if roleLabel in existingRoleLabels:
roleId = existingRoleLabels[roleLabel]
_ensureAccessRulesForRole(rootInterface, roleId, roleTemplate.get("accessRules", []))
else:
newRole = Role(
roleLabel=roleLabel,
description=roleTemplate.get("description", {}),
featureCode=FEATURE_CODE,
mandateId=None,
featureInstanceId=None,
isSystemRole=False
)
createdRole = rootInterface.db.recordCreate(Role, newRole.model_dump())
roleId = createdRole.get("id")
_ensureAccessRulesForRole(rootInterface, roleId, roleTemplate.get("accessRules", []))
logger.info(f"Created template role '{roleLabel}' with ID {roleId}")
createdCount += 1
if createdCount > 0:
logger.info(f"Feature '{FEATURE_CODE}': Created {createdCount} template roles")
return createdCount
except Exception as e:
logger.error(f"Error syncing template roles for feature '{FEATURE_CODE}': {e}")
return 0
def _ensureAccessRulesForRole(rootInterface, roleId: str, ruleTemplates: List[Dict[str, Any]]) -> int:
"""Ensure AccessRules exist for a role based on templates."""
from modules.datamodels.datamodelRbac import AccessRule, AccessRuleContext
existingRules = rootInterface.getAccessRulesByRole(roleId)
existingSignatures = set()
for rule in existingRules:
sig = (rule.context.value if rule.context else None, rule.item)
existingSignatures.add(sig)
createdCount = 0
for template in ruleTemplates:
context = template.get("context", "UI")
item = template.get("item")
sig = (context, item)
if sig in existingSignatures:
continue
if context == "UI":
contextEnum = AccessRuleContext.UI
elif context == "DATA":
contextEnum = AccessRuleContext.DATA
elif context == "RESOURCE":
contextEnum = AccessRuleContext.RESOURCE
else:
contextEnum = context
newRule = AccessRule(
roleId=roleId,
context=contextEnum,
item=item,
view=template.get("view", False),
read=template.get("read"),
create=template.get("create"),
update=template.get("update"),
delete=template.get("delete"),
)
rootInterface.db.recordCreate(AccessRule, newRule.model_dump())
createdCount += 1
return createdCount
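# Illustrative sketch of a TEMPLATE_ROLES entry as consumed above (assumed shape,
# derived from the fields read in _syncTemplateRolesToDb/_ensureAccessRulesForRole;
# the roleLabel value is hypothetical):
#
#   {
#       "roleLabel": "neutralization.user",
#       "description": {"en": "May use the neutralization playground"},
#       "accessRules": [
#           {"context": "UI", "item": "ui.feature.neutralization.playground", "view": True},
#       ],
#   }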

View file

@ -15,14 +15,96 @@ logger = logging.getLogger(__name__)
class NeutralizationPlayground:
"""Feature/UI wrapper around NeutralizationService for playground & routes."""
def __init__(self, currentUser: User, mandateId: str):
def __init__(self, currentUser: User, mandateId: str, featureInstanceId: Optional[str] = None):
self.currentUser = currentUser
self.mandateId = mandateId
self.services = getServices(currentUser, None, mandateId=mandateId)
self.featureInstanceId = featureInstanceId
self.services = getServices(currentUser, None, mandateId=mandateId, featureInstanceId=featureInstanceId)
def processText(self, text: str) -> Dict[str, Any]:
return self.services.neutralization.processText(text)
async def processUploadedFileAsync(self, file_bytes: bytes, filename: str) -> Dict[str, Any]:
"""Process an uploaded file (bytes + filename). Returns neutralized result for text or binary.
Saves both original and neutralized files to user files (component storage) when available."""
import base64
name_lower = (filename or '').lower()
mime_map = {
'.pdf': 'application/pdf',
'.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'.xlsm': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
}
mime = next((mime_map[ext] for ext in mime_map if name_lower.endswith(ext)), 'text/plain')
binary_exts = {'.pdf', '.docx', '.xlsx', '.xlsm', '.pptx'}
is_binary = any(name_lower.endswith(ext) for ext in binary_exts)
original_file_id = None
neutralized_file_id = None
# Save original file to user files
if self.services.interfaceDbComponent:
try:
file_item, _ = self.services.interfaceDbComponent.saveUploadedFile(file_bytes, filename)
original_file_id = str(file_item.id)
except Exception as e:
logger.warning(f"Could not save original file to user files: {e}")
if is_binary:
result = await self.services.neutralization.processBinaryBytesAsync(file_bytes, filename, mime)
neu_bytes = result.get('neutralized_bytes')
logger.debug(f"Binary result: neu_bytes type={type(neu_bytes).__name__}, len={len(neu_bytes) if neu_bytes is not None else 0}")
if neu_bytes is not None and len(neu_bytes) > 0:
result['neutralized_file_base64'] = base64.b64encode(neu_bytes).decode('ascii')
result['neutralized_file_name'] = result.get('neutralized_file_name', f'neutralized_{filename}')
result['mime_type'] = result.get('mime_type', mime)
# Save neutralized binary to user files
if self.services.interfaceDbComponent:
try:
neu_name = result['neutralized_file_name']
file_item, _ = self.services.interfaceDbComponent.saveUploadedFile(neu_bytes, neu_name)
neutralized_file_id = str(file_item.id)
except Exception as e:
logger.warning(f"Could not save neutralized file to user files: {e}")
# Remove raw bytes before JSON response (avoid serialization issues; use base64 only)
result.pop('neutralized_bytes', None)
result['original_file_id'] = original_file_id
result['neutralized_file_id'] = neutralized_file_id
return result
try:
text_content = file_bytes.decode('utf-8')
except UnicodeDecodeError:
try:
text_content = file_bytes.decode('latin-1')
except UnicodeDecodeError:
return {
'neutralized_text': None,
'original_file_id': original_file_id,
'neutralized_file_id': None,
'processed_info': {'type': 'error', 'error': 'File could not be decoded as text. Supported: UTF-8, Latin-1. For PDF/Word/Excel, use supported binary formats.'}
}
result = self.services.neutralization.processText(text_content)
result['neutralized_file_name'] = f'neutralized_{filename}'
# Save neutralized text as file to user files
if self.services.interfaceDbComponent and result.get('neutralized_text') is not None:
try:
neu_text = result['neutralized_text']
neu_bytes = neu_text.encode('utf-8')
neu_name = result['neutralized_file_name']
file_item, _ = self.services.interfaceDbComponent.saveUploadedFile(neu_bytes, neu_name)
neutralized_file_id = str(file_item.id)
except Exception as e:
logger.warning(f"Could not save neutralized text file to user files: {e}")
result['original_file_id'] = original_file_id
result['neutralized_file_id'] = neutralized_file_id
return result
def processUploadedFile(self, file_bytes: bytes, filename: str) -> Dict[str, Any]:
"""Sync wrapper for sync callers. Uses asyncio.run; do NOT call from async routes (use processUploadedFileAsync)."""
return asyncio.run(self.processUploadedFileAsync(file_bytes, filename))
def processFiles(self, fileIds: List[str]) -> Dict[str, Any]:
results: List[Dict[str, Any]] = []
errors: List[str] = []
@ -273,18 +355,42 @@ class SharepointProcessor:
processed: List[Dict[str, Any]] = []
errors: List[str] = []
BINARY_EXTS = {'.pdf', '.docx', '.doc', '.xlsx', '.xlsm', '.pptx', '.ppt'}
async def _processSingle(fileInfo: Dict[str, Any]):
try:
fileContent = await self.services.sharepoint.downloadFile(sourceSiteInfo['id'], fileInfo['id'])
if not fileContent:
return {'error': f"Failed to download file: {fileInfo['name']}"}
try:
textContent = fileContent.decode('utf-8')
except UnicodeDecodeError:
textContent = fileContent.decode('latin-1')
result = self.services.neutralization.processText(textContent)
name_lower = (fileInfo.get('name') or '').lower()
is_binary = any(name_lower.endswith(ext) for ext in BINARY_EXTS)
mime_map = {
'.pdf': 'application/pdf',
'.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'.doc': 'application/msword',
'.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'.xlsm': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'.ppt': 'application/vnd.ms-powerpoint',
}
mime = next((mime_map[ext] for ext in BINARY_EXTS if name_lower.endswith(ext)), 'text/plain')
if is_binary:
result = self.services.neutralization.processBinaryBytes(fileContent, fileInfo['name'], mime)
if result.get('neutralized_bytes'):
content_to_upload = result['neutralized_bytes']
else:
return {'error': f"Failed to neutralize binary file {fileInfo['name']}: {result.get('processed_info', {}).get('error', 'Unknown error')}"}
else:
try:
textContent = fileContent.decode('utf-8')
except UnicodeDecodeError:
textContent = fileContent.decode('latin-1')
result = self.services.neutralization.processText(textContent)
content_to_upload = (result.get('neutralized_text') or '').encode('utf-8')
neutralizedFilename = f"neutralized_{fileInfo['name']}"
uploadResult = await self.services.sharepoint.uploadFile(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8'))
uploadResult = await self.services.sharepoint.uploadFile(targetSiteInfo['id'], targetFolder, neutralizedFilename, content_to_upload)
if 'error' in uploadResult:
return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"}
return {

View file

@ -1,6 +1,6 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
from fastapi import APIRouter, HTTPException, Depends, Path, Request, status, Query, Body
from fastapi import APIRouter, HTTPException, Depends, Path, Request, status, Query, Body, File, UploadFile
from typing import List, Dict, Any, Optional
import logging
@ -35,13 +35,18 @@ def get_neutralization_config(
) -> DataNeutraliserConfig:
"""Get data neutralization configuration"""
try:
service = NeutralizationPlayground(context.user, str(context.mandateId))
mandate_id = str(context.mandateId) if context.mandateId else ""
feature_instance_id = str(context.featureInstanceId) if context.featureInstanceId else ""
service = NeutralizationPlayground(
context.user, mandate_id, featureInstanceId=feature_instance_id or None
)
config = service.getConfig()
if not config:
# Return default config instead of 404
# Return default config instead of 404 (requires mandateId and featureInstanceId for instance-scoped config)
return DataNeutraliserConfig(
mandateId=context.mandateId,
mandateId=mandate_id,
featureInstanceId=feature_instance_id,
userId=context.user.id,
enabled=True,
namesToParse="",
@ -69,7 +74,11 @@ def save_neutralization_config(
) -> DataNeutraliserConfig:
"""Save or update data neutralization configuration"""
try:
service = NeutralizationPlayground(context.user, str(context.mandateId))
service = NeutralizationPlayground(
context.user,
str(context.mandateId) if context.mandateId else "",
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
)
config = service.saveConfig(config_data)
return config
@ -81,6 +90,44 @@ def save_neutralization_config(
detail=f"Error saving neutralization config: {str(e)}"
)
@router.post("/neutralize-file")
@limiter.limit("20/minute")
async def neutralize_file(
request: Request,
file: UploadFile = File(..., description="File to neutralize (PDF, DOCX, XLSX, PPTX, TXT, CSV, JSON)"),
context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
"""Upload and neutralize a file. Returns neutralized text or base64-encoded file for download."""
try:
if not file.filename or not file.filename.strip():
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="File name is required"
)
content = await file.read()
if not content:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="File is empty"
)
service = NeutralizationPlayground(
context.user,
str(context.mandateId) if context.mandateId else "",
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
)
result = await service.processUploadedFileAsync(content, file.filename or "file")
logger.info(f"Neutralize file result keys: {list(result.keys())}, has_base64={bool(result.get('neutralized_file_base64'))}, has_text={result.get('neutralized_text') is not None}")
return result
except HTTPException:
raise
except Exception as e:
logger.error(f"Error neutralizing file: {str(e)}", exc_info=True)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error neutralizing file: {str(e)}"
)
@router.post("/neutralize-text", response_model=Dict[str, Any])
@limiter.limit("20/minute")
def neutralize_text(
@ -99,7 +146,11 @@ def neutralize_text(
detail="Text content is required"
)
service = NeutralizationPlayground(context.user, str(context.mandateId))
service = NeutralizationPlayground(
context.user,
str(context.mandateId) if context.mandateId else "",
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
)
result = service.neutralizeText(text, file_id)
return result
@ -130,7 +181,11 @@ def resolve_text(
detail="Text content is required"
)
service = NeutralizationPlayground(context.user, str(context.mandateId))
service = NeutralizationPlayground(
context.user,
str(context.mandateId) if context.mandateId else "",
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
)
resolved_text = service.resolveText(text)
return {"resolved_text": resolved_text}
@ -153,7 +208,11 @@ def get_neutralization_attributes(
) -> List[DataNeutralizerAttributes]:
"""Get neutralization attributes, optionally filtered by file ID"""
try:
service = NeutralizationPlayground(context.user, str(context.mandateId))
service = NeutralizationPlayground(
context.user,
str(context.mandateId) if context.mandateId else "",
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
)
attributes = service.getAttributes(fileId)
return attributes
@ -183,7 +242,11 @@ async def process_sharepoint_files(
detail="Both source and target paths are required"
)
service = NeutralizationPlayground(context.user, str(context.mandateId))
service = NeutralizationPlayground(
context.user,
str(context.mandateId) if context.mandateId else "",
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
)
result = await service.processSharepointFiles(source_path, target_path)
return result
@ -212,7 +275,11 @@ def batch_process_files(
detail="Files data is required"
)
service = NeutralizationPlayground(context.user, str(context.mandateId))
service = NeutralizationPlayground(
context.user,
str(context.mandateId) if context.mandateId else "",
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
)
result = service.batchNeutralizeFiles(files_data)
return result
@ -234,7 +301,11 @@ def get_neutralization_stats(
) -> Dict[str, Any]:
"""Get neutralization processing statistics"""
try:
service = NeutralizationPlayground(context.user, str(context.mandateId))
service = NeutralizationPlayground(
context.user,
str(context.mandateId) if context.mandateId else "",
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
)
stats = service.getProcessingStats()
return stats
@ -255,7 +326,11 @@ def cleanup_file_attributes(
) -> Dict[str, str]:
"""Clean up neutralization attributes for a specific file"""
try:
service = NeutralizationPlayground(context.user, str(context.mandateId))
service = NeutralizationPlayground(
context.user,
str(context.mandateId) if context.mandateId else "",
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
)
success = service.cleanupFileAttributes(fileId)
if success:

View file

@ -4,10 +4,11 @@
Data Neutralization Service
Handles file processing for data neutralization including SharePoint integration
GDPR-compliant data neutralizer for AI agent systems
Supports TXT, JSON, CSV, Excel and Word files
Supports TXT, JSON, CSV, PDF, DOCX, XLSX, PPTX (extract -> neutralize -> generate)
Multilingual: DE, EN, FR, IT
"""
import asyncio
import logging
import re
import json
@ -21,10 +22,20 @@ from .subProcessCommon import CommonUtils, NeutralizationResult, NeutralizationA
from .subProcessText import TextProcessor, PlainText
from .subProcessList import ListProcessor, TableData
from .subProcessBinary import BinaryProcessor
from .subProcessPdfInPlace import neutralize_pdf_in_place
from .subPatterns import HeaderPatterns, DataPatterns, TextTablePatterns
from .subContentPartAdapter import content_parts_to_renderer_schema
logger = logging.getLogger(__name__)
# MIME types that can be processed via extract -> neutralize -> generate
EXTRACTABLE_BINARY_MIME_TYPES = frozenset({
"application/pdf",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
})
class NeutralizationService:
"""Service for handling data neutralization operations"""
@ -44,8 +55,8 @@ class NeutralizationService:
dbApp = serviceCenter.interfaceDbApp
self.interfaceNeutralizer = getNeutralizerInterface(
currentUser=dbApp.currentUser,
mandateId=dbApp.mandateId,
featureInstanceId=getattr(dbApp, 'featureInstanceId', None)
mandateId=serviceCenter.mandateId or dbApp.mandateId,
featureInstanceId=getattr(serviceCenter, 'featureInstanceId', None) or getattr(dbApp, 'featureInstanceId', None)
)
# Initialize anonymization processors
@ -71,47 +82,61 @@ class NeutralizationService:
def processText(self, text: str) -> Dict[str, Any]:
"""Neutralize a raw text string and return a standard result dict."""
return self._neutralizeText(text, 'text')
result = self._neutralizeText(text, 'text')
self._persistAttributes(result.get('mapping', {}), None)
return result
def processFile(self, fileId: str) -> Dict[str, Any]:
"""Neutralize a file referenced by its fileId using component interface.
Binary files are not neutralized but will be indicated in the result."""
Supports text files directly; PDF/DOCX/XLSX/PPTX via extract -> neutralize -> generate."""
if not self.interfaceDbComponent:
raise ValueError("Component interface is required to process a file by fileId")
# Fetch file data and metadata
fileInfo = None
try:
# getFile returns an object; fallback to dict-like
fileInfo = self.interfaceDbComponent.getFile(fileId)
except Exception:
fileInfo = None
fileName = getattr(fileInfo, 'fileName', None) if fileInfo else None
mimeType = getattr(fileInfo, 'mimeType', None) if fileInfo else None
# Check if file is binary and cannot be neutralized
fileData = self.interfaceDbComponent.getFileData(fileId)
if not fileData:
raise ValueError(f"No file data found for fileId: {fileId}")
mime_lower = (mimeType or '').lower()
# Binary but extractable: PDF, DOCX, XLSX, PPTX
if mime_lower in EXTRACTABLE_BINARY_MIME_TYPES:
try:
result = asyncio.run(self._processBinaryFile(fileData, fileName or "document", mime_lower, fileId))
if result:
result['file_id'] = fileId
result['neutralized_file_name'] = f"neutralized_{fileName}" if fileName else "neutralized_document"
return result
except Exception as e:
logger.error(f"Binary file neutralization failed: {str(e)}")
return {
'file_id': fileId,
'is_binary': True,
'mime_type': mimeType or 'unknown',
'file_name': fileName or 'unknown',
'neutralized_text': None,
'processed_info': {'type': 'binary', 'status': 'error', 'error': str(e)}
}
# Binary but not extractable
if self._isBinaryMimeType(mimeType or ''):
# Return a result indicating binary file (not neutralized)
return {
'file_id': fileId,
'is_binary': True,
'mime_type': mimeType or 'unknown',
'file_name': fileName or 'unknown',
'neutralized_text': None,
'processed_info': {
'type': 'binary',
'status': 'skipped',
'message': 'Binary file neutralization will be implemented in the future'
}
'processed_info': {'type': 'binary', 'status': 'skipped', 'message': 'File type not supported for neutralization'}
}
fileData = self.interfaceDbComponent.getFileData(fileId)
if not fileData:
raise ValueError(f"No file data found for fileId: {fileId}")
# Determine textType from mime
# Text-based file
textType = self._getContentTypeFromMime(mimeType or '')
# Decode to text
try:
textContent = fileData.decode('utf-8')
except UnicodeDecodeError:
@ -123,17 +148,59 @@ class NeutralizationService:
except UnicodeDecodeError:
continue
if decoded is None:
raise ValueError("Unable to decode file content as text. This may indicate a binary file that cannot be neutralized.")
raise ValueError("Unable to decode file content as text.")
textContent = decoded
result = self._neutralizeText(textContent, textType)
# Add a reasonable output filename if original known
self._persistAttributes(result.get('mapping', {}), fileId)
if fileName:
result['neutralized_file_name'] = f"neutralized_{fileName}"
result['file_id'] = fileId
result['is_binary'] = False
return result
def processBinaryBytes(self, fileBytes: bytes, fileName: str, mimeType: str) -> Dict[str, Any]:
"""Neutralize binary file bytes (sync - use from sync callers). Uses asyncio.run when event loop not running."""
mime_lower = (mimeType or '').lower()
if mime_lower not in EXTRACTABLE_BINARY_MIME_TYPES:
return {
'neutralized_text': None,
'neutralized_bytes': None,
'is_binary': True,
'processed_info': {'type': 'binary', 'status': 'skipped', 'message': 'File type not supported'}
}
try:
return asyncio.run(self._processBinaryFile(fileBytes, fileName, mime_lower, None))
except Exception as e:
logger.error(f"Binary neutralization failed: {str(e)}")
return {
'neutralized_text': None,
'neutralized_bytes': None,
'is_binary': True,
'processed_info': {'type': 'binary', 'status': 'error', 'error': str(e)}
}
async def processBinaryBytesAsync(self, fileBytes: bytes, fileName: str, mimeType: str) -> Dict[str, Any]:
"""Neutralize binary file bytes (async - use from async routes to avoid event loop conflict)."""
mime_lower = (mimeType or '').lower()
if mime_lower not in EXTRACTABLE_BINARY_MIME_TYPES:
return {
'neutralized_text': None,
'neutralized_bytes': None,
'is_binary': True,
'processed_info': {'type': 'binary', 'status': 'skipped', 'message': 'File type not supported'}
}
try:
return await self._processBinaryFile(fileBytes, fileName, mime_lower, None)
except Exception as e:
logger.error(f"Binary neutralization failed: {str(e)}")
return {
'neutralized_text': None,
'neutralized_bytes': None,
'is_binary': True,
'processed_info': {'type': 'binary', 'status': 'error', 'error': str(e)}
}
def resolveText(self, text: str) -> str:
if not self.interfaceNeutralizer:
return text
@ -167,6 +234,195 @@ class NeutralizationService:
return False
return self.interfaceNeutralizer.deleteNeutralizationAttributes(fileId)
def _persistAttributes(self, mapping: Dict[str, str], fileId: Optional[str]) -> None:
"""Persist mapping to DB for resolve to work. mapping: originalText -> placeholder e.g. '[email.uuid]'"""
if not self.interfaceNeutralizer or not mapping:
return
import re
placeholder_re = re.compile(r'^\[([a-z]+)\.([a-f0-9-]{36})\]$')
for original_text, placeholder in mapping.items():
m = placeholder_re.match(placeholder)
if m:
pattern_type, uid = m.group(1), m.group(2)
try:
self.interfaceNeutralizer.createAttribute(
attributeId=uid,
originalText=original_text,
patternType=pattern_type,
fileId=fileId
)
except Exception as e:
logger.debug(f"Could not persist attribute {uid}: {e}")
async def _processBinaryFile(
self,
fileBytes: bytes,
fileName: str,
mimeType: str,
fileId: Optional[str]
) -> Dict[str, Any]:
"""Extract -> neutralize -> adapt -> generate for PDF/DOCX/XLSX/PPTX."""
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
from modules.services.serviceExtraction.subPipeline import runExtraction
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
# Ensure registries exist
if ExtractionService._sharedExtractorRegistry is None:
ExtractionService(self.services)
registry = ExtractionService._sharedExtractorRegistry
chunker = ExtractionService._sharedChunkerRegistry
opts = ExtractionOptions(prompt="neutralize", mergeStrategy=MergeStrategy(preserveChunks=True))
# 1. Extract
extracted = runExtraction(registry, chunker, fileBytes, fileName, mimeType, opts)
parts = extracted.parts if hasattr(extracted, 'parts') else []
if not parts:
return {
'neutralized_text': None,
'neutralized_bytes': None,
'is_binary': True,
'processed_info': {'type': 'binary', 'status': 'error', 'error': 'No content extracted'}
}
# 2. Neutralize each text/table part
all_mapping: Dict[str, str] = {}
neutralized_parts: List[Any] = []
neutralization_error: Optional[str] = None
for part in parts:
p = part if isinstance(part, dict) else part.model_dump() if hasattr(part, 'model_dump') else part
type_group = p.get('typeGroup', '')
data = p.get('data', '')
if type_group in ('binary', 'image') or not (data and str(data).strip()):
neutralized_parts.append(part)
continue
nr = self._neutralizeText(str(data), 'text' if type_group != 'table' else 'csv')
proc = nr.get('processed_info', {}) or {}
if isinstance(proc, dict) and proc.get('type') == 'error':
neutralization_error = proc.get('error', 'Neutralization failed')
neu_text = nr.get('neutralized_text', str(data))
mapping = nr.get('mapping', {})
all_mapping.update(mapping)
new_part = {**p, 'data': neu_text}
neutralized_parts.append(new_part)
self._persistAttributes(all_mapping, fileId)
# 3. PDF: Use in-place only; no fallback to render
if mimeType == "application/pdf":
if neutralization_error:
logger.error(f"PDF neutralization aborted: {neutralization_error}")
return {
'neutralized_text': None,
'neutralized_bytes': None,
'is_binary': True,
'processed_info': {'type': 'binary', 'status': 'error', 'error': neutralization_error}
}
in_place_bytes = neutralize_pdf_in_place(fileBytes, all_mapping)
if in_place_bytes is not None:
logger.info("PDF neutralization completed via in-place redaction (layout preserved)")
return {
'neutralized_text': None,
'neutralized_bytes': in_place_bytes,
'neutralized_file_name': f"neutralized_{fileName}",
'is_binary': True,
'mime_type': 'application/pdf',
'attributes': [{'original': k, 'placeholder': v} for k, v in all_mapping.items()],
'processed_info': {'type': 'binary', 'status': 'success', 'format': 'pdf', 'method': 'in-place'}
}
logger.error("PDF in-place neutralization failed")
return {
'neutralized_text': None,
'neutralized_bytes': None,
'is_binary': True,
'processed_info': {'type': 'binary', 'status': 'error', 'error': 'PDF in-place neutralization failed'}
}
# 4. Adapter: ContentPart list -> renderer schema (non-PDF only)
schema = content_parts_to_renderer_schema(neutralized_parts, title=fileName or "Neutralized")
# 5. Render to format
renderer, output_mime = self._getRendererForMime(mimeType)
if not renderer:
return {
'neutralized_text': None,
'neutralized_bytes': None,
'is_binary': True,
'processed_info': {'type': 'binary', 'status': 'error', 'error': f'No renderer for {mimeType}'}
}
try:
logger.info(f"Calling renderer.render for mime={mimeType}, renderer={type(renderer).__name__}")
rendered = await renderer.render(schema, fileName or "document", None, None)
logger.info(f"Renderer returned: type={type(rendered).__name__}, len={len(rendered) if rendered else 0}")
if not rendered or len(rendered) == 0:
logger.error("Renderer returned empty list")
return {
'neutralized_text': None,
'neutralized_bytes': None,
'is_binary': True,
'processed_info': {'type': 'binary', 'status': 'error', 'error': 'Render produced no output'}
}
doc = rendered[0]
logger.info(f"First doc: type={type(doc).__name__}, isinstance(dict)={isinstance(doc, dict)}, has documentData attr={hasattr(doc, 'documentData')}")
# Extract documentData: Pydantic v2 models may need model_dump() for reliable access
if isinstance(doc, dict):
doc_data = doc.get('documentData')
elif hasattr(doc, 'model_dump'):
d = doc.model_dump(mode='python')
doc_data = d.get('documentData')
else:
doc_data = getattr(doc, 'documentData', None)
logger.info(f"doc_data: type={type(doc_data).__name__ if doc_data is not None else 'None'}, len={len(doc_data) if doc_data else 0}")
if doc_data is None:
logger.error("Renderer returned document with no documentData")
return {
'neutralized_text': None,
'neutralized_bytes': None,
'is_binary': True,
'processed_info': {'type': 'binary', 'status': 'error', 'error': 'Renderer returned no data'}
}
if isinstance(doc_data, str):
doc_data = doc_data.encode('utf-8')
return {
'neutralized_text': None,
'neutralized_bytes': doc_data,
'neutralized_file_name': f"neutralized_{fileName}",
'is_binary': True,
'mime_type': output_mime,
'attributes': [{'original': k, 'placeholder': v} for k, v in all_mapping.items()],
'processed_info': {'type': 'binary', 'status': 'success', 'format': mimeType}
}
except Exception as e:
logger.error(f"Render failed for {mimeType}: {str(e)}", exc_info=True)
raise
return {
'neutralized_text': None,
'neutralized_bytes': None,
'is_binary': True,
'processed_info': {'type': 'binary', 'status': 'error', 'error': 'Render produced no output'}
}
def _getRendererForMime(self, mimeType: str):
"""Get renderer instance and output mime for the given input MIME type."""
from modules.services.serviceGeneration.renderers.rendererPdf import RendererPdf
from modules.services.serviceGeneration.renderers.rendererDocx import RendererDocx
from modules.services.serviceGeneration.renderers.rendererXlsx import RendererXlsx
from modules.services.serviceGeneration.renderers.rendererPptx import RendererPptx
mime_map = {
"application/pdf": (RendererPdf, "application/pdf"),
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": (RendererDocx, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": (RendererXlsx, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
"application/vnd.openxmlformats-officedocument.presentationml.presentation": (RendererPptx, "application/vnd.openxmlformats-officedocument.presentationml.presentation"),
}
pair = mime_map.get(mimeType)
if not pair:
return None, None
cls, out_mime = pair
renderer = cls(self.services)
return renderer, out_mime
def _reloadNamesFromConfig(self) -> None:
"""Reload names from config and update processors"""
try:

View file

@ -0,0 +1,115 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Adapter to convert ContentPart list (from extraction) to renderer JSON schema.
Schema: { metadata: {...}, documents: [{ sections: [{ content_type, elements: [...] }] }] }
"""
import csv
import io
from typing import Dict, List, Any
from modules.datamodels.datamodelExtraction import ContentPart
def content_parts_to_renderer_schema(parts: List[ContentPart], title: str = "Neutralized Document") -> Dict[str, Any]:
"""
Convert ContentPart list to the standardized renderer schema.
Args:
parts: List of ContentPart from extraction
title: Document title for metadata
Returns:
Dict with metadata, documents[0].sections structure for renderers
"""
sections: List[Dict[str, Any]] = []
for part in parts:
if not hasattr(part, 'typeGroup') or not hasattr(part, 'data'):
part_dict = part if isinstance(part, dict) else part.model_dump()
type_group = part_dict.get("typeGroup", "text")
data = part_dict.get("data", "")
label = part_dict.get("label", "")
else:
type_group = part.typeGroup
data = part.data or ""
label = part.label or ""
# Skip binary/image parts without text - they can't be neutralized meaningfully
if type_group in ("binary", "image"):
continue
# Skip empty data
if not (data and str(data).strip()):
continue
section = _part_to_section(type_group, data, label)
if section:
sections.append(section)
# Ensure at least one section (renderers require it)
if not sections:
sections = [{
"content_type": "paragraph",
"elements": [{"type": "paragraph", "content": {"text": ""}}]
}]
return {
"metadata": {"title": title},
"documents": [{
"sections": sections
}]
}
def _part_to_section(type_group: str, data: str, label: str) -> Dict[str, Any]:
"""Convert a single ContentPart to a section dict."""
data_str = str(data).strip()
if type_group == "table" and ("csv" in label.lower() or "," in data_str or "\t" in data_str):
# Parse CSV/TSV into table structure
try:
rows = list(csv.reader(io.StringIO(data_str)))
if rows:
headers = rows[0]
rows_data = rows[1:]
return {
"content_type": "table",
"elements": [{
"type": "table",
"content": {"headers": headers, "rows": rows_data}
}]
}
except Exception:
pass
# Fallback: treat as paragraph
return {
"content_type": "paragraph",
"elements": [{
"type": "extracted_text",
"content": data_str,
"source": label
}]
}
if type_group == "structure":
# PPTX slide content - often markdown-like
return {
"content_type": "paragraph",
"elements": [{
"type": "extracted_text",
"content": data_str,
"source": label
}]
}
# Default: text/paragraph
return {
"content_type": "paragraph",
"elements": [{
"type": "extracted_text",
"content": data_str,
"source": label
}]
}
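# Minimal usage sketch (part data and title are illustrative; dict parts are
# accepted by the hasattr/model_dump fallback above):
#
#   parts = [{"typeGroup": "text", "data": "Frau Muster, Bahnhofstrasse 1", "label": "page_1"}]
#   schema = content_parts_to_renderer_schema(parts, title="offer.pdf")
#   # -> {"metadata": {"title": "offer.pdf"},
#   #     "documents": [{"sections": [{
#   #         "content_type": "paragraph",
#   #         "elements": [{"type": "extracted_text",
#   #                       "content": "Frau Muster, Bahnhofstrasse 1",
#   #                       "source": "page_1"}]}]}]}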

View file

@ -10,6 +10,18 @@ import uuid
from typing import Dict, List, Tuple, Any
from .subPatterns import DataPatterns, findPatternsInText
# Phrases or words that must never be neutralized (labels, salutations, etc.)
_NEUTRALIZATION_BLACKLIST = frozenset({
"Für Sie", "Ihre Ansprechperson", "AXA 24", "General Agent",
"Your Contact", "Contact Person", "Bei Fragen", "Mit Freundlichen",
"Frau", "Herr", # Anrede
"Reise", "Reisebeginn", "Reiseende", "Vertragsbeginn", "Zahlbar",
"Versicherte", "Versicherungsnehmer", "Versicherung", "Insurance",
"Leistungen", "Basis", "Benefits", # Section labels
"Start", "Beginn", "Ende", "End", "trip", # Contract labels (Start of trip, End of trip, etc.)
})
class StringParser:
"""Handles string parsing and replacement operations"""
@ -48,7 +60,17 @@ class StringParser:
"""
patternMatches = findPatternsInText(text, self.data_patterns)
# Process pattern matches from right to left to avoid position shifts
# Exclude matches that are fully contained in a longer match (e.g. skip "2026" inside "17.02.2026")
def is_contained(m, all_matches):
for other in all_matches:
if other is m:
continue
if other[2] <= m[2] and m[3] <= other[3] and (other[3] - other[2]) > (m[3] - m[2]):
return True
return False
patternMatches = [m for m in patternMatches if not is_contained(m, patternMatches)]
# Process from right to left to avoid position shifts
for patternName, matchedText, start, end in reversed(patternMatches):
# Skip if already a placeholder
if self._isPlaceholder(matchedText):
@ -58,15 +80,27 @@ class StringParser:
if '[' in matchedText or ']' in matchedText:
continue
# Skip blacklisted text (labels, salutations, etc.); never neutralize these
if matchedText.strip() in _NEUTRALIZATION_BLACKLIST:
continue
# Skip if match contains any blacklisted word (e.g. "2026 Reise" or "2026 Reisebeginn" from address pattern)
if any(w in _NEUTRALIZATION_BLACKLIST for w in matchedText.split()):
continue
if matchedText not in self.mapping:
# Generate a UUID for the placeholder
placeholderId = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
typeMapping = {
'email': 'email',
'phone': 'phone',
'phone': 'phone',
'address': 'address',
'id': 'id'
'date': 'date',
'policy': 'policy',
'name': 'name',
'id': 'id',
'iban': 'iban',
'ssn': 'ssn',
}
placeholderType = typeMapping.get(patternName, 'data')
self.mapping[matchedText] = f"[{placeholderType}.{placeholderId}]"

View file

@ -234,6 +234,29 @@ class HeaderPatterns:
class DataPatterns:
"""Patterns for identifying sensitive data in content"""
patterns = [
# Name patterns (before email so "name@domain" is not matched as name)
Pattern(
name="name",
patterns=[
# Contact person context (fixed-width lookbehind for Python re)
r'(?<=Ansprechperson: )[A-Za-zäöüßÄÖÜ][a-zäöüß]+\s+[A-Za-zäöüßÄÖÜ][a-zäöüß]+',
r'(?<=Leiter: )[A-Za-zäöüßÄÖÜ][a-zäöüß]+\s+[A-Za-zäöüßÄÖÜ][a-zäöüß]+',
r'(?<=Kontaktperson: )[A-Za-zäöüßÄÖÜ][a-zäöüß]+\s+[A-Za-zäöüßÄÖÜ][a-zäöüß]+',
# Name only after the salutation (keep Frau/Herr; replace only the name); fixed-width lookbehind
r'(?<=Frau )[A-Za-zäöüßÄÖÜ][a-zäöüß]*(?:\s+[A-Za-zäöüßÄÖÜ][a-zäöüß]*)+',
r'(?<=Herr )[A-Za-zäöüßÄÖÜ][a-zäöüß]*(?:\s+[A-Za-zäöüßÄÖÜ][a-zäöüß]*)+',
r'(?<=Mr )[A-Za-zäöüßÄÖÜ][a-zäöüß]*(?:\s+[A-Za-zäöüßÄÖÜ][a-zäöüß]*)+',
r'(?<=Mr\. )[A-Za-zäöüßÄÖÜ][a-zäöüß]*(?:\s+[A-Za-zäöüßÄÖÜ][a-zäöüß]*)+',
r'(?<=Mrs )[A-Za-zäöüßÄÖÜ][a-zäöüß]*(?:\s+[A-Za-zäöüßÄÖÜ][a-zäöüß]*)+',
r'(?<=Mrs\. )[A-Za-zäöüßÄÖÜ][a-zäöüß]*(?:\s+[A-Za-zäöüßÄÖÜ][a-zäöüß]*)+',
r'(?<=Ms )[A-Za-zäöüßÄÖÜ][a-zäöüß]*(?:\s+[A-Za-zäöüßÄÖÜ][a-zäöüß]*)+',
r'(?<=Ms\. )[A-Za-zäöüßÄÖÜ][a-zäöüß]*(?:\s+[A-Za-zäöüßÄÖÜ][a-zäöüß]*)+',
r'(?<=Dr )[A-Za-zäöüßÄÖÜ][a-zäöüß]*(?:\s+[A-Za-zäöüßÄÖÜ][a-zäöüß]*)+',
r'(?<=Dr\. )[A-Za-zäöüßÄÖÜ][a-zäöüß]*(?:\s+[A-Za-zäöüßÄÖÜ][a-zäöüß]*)+',
],
replacement_template="[NAME_{}]"
),
# Email pattern for plain text
Pattern(
name="email",
@ -276,12 +299,18 @@ class DataPatterns:
replacement_template="[IBAN_{}]"
),
# Address patterns
# Address patterns (compound first so full footer = one UUID)
Pattern(
name="address",
patterns=[
r'\b(?:[A-Za-zäöüßÄÖÜ]+(?:strasse|str\.|gasse|weg|platz|allee|boulevard|avenue|via|strada|rue|chemin|route))\s+\d{1,4}(?:[a-z])?\b',
r'\b\d{4}\s+[A-Za-zäöüßÄÖÜ]+\b'
# Full address block: company, street, Postfach, postal code + city (stop before a trailing domain such as ", AXA.ch")
r'\b[^,\n]+(?:,\s*[^,\n]+)*,\s*\d{4}\s+[A-Za-zäöüßÄÖÜ]+\s*(?=,\s*[a-zA-Z0-9.-]+\.(?:ch|com|org|net)\b|$)',
# Street + house number (standalone)
r'\b(?:[A-Za-zäöüßÄÖÜ]+(?:-[A-Za-zäöüßÄÖÜ]+)*(?:strasse|str\.|gasse|weg|platz|allee|boulevard|avenue|via|strada|rue|chemin|route))\s+\d{1,4}(?:/\d{1,4})?(?:[a-z])?\b',
# Postfach / PO Box (standalone)
r'\b(?:Postfach|Postbox|P\.?O\.?\s*Box|Case\s+postale|Casella\s+postale|Boîte\s+postale)\s+\d{1,6}\b',
# Postal code + city (standalone)
r'\b\d{4}\s+[A-Za-zäöüßÄÖÜ]+\b(?!\s*:)'
],
replacement_template="[ADDRESS_{}]"
),
@ -290,25 +319,58 @@ class DataPatterns:
Pattern(
name="date",
patterns=[
# Specific date formats with context
r'\b(?:geboren|birth|né|nato)\s+am\s+[0-9]{2}[./-][0-9]{2}[./-][0-9]{4}\b', # Birth dates
r'\b(?:geboren|birth|né|nato)\s+am\s+[0-9]{4}[./-][0-9]{2}[./-][0-9]{2}\b', # Birth dates
r'\b(?:vertrag|contract|contrat|contratto)\s+vom\s+[0-9]{2}[./-][0-9]{2}[./-][0-9]{4}\b', # Contract dates
r'\b(?:vertrag|contract|contrat|contratto)\s+vom\s+[0-9]{4}[./-][0-9]{2}[./-][0-9]{2}\b', # Contract dates
# Specific date formats with month names
r'\b(?:geboren|birth|né|nato)\s+am\s+(?:jan|feb|mar|apr|mai|jun|jul|aug|sep|okt|nov|dez|januar|februar|märz|april|mai|juni|juli|august|september|oktober|november|dezember)[a-z]*\s+\d{4}\b', # Birth dates with month
r'\b(?:vertrag|contract|contrat|contratto)\s+vom\s+(?:jan|feb|mar|apr|mai|jun|jul|aug|sep|okt|nov|dez|januar|februar|märz|april|mai|juni|juli|august|september|oktober|november|dezember)[a-z]*\s+\d{4}\b' # Contract dates with month
# Standalone date values: require a valid day (1-31) and month (1-12) to avoid matching decimals (e.g. 53.37 CHF)
r'\b(0?[1-9]|[12]\d|3[01])[./-](0?[1-9]|1[0-2])[./-]\d{2,4}\b', # 17.02.2026, 29-03-2026
r'\b(0?[1-9]|[12]\d|3[01])\.(0?[1-9]|1[0-2])\.[\s]*\d{2,4}\b', # 17.02. 2026 (split across lines)
r'\b(0?[1-9]|[12]\d|3[01])\.(0?[1-9]|1[0-2])\.(?!\d)\b', # 17.02., 29.03.
r'\b(0?[1-9]|[12]\d|3[01])\.(0?[1-9]|1[0-2])\b(?!\.?\d)(?!/\d)', # 17.02, 29.03; exclude ratings (4.7/5)
# Context-specific date formats
r'\b(?:geboren|birth|né|nato)\s+am\s+[0-9]{2}[./-][0-9]{2}[./-][0-9]{4}\b',
r'\b(?:geboren|birth|né|nato)\s+am\s+[0-9]{4}[./-][0-9]{2}[./-][0-9]{2}\b',
r'\b(?:vertrag|contract|contrat|contratto)\s+vom\s+[0-9]{2}[./-][0-9]{2}[./-][0-9]{4}\b',
r'\b(?:vertrag|contract|contrat|contratto)\s+vom\s+[0-9]{4}[./-][0-9]{2}[./-][0-9]{2}\b',
r'\b(?:geboren|birth|né|nato)\s+am\s+(?:jan|feb|mar|apr|mai|jun|jul|aug|sep|okt|nov|dez|januar|februar|märz|april|mai|juni|juli|august|september|oktober|november|dezember)[a-z]*\s+\d{4}\b',
r'\b(?:vertrag|contract|contrat|contratto)\s+vom\s+(?:jan|feb|mar|apr|mai|jun|jul|aug|sep|okt|nov|dez|januar|februar|märz|april|mai|juni|juli|august|september|oktober|november|dezember)[a-z]*\s+\d{4}\b'
],
replacement_template="[DATE_{}]"
),
# Policy number patterns (replaces only the number, keeps labels like "Police Nr.")
Pattern(
name="policy",
patterns=[
# Number after "Police Nr." etc. (fixed-width lookbehind Python re requirement)
r'(?<=Police Nr\. )[\d.]+',
r'(?<=Police Nr\. )[\d.]+',
r'(?<=Police Nr\.: )[\d.]+',
r'(?<=Police Nr )[\d.]+',
r'(?<=Police Nr: )[\d.]+',
r'(?<=Polizzenr\. )[\d.]+',
r'(?<=Polizzenummer: )[\d.]+',
r'(?<=Polizzenummer )[\d.]+',
r'(?<=Policy No\. )[\d.]+',
r'(?<=Policy No )[\d.]+',
r'(?<=Policy Number: )[\d.]+',
r'(?<=Policy Number )[\d.]+',
r'(?<=Polizza n° )[\d.]+',
r'(?<=Numéro de police: )[\d.]+',
r'(?<=Numéro de police )[\d.]+',
r'(?<=Numero polizza: )[\d.]+',
r'(?<=Numero polizza )[\d.]+',
# Standalone policy number format (e.g. 11.559.499); requires a 2-4 digit prefix to avoid matching amounts
r'\b\d{2,4}(?:\.\d{3}){2,}\b'
],
replacement_template="[POLICY_{}]"
),
# SSN patterns
Pattern(
name="ssn",
patterns=[
r'\b(?:756|757|758|759)\.\d{4}\.\d{4}\.\d{2}\b', # Swiss AHV
r'\b(?:756|757|758|759)\.\d{4}\.\d{4}\.\d{2}\b(?!,)', # Swiss AHV - exclude before decimal
r'\b(?:CHE|DE|FR|IT)-\d{3}\.\d{3}\.\d{3}\b', # Company IDs
r'\b\d{3}\.\d{3}\.\d{3}\b' # Generic SSN format
# Generic SSN format - exclude when followed by comma+digit (European decimal)
r'\b\d{3}\.\d{3}\.\d{3}\b(?!,\d)'
],
replacement_template="[SSN_{}]"
)
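# Quick sanity sketch of the standalone formats using the standard re module
# (sample strings are illustrative):
#
#   import re
#   re.search(r'\b\d{2,4}(?:\.\d{3}){2,}\b', "Police Nr. 11.559.499")   # policy number -> match
#   re.search(r'\b\d{2,4}(?:\.\d{3}){2,}\b', "Praemie 1.234 CHF")       # single group -> no match
#   re.search(r'\b(0?[1-9]|[12]\d|3[01])[./-](0?[1-9]|1[0-2])[./-]\d{2,4}\b', "Reisebeginn 17.02.2026")  # -> match
#   re.search(r'\b(0?[1-9]|[12]\d|3[01])[./-](0?[1-9]|1[0-2])[./-]\d{2,4}\b', "Betrag 53.37 CHF")        # -> no match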

View file

@ -0,0 +1,110 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
PDF in-place neutralization using PyMuPDF.
Removes original text completely and inserts full UUID placeholders.
PyMuPDF uses insert_textbox which wraps long placeholders to preserve layout.
"""
import io
import logging
from typing import Dict, Optional
logger = logging.getLogger(__name__)
def neutralize_pdf_in_place(
pdf_bytes: bytes,
mapping: Dict[str, str],
) -> Optional[bytes]:
"""
Remove sensitive text and replace with UUID placeholders in-place.
Content is fully removed (not just covered) so it cannot be copied.
Args:
pdf_bytes: Original PDF file content
mapping: Dict of original_text -> placeholder (e.g. [address.uuid])
Returns:
Modified PDF bytes, or None on failure
"""
if not mapping:
return pdf_bytes
try:
import fitz # PyMuPDF
except ImportError:
logger.warning("PyMuPDF (fitz) not available for PDF in-place neutralization")
return None
try:
doc = fitz.open(stream=pdf_bytes, filetype="pdf")
except Exception as e:
logger.error(f"Failed to open PDF: {e}")
return None
sorted_items = sorted(mapping.items(), key=lambda x: -len(x[0]))
fill_color = (1, 1, 1)
text_color = (0, 0, 0)
fontname = "helv"
fontsize = 8
try:
for page_num in range(len(doc)):
page = doc[page_num]
for original_text, placeholder in sorted_items:
if not original_text or not placeholder:
continue
search_text = original_text
insert_text = placeholder
if placeholder.startswith("[policy."):
# Try label+number to get wider rect; insert UUID only (label+UUID would overflow)
for prefix in ("Police Nr. ", "Police Nr.: ", "Polizzenr. ", "Policy no. ", "Policy No. "):
candidate = prefix + original_text
try:
hits = page.search_for(candidate, quads=False)
if hits:
search_text = candidate
insert_text = placeholder # UUID only so it fits in rect
break
except Exception:
continue
try:
instances = page.search_for(search_text, quads=False)
except Exception:
instances = []
for rect in instances:
try:
fs = 5 if placeholder.startswith(("[policy.", "[address.")) else fontsize
page.add_redact_annot(
rect,
text=insert_text,
fill=fill_color,
text_color=text_color,
fontname=fontname,
fontsize=fs,
)
except Exception as e:
logger.warning(f"Redact failed for {original_text[:40]!r}: {e}")
try:
page.apply_redactions()
except Exception as e:
logger.debug(f"apply_redactions page {page_num + 1}: {e}")
buf = io.BytesIO()
doc.save(buf, garbage=4, deflate=True)
doc.close()
return buf.getvalue()
except Exception as e:
logger.error(f"PDF in-place neutralization failed: {e}", exc_info=True)
try:
doc.close()
except Exception:
pass
return None
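# Minimal usage sketch (file names and mapping values are illustrative):
#
#   with open("offer.pdf", "rb") as fh:
#       original = fh.read()
#   mapping = {"Max Muster": "[name.3f0c2a9e-1b2d-4c5e-8f6a-7b8c9d0e1f2a]"}
#   redacted = neutralize_pdf_in_place(original, mapping)
#   if redacted is not None:
#       with open("neutralized_offer.pdf", "wb") as fh:
#           fh.write(redacted)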

View file

@ -7,7 +7,7 @@ import logging
import json
# Import auth module
from modules.auth import limiter, getCurrentUser
from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext
# Import interfaces
import modules.interfaces.interfaceDbManagement as interfaceDbManagement
@ -40,7 +40,8 @@ router = APIRouter(
def get_files(
request: Request,
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
currentUser: User = Depends(getCurrentUser)
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
) -> PaginatedResponse[FileItem]:
"""
Get files with optional pagination, sorting, and filtering.
@ -69,7 +70,11 @@ def get_files(
detail=f"Invalid pagination parameter: {str(e)}"
)
managementInterface = interfaceDbManagement.getInterface(currentUser)
managementInterface = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
)
result = managementInterface.getAllFiles(pagination=paginationParams)
# If pagination was requested, result is PaginatedResult
@ -330,11 +335,16 @@ def get_file_stats(
def download_file(
request: Request,
fileId: str = Path(..., description="ID of the file to download"),
currentUser: User = Depends(getCurrentUser)
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
) -> Response:
"""Download a file"""
"""Download a file. Uses mandate/instance context when present (e.g. from feature pages)."""
try:
managementInterface = interfaceDbManagement.getInterface(currentUser)
managementInterface = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
)
# Get file data
fileData = managementInterface.getFile(fileId)
@ -378,11 +388,16 @@ def download_file(
def preview_file(
request: Request,
fileId: str = Path(..., description="ID of the file to preview"),
currentUser: User = Depends(getCurrentUser)
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
) -> FilePreview:
"""Preview a file's content"""
"""Preview a file's content. Uses mandate/instance context when present."""
try:
managementInterface = interfaceDbManagement.getInterface(currentUser)
managementInterface = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
)
# Get file preview using the correct method
preview = managementInterface.getFileContent(fileId)

View file

@ -114,6 +114,9 @@ def _getFeatureUiObjects(featureCode: str) -> List[Dict[str, Any]]:
elif featureCode == "teamsbot":
from modules.features.teamsbot.mainTeamsbot import UI_OBJECTS
return UI_OBJECTS
elif featureCode == "neutralization":
from modules.features.neutralization.mainNeutralization import UI_OBJECTS
return UI_OBJECTS
else:
logger.warning(f"Unknown feature code: {featureCode}")
return []