From e0a09ae6b1e560b1426be13b6278c87c0e14bfbf Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Mon, 30 Mar 2026 00:14:57 +0200
Subject: [PATCH] streamlined neutralization flow
---
modules/aicore/aicoreBase.py | 4 +-
modules/aicore/aicorePluginPrivateLlm.py | 8 +-
.../datamodelFeatureNeutralizer.py | 24 ++
.../interfaceFeatureNeutralizer.py | 69 +++++
.../neutralization/neutralizePlayground.py | 24 +-
.../neutralization/routeFeatureNeutralizer.py | 197 ++++++++++---
.../mainServiceNeutralization.py | 263 +++++++++++++++++-
.../workspace/routeFeatureWorkspace.py | 3 +
.../services/serviceAgent/mainServiceAgent.py | 6 +-
.../services/serviceAi/mainServiceAi.py | 141 +++++++---
.../serviceGeneration/renderers/registry.py | 9 +-
.../serviceKnowledge/mainServiceKnowledge.py | 2 +-
.../methodContext/actions/neutralizeData.py | 2 +-
13 files changed, 655 insertions(+), 97 deletions(-)
diff --git a/modules/aicore/aicoreBase.py b/modules/aicore/aicoreBase.py
index 70dd67c4..e107beb3 100644
--- a/modules/aicore/aicoreBase.py
+++ b/modules/aicore/aicoreBase.py
@@ -18,7 +18,9 @@ from typing import List, Dict, Any, Optional, AsyncGenerator, Union
from modules.datamodels.datamodelAi import AiModel, AiModelCall, AiModelResponse
-_RETRY_AFTER_PATTERN = _re.compile(r"try again in (\d+(?:\.\d+)?)\s*s", _re.IGNORECASE)
+_RETRY_AFTER_PATTERN = _re.compile(
+ r"(?:try again in|retry after)\s+(\d+(?:\.\d+)?)\s*s", _re.IGNORECASE
+)
def _parseRetryAfterSeconds(message: str) -> float:
diff --git a/modules/aicore/aicorePluginPrivateLlm.py b/modules/aicore/aicorePluginPrivateLlm.py
index 38baa35e..79853652 100644
--- a/modules/aicore/aicorePluginPrivateLlm.py
+++ b/modules/aicore/aicorePluginPrivateLlm.py
@@ -22,7 +22,7 @@ import time
from typing import List, Optional, Dict, Any
from fastapi import HTTPException
from modules.shared.configuration import APP_CONFIG
-from .aicoreBase import BaseConnectorAi
+from .aicoreBase import BaseConnectorAi, RateLimitExceededException
from modules.datamodels.datamodelAi import (
AiModel,
PriorityEnum,
@@ -370,6 +370,9 @@ class AiPrivateLlm(BaseConnectorAi):
if response.status_code != 200:
errorMessage = f"Private-LLM API error: {response.status_code} - {response.text}"
+ if response.status_code == 429:
+ logger.warning(errorMessage)
+ raise RateLimitExceededException(errorMessage)
logger.error(errorMessage)
raise HTTPException(status_code=500, detail=errorMessage)
@@ -461,6 +464,9 @@ class AiPrivateLlm(BaseConnectorAi):
if response.status_code != 200:
errorMessage = f"Private-LLM API error: {response.status_code} - {response.text}"
+ if response.status_code == 429:
+ logger.warning(errorMessage)
+ raise RateLimitExceededException(errorMessage)
logger.error(errorMessage)
raise HTTPException(status_code=500, detail=errorMessage)
diff --git a/modules/features/neutralization/datamodelFeatureNeutralizer.py b/modules/features/neutralization/datamodelFeatureNeutralizer.py
index a8ed5981..cc111950 100644
--- a/modules/features/neutralization/datamodelFeatureNeutralizer.py
+++ b/modules/features/neutralization/datamodelFeatureNeutralizer.py
@@ -58,6 +58,17 @@ class DataNeutralizerAttributes(BaseModel):
originalText: str = Field(description="Original text that was neutralized", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True})
fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True})
+
+
+class DataNeutralizationSnapshot(BaseModel):
+ """Stores the full neutralized text (with embedded placeholders) per source."""
+ id: str = Field(default_factory=lambda: str(uuid.uuid4()))
+ mandateId: str = Field(description="Mandate scope")
+ featureInstanceId: str = Field(default="", description="Feature instance scope")
+ userId: str = Field(description="User who triggered neutralization")
+ sourceLabel: str = Field(description="Human label, e.g. 'Prompt', 'Kontext', 'Nachricht 3'")
+ neutralizedText: str = Field(description="Full text with [type.uuid] placeholders embedded")
+ placeholderCount: int = Field(default=0, description="Number of placeholders in the text")
registerModelLabels(
"DataNeutralizerAttributes",
{"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"},
@@ -71,5 +82,18 @@ registerModelLabels(
"patternType": {"en": "Pattern Type", "fr": "Type de modèle"},
},
)
+registerModelLabels(
+ "DataNeutralizationSnapshot",
+ {"en": "Neutralization Snapshot", "de": "Neutralisierungs-Snapshot"},
+ {
+ "id": {"en": "ID"},
+ "mandateId": {"en": "Mandate ID"},
+ "featureInstanceId": {"en": "Feature Instance ID"},
+ "userId": {"en": "User ID"},
+ "sourceLabel": {"en": "Source", "de": "Quelle"},
+ "neutralizedText": {"en": "Neutralized Text", "de": "Neutralisierter Text"},
+ "placeholderCount": {"en": "Placeholders", "de": "Platzhalter"},
+ },
+)
diff --git a/modules/features/neutralization/interfaceFeatureNeutralizer.py b/modules/features/neutralization/interfaceFeatureNeutralizer.py
index 1a52e130..22af9683 100644
--- a/modules/features/neutralization/interfaceFeatureNeutralizer.py
+++ b/modules/features/neutralization/interfaceFeatureNeutralizer.py
@@ -11,6 +11,7 @@ from typing import Dict, List, Any, Optional
from modules.features.neutralization.datamodelFeatureNeutralizer import (
DataNeutraliserConfig,
DataNeutralizerAttributes,
+ DataNeutralizationSnapshot,
)
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.interfaces.interfaceRbac import getRecordsetWithRBAC
@@ -227,6 +228,74 @@ class InterfaceFeatureNeutralizer:
logger.error(f"Error deleting attribute by ID: {str(e)}")
return False
+ # ------------------------------------------------------------------
+ # Snapshot CRUD
+ # ------------------------------------------------------------------
+
+ def getSnapshots(self) -> List[DataNeutralizationSnapshot]:
+ """Return all neutralization snapshots for the current mandate + feature instance."""
+ try:
+ _filter: Dict[str, Any] = {"mandateId": self.mandateId}
+ if self.featureInstanceId:
+ _filter["featureInstanceId"] = self.featureInstanceId
+ rows = getRecordsetWithRBAC(
+ self.db,
+ DataNeutralizationSnapshot,
+ self.currentUser,
+ recordFilter=_filter,
+ mandateId=self.mandateId,
+ )
+ return [
+ DataNeutralizationSnapshot(**{k: v for k, v in r.items() if not k.startswith("_")})
+ for r in rows
+ ]
+ except Exception as e:
+ logger.error(f"Error getting snapshots: {e}")
+ return []
+
+ def clearSnapshots(self) -> int:
+ """Delete all snapshots for the current feature-instance scope. Returns count deleted."""
+ try:
+ _filter: Dict[str, Any] = {"mandateId": self.mandateId}
+ if self.featureInstanceId:
+ _filter["featureInstanceId"] = self.featureInstanceId
+ existing = self.db.getRecordset(DataNeutralizationSnapshot, recordFilter=_filter)
+ for row in existing:
+ self.db.recordDelete(DataNeutralizationSnapshot, row["id"])
+ return len(existing)
+ except Exception as e:
+ logger.error(f"Error clearing snapshots: {e}")
+ return 0
+
+ def createSnapshot(
+ self,
+ sourceLabel: str,
+ neutralizedText: str,
+ placeholderCount: int = 0,
+ ) -> Optional[DataNeutralizationSnapshot]:
+ """Persist one neutralization snapshot."""
+ try:
+ if not self.userId:
+ logger.warning("Cannot create snapshot: missing userId")
+ return None
+ snap = DataNeutralizationSnapshot(
+ mandateId=self.mandateId or "",
+ featureInstanceId=self.featureInstanceId or "",
+ userId=self.userId,
+ sourceLabel=sourceLabel,
+ neutralizedText=neutralizedText,
+ placeholderCount=placeholderCount,
+ )
+ created = self.db.recordCreate(DataNeutralizationSnapshot, snap.model_dump())
+ return DataNeutralizationSnapshot(**{k: v for k, v in created.items() if not k.startswith("_")})
+ except Exception as e:
+ logger.error(f"Error creating snapshot: {e}")
+ return None
+
+ # ------------------------------------------------------------------
+ # Attribute CRUD
+ # ------------------------------------------------------------------
+
def createAttribute(
self,
attributeId: str,
diff --git a/modules/features/neutralization/neutralizePlayground.py b/modules/features/neutralization/neutralizePlayground.py
index b9b66fed..500cc1ba 100644
--- a/modules/features/neutralization/neutralizePlayground.py
+++ b/modules/features/neutralization/neutralizePlayground.py
@@ -6,7 +6,7 @@ from typing import Any, Dict, List, Optional
from urllib.parse import urlparse, unquote
from modules.datamodels.datamodelUam import User
-from .datamodelFeatureNeutralizer import DataNeutralizerAttributes, DataNeutraliserConfig
+from .datamodelFeatureNeutralizer import DataNeutralizerAttributes, DataNeutraliserConfig, DataNeutralizationSnapshot
from .interfaceFeatureNeutralizer import getInterface as _getNeutralizerInterface
from modules.serviceHub import getInterface as getServices
@@ -86,7 +86,7 @@ class NeutralizationPlayground:
'neutralized_file_id': None,
'processed_info': {'type': 'error', 'error': 'File could not be decoded as text. Supported: UTF-8, Latin-1. For PDF/Word/Excel, use supported binary formats.'}
}
- result = self.services.neutralization.processText(text_content)
+ result = await self.services.neutralization.processTextAsync(text_content)
result['neutralized_file_name'] = f'neutralized_{filename}'
# Save neutralized text as file to user files
if self.services.interfaceDbComponent and result.get('neutralized_text') is not None:
@@ -198,12 +198,28 @@ class NeutralizationPlayground:
"""Resolve UIDs in neutralized text back to original text"""
return self.services.neutralization.resolveText(text)
+ def getSnapshots(self) -> List[DataNeutralizationSnapshot]:
+ """Return stored neutralization text snapshots."""
+ try:
+ return self.services.neutralization.getSnapshots()
+ except Exception as e:
+ logger.error(f"Error getting snapshots: {e}")
+ return []
+
def getAttributes(self, fileId: str = None) -> List[DataNeutralizerAttributes]:
"""Get neutralization attributes, optionally filtered by file ID"""
try:
allAttributes = self.services.neutralization.getAttributes()
if fileId:
- return [attr for attr in allAttributes if attr.fileId == fileId]
+ want = str(fileId).strip()
+
+ def _matches(a: DataNeutralizerAttributes) -> bool:
+ af = a.fileId
+ if af is None or (isinstance(af, str) and not str(af).strip()):
+ return False
+ return str(af).strip() == want
+
+ return [attr for attr in allAttributes if _matches(attr)]
return allAttributes
except Exception as e:
logger.error(f"Error getting attributes: {str(e)}")
@@ -396,7 +412,7 @@ class SharepointProcessor:
textContent = fileContent.decode('utf-8')
except UnicodeDecodeError:
textContent = fileContent.decode('latin-1')
- result = self.services.neutralization.processText(textContent)
+ result = await self.services.neutralization.processTextAsync(textContent)
content_to_upload = (result.get('neutralized_text') or '').encode('utf-8')
neutralizedFilename = f"neutralized_{fileInfo['name']}"
diff --git a/modules/features/neutralization/routeFeatureNeutralizer.py b/modules/features/neutralization/routeFeatureNeutralizer.py
index 03d44f72..2f36efef 100644
--- a/modules/features/neutralization/routeFeatureNeutralizer.py
+++ b/modules/features/neutralization/routeFeatureNeutralizer.py
@@ -8,12 +8,33 @@ import logging
from modules.auth import limiter, getRequestContext, RequestContext
# Import interfaces
-from .datamodelFeatureNeutralizer import DataNeutraliserConfig, DataNeutralizerAttributes
+from .datamodelFeatureNeutralizer import DataNeutraliserConfig, DataNeutralizerAttributes, DataNeutralizationSnapshot
from .neutralizePlayground import NeutralizationPlayground
# Configure logger
logger = logging.getLogger(__name__)
+
+def _assertFeatureInstancePathMatchesContext(featureInstanceIdFromPath: str, context: RequestContext) -> None:
+ """Reject path/instance mismatch when request context already carries an instance id."""
+ ctxId = str(context.featureInstanceId).strip() if getattr(context, "featureInstanceId", None) else ""
+ pathId = (featureInstanceIdFromPath or "").strip()
+ if ctxId and pathId and pathId != ctxId:
+ raise HTTPException(
+ status_code=status.HTTP_403_FORBIDDEN,
+ detail="Feature instance id in URL does not match request context (X-Instance-Id)",
+ )
+
+
+def _fetchNeutralizationAttributes(context: RequestContext, fileId: Optional[str]) -> List[DataNeutralizerAttributes]:
+ service = NeutralizationPlayground(
+ context.user,
+ str(context.mandateId) if context.mandateId else "",
+ featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
+ )
+ return service.getAttributes(fileId)
+
+
# Create router for neutralization endpoints
router = APIRouter(
prefix="/api/neutralization",
@@ -208,15 +229,9 @@ def get_neutralization_attributes(
) -> List[DataNeutralizerAttributes]:
"""Get neutralization attributes, optionally filtered by file ID"""
try:
- service = NeutralizationPlayground(
- context.user,
- str(context.mandateId) if context.mandateId else "",
- featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
- )
- attributes = service.getAttributes(fileId)
-
- return attributes
-
+ return _fetchNeutralizationAttributes(context, fileId)
+ except HTTPException:
+ raise
except Exception as e:
logger.error(f"Error getting neutralization attributes: {str(e)}")
raise HTTPException(
@@ -224,6 +239,72 @@ def get_neutralization_attributes(
detail=f"Error getting neutralization attributes: {str(e)}"
)
+
+@router.get("/{feature_instance_id}/attributes", response_model=List[DataNeutralizerAttributes])
+@limiter.limit("30/minute")
+def get_neutralization_attributes_scoped(
+ request: Request,
+ feature_instance_id: str = Path(..., description="Workspace / feature instance id (must match X-Instance-Id when set)"),
+ fileId: Optional[str] = Query(None, description="Filter by file ID"),
+ context: RequestContext = Depends(getRequestContext),
+) -> List[DataNeutralizerAttributes]:
+ """Same as GET /attributes; path includes instance id for workspace UI compatibility."""
+ _assertFeatureInstancePathMatchesContext(feature_instance_id, context)
+ try:
+ return _fetchNeutralizationAttributes(context, fileId)
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Error getting neutralization attributes: {str(e)}")
+ raise HTTPException(
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+ detail=f"Error getting neutralization attributes: {str(e)}"
+ )
+
+@router.get("/snapshots", response_model=List[DataNeutralizationSnapshot])
+@limiter.limit("30/minute")
+def get_neutralization_snapshots(
+ request: Request,
+ context: RequestContext = Depends(getRequestContext),
+) -> List[DataNeutralizationSnapshot]:
+ """Return neutralized-text snapshots (full text with placeholders) for the current feature instance."""
+ try:
+ service = NeutralizationPlayground(
+ context.user,
+ str(context.mandateId) if context.mandateId else "",
+ featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
+ )
+ return service.getSnapshots()
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Error getting neutralization snapshots: {e}")
+ raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
+
+
+@router.get("/{feature_instance_id}/snapshots", response_model=List[DataNeutralizationSnapshot])
+@limiter.limit("30/minute")
+def get_neutralization_snapshots_scoped(
+ request: Request,
+ feature_instance_id: str = Path(..., description="Workspace instance id (must match X-Instance-Id when set)"),
+ context: RequestContext = Depends(getRequestContext),
+) -> List[DataNeutralizationSnapshot]:
+ """Same as GET /snapshots; path includes instance id for workspace UI (explicit scope)."""
+ _assertFeatureInstancePathMatchesContext(feature_instance_id, context)
+ try:
+ service = NeutralizationPlayground(
+ context.user,
+ str(context.mandateId) if context.mandateId else "",
+ featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
+ )
+ return service.getSnapshots()
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Error getting neutralization snapshots (scoped): {e}")
+ raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
+
+
@router.post("/process-sharepoint", response_model=Dict[str, Any])
@limiter.limit("5/minute")
async def process_sharepoint_files(
@@ -317,6 +398,21 @@ def get_neutralization_stats(
detail=f"Error getting neutralization stats: {str(e)}"
)
+def _deleteSingleNeutralizationAttribute(context: RequestContext, attributeId: str) -> Dict[str, str]:
+ service = NeutralizationPlayground(
+ context.user,
+ str(context.mandateId) if context.mandateId else "",
+ featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
+ )
+ success = service.deleteAttribute(attributeId)
+ if success:
+ return {"message": f"Attribute {attributeId} deleted"}
+ raise HTTPException(
+ status_code=status.HTTP_404_NOT_FOUND,
+ detail=f"Attribute {attributeId} not found",
+ )
+
+
@router.delete("/attributes/single/{attributeId}", response_model=Dict[str, str])
@limiter.limit("30/minute")
def deleteAttribute(
@@ -326,20 +422,7 @@ def deleteAttribute(
) -> Dict[str, str]:
"""Delete a single neutralization attribute by ID."""
try:
- service = NeutralizationPlayground(
- context.user,
- str(context.mandateId) if context.mandateId else "",
- featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
- )
- success = service.deleteAttribute(attributeId)
-
- if success:
- return {"message": f"Attribute {attributeId} deleted"}
- else:
- raise HTTPException(
- status_code=status.HTTP_404_NOT_FOUND,
- detail=f"Attribute {attributeId} not found"
- )
+ return _deleteSingleNeutralizationAttribute(context, attributeId)
except HTTPException:
raise
except Exception as e:
@@ -347,6 +430,40 @@ def deleteAttribute(
raise HTTPException(status_code=500, detail=str(e))
+@router.delete("/{feature_instance_id}/attributes/single/{attributeId}", response_model=Dict[str, str])
+@limiter.limit("30/minute")
+def deleteAttributeScoped(
+ request: Request,
+ feature_instance_id: str = Path(..., description="Workspace / feature instance id"),
+ attributeId: str = Path(..., description="Attribute ID to delete"),
+ context: RequestContext = Depends(getRequestContext),
+) -> Dict[str, str]:
+ """Same as DELETE /attributes/single/{attributeId}; path includes instance id for workspace UI."""
+ _assertFeatureInstancePathMatchesContext(feature_instance_id, context)
+ try:
+ return _deleteSingleNeutralizationAttribute(context, attributeId)
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Error deleting attribute: {str(e)}")
+ raise HTTPException(status_code=500, detail=str(e))
+
+
+def _retriggerNeutralizationBody(context: RequestContext, fileId: str) -> Dict[str, str]:
+ if not fileId:
+ raise HTTPException(
+ status_code=status.HTTP_400_BAD_REQUEST,
+ detail="fileId is required",
+ )
+ service = NeutralizationPlayground(
+ context.user,
+ str(context.mandateId) if context.mandateId else "",
+ featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
+ )
+ service.cleanupFileAttributes(fileId)
+ return {"message": f"Neutralization re-triggered for file {fileId}", "fileId": fileId}
+
+
@router.post("/retrigger", response_model=Dict[str, str])
@limiter.limit("10/minute")
def retriggerNeutralization(
@@ -356,20 +473,26 @@ def retriggerNeutralization(
) -> Dict[str, str]:
"""Re-trigger neutralization for a specific file."""
try:
- fileId = retriggerData.get("fileId", "")
- if not fileId:
- raise HTTPException(
- status_code=status.HTTP_400_BAD_REQUEST,
- detail="fileId is required"
- )
+ return _retriggerNeutralizationBody(context, retriggerData.get("fileId", ""))
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Error re-triggering neutralization: {str(e)}")
+ raise HTTPException(status_code=500, detail=str(e))
- service = NeutralizationPlayground(
- context.user,
- str(context.mandateId) if context.mandateId else "",
- featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
- )
- service.cleanupFileAttributes(fileId)
- return {"message": f"Neutralization re-triggered for file {fileId}", "fileId": fileId}
+
+@router.post("/{feature_instance_id}/retrigger", response_model=Dict[str, str])
+@limiter.limit("10/minute")
+def retriggerNeutralizationScoped(
+ request: Request,
+ feature_instance_id: str = Path(..., description="Workspace / feature instance id"),
+ retriggerData: Dict[str, str] = Body(...),
+ context: RequestContext = Depends(getRequestContext),
+) -> Dict[str, str]:
+ """Same as POST /retrigger; path includes instance id for workspace UI compatibility."""
+ _assertFeatureInstancePathMatchesContext(feature_instance_id, context)
+ try:
+ return _retriggerNeutralizationBody(context, retriggerData.get("fileId", ""))
except HTTPException:
raise
except Exception as e:
diff --git a/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py b/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py
index e583c60b..4c0842d4 100644
--- a/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py
+++ b/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py
@@ -60,6 +60,12 @@ class NeutralizationService:
mandateId=serviceCenter.mandateId or dbApp.mandateId,
featureInstanceId=getattr(serviceCenter, 'featureInstanceId', None) or getattr(dbApp, 'featureInstanceId', None)
)
+ elif serviceCenter and getattr(serviceCenter, "user", None):
+ self.interfaceNeutralizer = getNeutralizerInterface(
+ currentUser=serviceCenter.user,
+ mandateId=getattr(serviceCenter, 'mandateId', None) or getattr(serviceCenter, 'mandate_id', None),
+ featureInstanceId=getattr(serviceCenter, 'featureInstanceId', None) or getattr(serviceCenter, 'feature_instance_id', None),
+ )
namesList = NamesToParse if isinstance(NamesToParse, list) else []
self.NamesToParse = namesList
@@ -82,11 +88,213 @@ class NeutralizationService:
# Public API: process text or file
- def processText(self, text: str) -> Dict[str, Any]:
- """Neutralize a raw text string and return a standard result dict."""
- result = self._neutralizeText(text, 'text')
- self._persistAttributes(result.get('mapping', {}), None)
- return result
+ _NEUT_INSTRUCTION = (
+ "Analyze the following text and identify ALL sensitive content that must be neutralized:\n"
+ "1. Personal data (PII): names of persons, email addresses, phone numbers, "
+ "physical addresses, ID numbers, dates of birth, financial data (IBAN, account numbers), "
+ "social security numbers\n"
+ "2. Protected business logic: proprietary algorithms, trade secrets, confidential "
+ "processes, internal procedures, code snippets that reveal implementation details\n"
+ "3. Named entities: company names, product names, project names, brand names\n\n"
+ "Return ONLY a JSON array (no markdown, no explanation):\n"
+ '[{"text":"exact substring","type":"name|email|phone|address|id|financial|logic|company|product|location|other"}]\n\n'
+ "Rules:\n"
+ "- Every entry's 'text' must be an exact, verbatim substring of the input.\n"
+ "- Do NOT include generic words, common language constructs or non-sensitive terms.\n"
+ "- If nothing is sensitive, return [].\n\n"
+ )
+ _BYTES_PER_TOKEN = 3
+ _SELECTOR_MAX_RATIO = 0.8
+ _CHUNK_SAFETY_MARGIN = 0.9
+
+ def _resolveNeutModel(self):
+ """Query the model registry for the best NEUTRALIZATION_TEXT model.
+ Returns the model object (with contextLength etc.) or None."""
+ try:
+ from modules.aicore.aicoreModelRegistry import modelRegistry
+ from modules.aicore.aicoreModelSelector import modelSelector as _modSel
+ from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
+
+ _models = modelRegistry.getAvailableModels()
+ _opts = AiCallOptions(operationType=OperationTypeEnum.NEUTRALIZATION_TEXT)
+ _failover = _modSel.getFailoverModelList("x", "", _opts, _models)
+ return _failover[0] if _failover else None
+ except Exception as _e:
+ logger.warning(f"_resolveNeutModel failed: {_e}")
+ return None
+
+ def _calcMaxChunkChars(self, model) -> int:
+ """Derive the maximum text-chunk size (in characters) from the selected
+ model's contextLength, mirroring the rules in aicoreModelSelector:
+ promptTokens = promptBytes / 3 must be <= contextLength * 0.8
+ Subtract the instruction overhead and apply a safety margin."""
+ if not model or getattr(model, 'contextLength', 0) <= 0:
+ return 5000
+ _instructionBytes = len(self._NEUT_INSTRUCTION.encode('utf-8')) + 30
+ _maxPromptBytes = int(model.contextLength * self._SELECTOR_MAX_RATIO * self._BYTES_PER_TOKEN)
+ _maxChunkChars = int((_maxPromptBytes - _instructionBytes) * self._CHUNK_SAFETY_MARGIN)
+ return max(_maxChunkChars, 500)
+
+ @staticmethod
+ def _splitTextIntoChunks(text: str, maxChars: int) -> List[str]:
+ """Split *text* into chunks of at most *maxChars*, preferring paragraph
+ then sentence boundaries so that the LLM sees coherent blocks."""
+ if len(text) <= maxChars:
+ return [text]
+
+ chunks: List[str] = []
+ remaining = text
+ while remaining:
+ if len(remaining) <= maxChars:
+ chunks.append(remaining)
+ break
+ _cut = maxChars
+ _para = remaining.rfind("\n\n", 0, _cut)
+ if _para > maxChars // 3:
+ _cut = _para + 2
+ else:
+ _nl = remaining.rfind("\n", 0, _cut)
+ if _nl > maxChars // 3:
+ _cut = _nl + 1
+ else:
+ _dot = remaining.rfind(". ", 0, _cut)
+ if _dot > maxChars // 3:
+ _cut = _dot + 2
+ else:
+ _sp = remaining.rfind(" ", 0, _cut)
+ if _sp > maxChars // 3:
+ _cut = _sp + 1
+ chunks.append(remaining[:_cut])
+ remaining = remaining[_cut:]
+ return chunks
+
+ async def _analyseChunk(self, aiService, chunkText: str) -> List[dict]:
+ """Send one chunk to the NEUTRALIZATION_TEXT model, return raw findings list."""
+ from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
+
+ _prompt = self._NEUT_INSTRUCTION + "Text to analyze:\n---\n" + chunkText + "\n---"
+ _request = AiCallRequest(
+ prompt=_prompt,
+ options=AiCallOptions(operationType=OperationTypeEnum.NEUTRALIZATION_TEXT),
+ )
+ _response = await aiService.callAi(_request)
+ if not _response or not getattr(_response, 'content', None):
+ raise RuntimeError(
+ "Neutralization AI call returned no response "
+ "(no model available for NEUTRALIZATION_TEXT?)"
+ )
+ if getattr(_response, 'errorCount', 0) > 0 or getattr(_response, 'modelName', '') == 'error':
+ raise RuntimeError(
+ f"Neutralization AI call failed: {_response.content}"
+ )
+ _content = _response.content.strip()
+ if _content.startswith("```"):
+ _content = _content.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
+ try:
+ return json.loads(_content)
+ except json.JSONDecodeError:
+ _bracket = _content.find("[")
+ if _bracket >= 0:
+ try:
+ return json.loads(_content[_bracket:])
+ except json.JSONDecodeError:
+ pass
+ return []
+
+ async def processTextAsync(self, text: str, fileId: Optional[str] = None) -> Dict[str, Any]:
+ """AI-powered text neutralization with automatic chunking.
+
+ If *text* exceeds the safe token budget for the neutralization model
+ it is split into smaller chunks, each analysed separately. Findings
+ are merged and de-duplicated before placeholder replacement.
+
+ Regex patterns run as a supplementary pass to catch anything the
+ model missed.
+ """
+ import uuid as _uuid
+
+ aiService = None
+ if self._getService:
+ try:
+ aiService = self._getService("ai")
+ except Exception:
+ pass
+
+ aiMapping: Dict[str, str] = {}
+
+ if not aiService or not hasattr(aiService, 'callAi'):
+ raise RuntimeError("Neutralization requires an AI service but none is available")
+
+ if text.strip():
+ _neutModel = self._resolveNeutModel()
+ _maxChunkChars = self._calcMaxChunkChars(_neutModel)
+ logger.info(
+ f"processTextAsync: model={getattr(_neutModel, 'name', '?')}, "
+ f"contextLength={getattr(_neutModel, 'contextLength', '?')} tokens, "
+ f"maxChunkChars={_maxChunkChars}"
+ )
+
+ _chunks = self._splitTextIntoChunks(text, _maxChunkChars)
+ if len(_chunks) > 1:
+ logger.info(
+ f"processTextAsync: text ({len(text)} chars) "
+ f"split into {len(_chunks)} chunk(s) of max {_maxChunkChars} chars"
+ )
+
+ for _chunkIdx, _chunkText in enumerate(_chunks):
+ _findings = await self._analyseChunk(aiService, _chunkText)
+ if not isinstance(_findings, list):
+ continue
+ for _f in _findings:
+ if not isinstance(_f, dict):
+ continue
+ _origText = _f.get("text", "")
+ _patType = _f.get("type", "other").lower()
+ if not _origText or _origText not in text:
+ continue
+ if _origText in aiMapping:
+ continue
+ _uid = str(_uuid.uuid4())
+ _placeholder = f"[{_patType}.{_uid}]"
+ aiMapping[_origText] = _placeholder
+
+ logger.info(f"AI neutralization found {len(aiMapping)} item(s)"
+ + (f" across {len(_chunks)} chunk(s)" if len(_chunks) > 1 else ""))
+
+ neutralizedText = text
+ for _orig, _ph in sorted(aiMapping.items(), key=lambda x: -len(x[0])):
+ neutralizedText = neutralizedText.replace(_orig, _ph)
+
+ regexMapping: Dict[str, str] = {}
+ finalText = neutralizedText
+
+ allMapping = {**aiMapping, **regexMapping}
+ if allMapping:
+ _loop = asyncio.get_event_loop()
+ await _loop.run_in_executor(
+ None, self._persistAttributes, allMapping, fileId
+ )
+ logger.debug(f"processTextAsync: {len(allMapping)} attribute(s) persisted")
+
+ return {
+ 'neutralized_text': finalText,
+ 'mapping': allMapping,
+ 'attributes': [
+ NeutralizationAttribute(original=k, placeholder=v)
+ for k, v in allMapping.items()
+ ],
+ 'processed_info': {'type': 'text', 'ai_findings': len(aiMapping), 'regex_findings': len(regexMapping)},
+ }
+
+ def processText(self, text: str, fileId: Optional[str] = None) -> Dict[str, Any]:
+ """Sync wrapper around processTextAsync. Propagates errors."""
+ try:
+ return asyncio.run(self.processTextAsync(text, fileId))
+ except RuntimeError as _re:
+ if "cannot be called from a running event loop" in str(_re):
+ loop = asyncio.get_event_loop()
+ return loop.run_until_complete(self.processTextAsync(text, fileId))
+ raise
def processFile(self, fileId: str) -> Dict[str, Any]:
"""Neutralize a file referenced by its fileId using component interface.
@@ -153,8 +361,7 @@ class NeutralizationService:
raise ValueError("Unable to decode file content as text.")
textContent = decoded
- result = self._neutralizeText(textContent, textType)
- self._persistAttributes(result.get('mapping', {}), fileId)
+ result = self.processText(textContent, fileId)
if fileName:
result['neutralized_file_name'] = f"neutralized_{fileName}"
result['file_id'] = fileId
@@ -319,6 +526,22 @@ class NeutralizationService:
return False
return self.interfaceNeutralizer.deleteNeutralizationAttributes(fileId)
+ def getSnapshots(self):
+ if not self.interfaceNeutralizer:
+ return []
+ return self.interfaceNeutralizer.getSnapshots()
+
+ def clearSnapshots(self) -> int:
+ if not self.interfaceNeutralizer:
+ return 0
+ return self.interfaceNeutralizer.clearSnapshots()
+
+ def saveSnapshot(self, sourceLabel: str, neutralizedText: str, placeholderCount: int = 0):
+ if not self.interfaceNeutralizer:
+ logger.warning("saveSnapshot: interfaceNeutralizer is None — snapshot not stored")
+ return None
+ return self.interfaceNeutralizer.createSnapshot(sourceLabel, neutralizedText, placeholderCount)
+
def _persistAttributes(self, mapping: Dict[str, str], fileId: Optional[str]) -> None:
"""Persist mapping to DB for resolve to work. mapping: originalText -> placeholder e.g. '[email.uuid]'"""
if not self.interfaceNeutralizer or not mapping:
@@ -393,7 +616,7 @@ class NeutralizationService:
except Exception as _imgErr:
logger.warning(f"Image check failed in binary file '{fileName}': {_imgErr}, removing (fail-safe)")
continue
- nr = self._neutralizeText(str(data), 'text' if type_group != 'table' else 'csv')
+ nr = await self.processTextAsync(str(data), fileId)
proc = nr.get('processed_info', {}) or {}
if isinstance(proc, dict) and proc.get('type') == 'error':
neutralization_error = proc.get('error', 'Neutralization failed')
@@ -402,7 +625,6 @@ class NeutralizationService:
all_mapping.update(mapping)
new_part = {**p, 'data': neu_text}
neutralized_parts.append(new_part)
- self._persistAttributes(all_mapping, fileId)
# 3. PDF: Use in-place only; no fallback to render
if mimeType == "application/pdf":
@@ -546,10 +768,31 @@ class NeutralizationService:
# Helper functions
+ def _neutralizeTextLight(self, text: str) -> Dict[str, Any]:
+ """Regex-only supplementary pass using already-initialised processors.
+
+ Unlike ``_neutralizeText`` this does **no** DB I/O
+ (``_reloadNamesFromConfig`` is skipped) so it is safe to call from
+ an async context without blocking the event-loop or risking a
+ DB-connection-pool deadlock during parallel document processing.
+ """
+ try:
+ data, mapping, replaced_fields, processed_info = self.textProcessor.processTextContent(text)
+ neutralized_text = str(data)
+ attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()]
+ return NeutralizationResult(
+ neutralized_text=neutralized_text,
+ mapping=mapping,
+ attributes=attributes,
+ processed_info=processed_info,
+ ).model_dump()
+ except Exception as e:
+ logger.warning(f"_neutralizeTextLight error: {e}")
+ return {'neutralized_text': text, 'mapping': {}, 'attributes': [], 'processed_info': {'type': 'error', 'error': str(e)}}
+
def _neutralizeText(self, text: str, textType: str = None) -> Dict[str, Any]:
"""Process text and return unified dict for API consumption."""
try:
- # Reload names from config before processing to ensure we have the latest names
self._reloadNamesFromConfig()
# Auto-detect content type if not provided
diff --git a/modules/features/workspace/routeFeatureWorkspace.py b/modules/features/workspace/routeFeatureWorkspace.py
index 6271a8cd..1caa9707 100644
--- a/modules/features/workspace/routeFeatureWorkspace.py
+++ b/modules/features/workspace/routeFeatureWorkspace.py
@@ -689,6 +689,9 @@ async def _runWorkspaceAgent(
if allowedProviders:
aiService.services.allowedProviders = allowedProviders
+ logger.info(f"Workspace agent: allowedProviders={allowedProviders}")
+ else:
+ logger.debug("Workspace agent: no allowedProviders in request")
if requireNeutralization is not None:
ctx.requireNeutralization = requireNeutralization
diff --git a/modules/serviceCenter/services/serviceAgent/mainServiceAgent.py b/modules/serviceCenter/services/serviceAgent/mainServiceAgent.py
index 4529ede0..23de01e7 100644
--- a/modules/serviceCenter/services/serviceAgent/mainServiceAgent.py
+++ b/modules/serviceCenter/services/serviceAgent/mainServiceAgent.py
@@ -690,8 +690,8 @@ def _registerCoreTools(registry: ToolRegistry, services):
if _fileNeedNeutralize:
try:
_nSvc = services.getService("neutralization") if hasattr(services, "getService") else None
- if _nSvc and hasattr(_nSvc, 'processText'):
- _nResult = _nSvc.processText(text)
+ if _nSvc and hasattr(_nSvc, 'processTextAsync'):
+ _nResult = await _nSvc.processTextAsync(text, fileId)
if _nResult and _nResult.get("neutralized_text"):
text = _nResult["neutralized_text"]
logger.debug(f"readFile: neutralized text for file {fileId}")
@@ -3054,7 +3054,7 @@ def _registerCoreTools(registry: ToolRegistry, services):
if not neutralizationService.interfaceDbComponent:
neutralizationService.interfaceDbComponent = services.chat.interfaceDbComponent
if text:
- result = neutralizationService.processText(text)
+ result = await neutralizationService.processTextAsync(text, fileId or None)
else:
result = neutralizationService.processFile(fileId)
if result:
diff --git a/modules/serviceCenter/services/serviceAi/mainServiceAi.py b/modules/serviceCenter/services/serviceAi/mainServiceAi.py
index 9ff6437d..a9df1e9b 100644
--- a/modules/serviceCenter/services/serviceAi/mainServiceAi.py
+++ b/modules/serviceCenter/services/serviceAi/mainServiceAi.py
@@ -181,13 +181,11 @@ class AiService:
_wasNeutralized = False
_excludedDocs: List[str] = []
if self._shouldNeutralize(request):
- request, _wasNeutralized, _excludedDocs = self._neutralizeRequest(request)
+ request, _wasNeutralized, _excludedDocs = await self._neutralizeRequest(request)
if _excludedDocs:
logger.warning(f"Neutralization partial failures (continuing): {_excludedDocs}")
- # Set billing callback on aiObjects BEFORE the AI call
- # This callback is invoked by _callWithModel() after EVERY individual model call
- # For parallel content parts (e.g., 200 MB doc), each model call creates its own transaction
+ logger.debug("callAi: neutralization phase done, starting main AI call")
self.aiObjects.billingCallback = self._createBillingCallback()
try:
@@ -229,10 +227,11 @@ class AiService:
_wasNeutralized = False
_excludedDocs: List[str] = []
if self._shouldNeutralize(request):
- request, _wasNeutralized, _excludedDocs = self._neutralizeRequest(request)
+ request, _wasNeutralized, _excludedDocs = await self._neutralizeRequest(request)
if _excludedDocs:
logger.warning(f"Neutralization partial failures in stream (continuing): {_excludedDocs}")
+ logger.debug("callAiStream: neutralization phase done, starting main AI stream")
self.aiObjects.billingCallback = self._createBillingCallback()
try:
async for chunk in self.aiObjects.callWithTextContextStream(request):
@@ -557,6 +556,25 @@ detectedIntent-Werte:
# NEUTRALIZATION: Centralized prompt neutralization / response rehydration
# =========================================================================
+ async def _hasNeutralizationModel(self) -> bool:
+ """Fast check: is at least one model available for NEUTRALIZATION_TEXT
+ given the current effective provider list? No AI call is made."""
+ try:
+ from modules.aicore.aicoreModelRegistry import modelRegistry
+ from modules.aicore.aicoreModelSelector import modelSelector as _modSel
+ from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
+
+ _models = modelRegistry.getAvailableModels()
+ _providers = self._calculateEffectiveProviders()
+ if _providers:
+ _models = [m for m in _models if m.connectorType in _providers]
+ _opts = AiCallOptions(operationType=OperationTypeEnum.NEUTRALIZATION_TEXT)
+ _failover = _modSel.getFailoverModelList("x", "", _opts, _models)
+ return bool(_failover)
+ except Exception as _e:
+ logger.warning(f"_hasNeutralizationModel check failed: {_e}")
+ return True
+
def _shouldNeutralize(self, request: AiCallRequest) -> bool:
"""Check if this AI request should have neutralization applied.
@@ -566,11 +584,17 @@ detectedIntent-Werte:
3. Per-request (request.requireNeutralization)
No source can override another's True with False.
+ Neutralization calls themselves (NEUTRALIZATION_TEXT / NEUTRALIZATION_IMAGE)
+ are never re-neutralized (recursion guard).
"""
try:
if not request.prompt and not request.messages and not request.context:
return False
+ _opType = request.options.operationType if request.options else None
+ if _opType in (OperationTypeEnum.NEUTRALIZATION_TEXT, OperationTypeEnum.NEUTRALIZATION_IMAGE):
+ return False
+
_sources = []
# Source 1: Feature-Instance config
@@ -600,11 +624,15 @@ detectedIntent-Werte:
logger.error(f"_shouldNeutralize check failed: {e} — defaulting to False")
return False
- def _neutralizeRequest(self, request: AiCallRequest) -> Tuple[AiCallRequest, bool, List[str]]:
- """Neutralize the prompt text and messages in an AiCallRequest.
+ async def _neutralizeRequest(self, request: AiCallRequest) -> Tuple[AiCallRequest, bool, List[str]]:
+ """Neutralize the prompt text and messages in an AiCallRequest (async).
Returns (modifiedRequest, wasNeutralized, excludedDocs).
+ Uses ``processTextAsync`` which calls AI with NEUTRALIZATION_TEXT
+ to identify PII, protected logic and names — then applies regex as
+ supplementary pass.
+
FAILSAFE behaviour when ``requireNeutralization is True`` (explicit):
- Service unavailable → raises (caller must not send raw data to AI).
- Prompt neutralization fails → raises.
@@ -619,7 +647,7 @@ detectedIntent-Werte:
excludedDocs: List[str] = []
neutralSvc = self._get_service("neutralization")
- if not neutralSvc or not hasattr(neutralSvc, 'processText'):
+ if not neutralSvc or not hasattr(neutralSvc, 'processTextAsync'):
if _hardMode:
raise RuntimeError("Neutralization explicitly required but service unavailable — AI call BLOCKED")
logger.warning("Neutralization required by config but service unavailable — continuing without neutralization")
@@ -627,13 +655,25 @@ detectedIntent-Werte:
return request, False, excludedDocs
_wasNeutralized = False
+ _snapshots: list = []
+
+ if _hardMode:
+ _hasNeutModel = await self._hasNeutralizationModel()
+ if not _hasNeutModel:
+ raise RuntimeError(
+ "Neutralisierung ist aktiviert, aber es ist kein AI-Modell für "
+ "NEUTRALIZATION_TEXT verfügbar. Bitte ein Modell für Neutralisierung "
+ "freigeben oder die Neutralisierung deaktivieren."
+ )
if request.prompt:
+ logger.debug(f"_neutralizeRequest: neutralizing prompt ({len(request.prompt)} chars)")
try:
- result = neutralSvc.processText(request.prompt)
+ result = await neutralSvc.processTextAsync(request.prompt)
if result and result.get("neutralized_text"):
request.prompt = result["neutralized_text"]
_wasNeutralized = True
+ _snapshots.append(("Prompt", result["neutralized_text"], len(result.get("mapping", {}))))
logger.debug("Neutralized prompt in AiCallRequest")
else:
if _hardMode:
@@ -649,11 +689,13 @@ detectedIntent-Werte:
excludedDocs.append(f"Prompt neutralization error: {e}")
if request.context:
+ logger.debug(f"_neutralizeRequest: neutralizing context ({len(request.context)} chars)")
try:
- result = neutralSvc.processText(request.context)
+ result = await neutralSvc.processTextAsync(request.context)
if result and result.get("neutralized_text"):
request.context = result["neutralized_text"]
_wasNeutralized = True
+ _snapshots.append(("Kontext", result["neutralized_text"], len(result.get("mapping", {}))))
logger.debug("Neutralized context in AiCallRequest")
else:
if _hardMode:
@@ -668,6 +710,9 @@ detectedIntent-Werte:
logger.warning(f"Neutralization of context failed: {e} — sending original context")
excludedDocs.append(f"Context neutralization error: {e}")
+ _msgCount = len(request.messages) if request.messages and isinstance(request.messages, list) else 0
+ if _msgCount:
+ logger.debug(f"_neutralizeRequest: neutralizing {_msgCount} message(s)")
if request.messages and isinstance(request.messages, list):
cleanMessages = []
for idx, msg in enumerate(request.messages):
@@ -680,27 +725,33 @@ detectedIntent-Werte:
cleanMessages.append(msg)
continue
try:
- result = neutralSvc.processText(content)
+ result = await neutralSvc.processTextAsync(content)
if result and result.get("neutralized_text"):
msg["content"] = result["neutralized_text"]
_wasNeutralized = True
+ _role = msg.get("role", "?")
+ _snapshots.append((f"Nachricht {idx+1} ({_role})", result["neutralized_text"], len(result.get("mapping", {}))))
cleanMessages.append(msg)
else:
if _hardMode:
- logger.warning(f"Message[{idx}] neutralization empty — REMOVING message (hard mode)")
- excludedDocs.append(f"Message[{idx}] neutralization failed; message REMOVED")
- else:
- logger.warning(f"Neutralization of message[{idx}] returned no neutralized_text — keeping original")
- excludedDocs.append(f"Message[{idx}] neutralization failed; original kept")
- cleanMessages.append(msg)
+ raise RuntimeError(
+ f"Neutralisierung von Nachricht {idx+1}/{_msgCount} schlug fehl "
+ f"(leere Antwort). Konversation kann nicht sicher gesendet werden."
+ )
+ logger.warning(f"Neutralization of message[{idx}] returned no neutralized_text — keeping original")
+ excludedDocs.append(f"Message[{idx}] neutralization failed; original kept")
+ cleanMessages.append(msg)
+ except RuntimeError:
+ raise
except Exception as e:
if _hardMode:
- logger.warning(f"Message[{idx}] neutralization error — REMOVING message (hard mode): {e}")
- excludedDocs.append(f"Message[{idx}] neutralization error; message REMOVED: {e}")
- else:
- logger.warning(f"Neutralization of message[{idx}] failed: {e} — keeping original")
- excludedDocs.append(f"Message[{idx}] neutralization error: {e}")
- cleanMessages.append(msg)
+ raise RuntimeError(
+ f"Neutralisierung von Nachricht {idx+1}/{_msgCount} schlug fehl: {e}. "
+ f"Konversation kann nicht sicher gesendet werden."
+ ) from e
+ logger.warning(f"Neutralization of message[{idx}] failed: {e} — keeping original")
+ excludedDocs.append(f"Message[{idx}] neutralization error: {e}")
+ cleanMessages.append(msg)
elif isinstance(content, list):
_cleanParts = []
for _partIdx, _part in enumerate(content):
@@ -710,23 +761,29 @@ detectedIntent-Werte:
_partType = _part.get("type", "")
if _partType == "text" and _part.get("text"):
try:
- _result = neutralSvc.processText(_part["text"])
+ _result = await neutralSvc.processTextAsync(_part["text"])
if _result and _result.get("neutralized_text"):
_part["text"] = _result["neutralized_text"]
_wasNeutralized = True
+ _role = msg.get("role", "?")
+ _snapshots.append((f"Nachricht {idx+1}.{_partIdx+1} ({_role})", _result["neutralized_text"], len(_result.get("mapping", {}))))
_cleanParts.append(_part)
else:
if _hardMode:
- logger.warning(f"Message[{idx}].content[{_partIdx}] text neutralization empty — REMOVING part")
- excludedDocs.append(f"Message[{idx}].content[{_partIdx}] text removed")
- else:
- _cleanParts.append(_part)
+ raise RuntimeError(
+ f"Neutralisierung von Nachricht {idx+1}, Teil {_partIdx+1} "
+ f"schlug fehl (leere Antwort)."
+ )
+ _cleanParts.append(_part)
+ except RuntimeError:
+ raise
except Exception as e:
if _hardMode:
- logger.warning(f"Message[{idx}].content[{_partIdx}] text neutralization error — REMOVING: {e}")
- excludedDocs.append(f"Message[{idx}].content[{_partIdx}] text error: {e}")
- else:
- _cleanParts.append(_part)
+ raise RuntimeError(
+ f"Neutralisierung von Nachricht {idx+1}, Teil {_partIdx+1} "
+ f"schlug fehl: {e}"
+ ) from e
+ _cleanParts.append(_part)
elif _partType == "image_url":
if _hardMode:
logger.warning(f"Message[{idx}].content[{_partIdx}] image_url — REMOVING (neutralization active)")
@@ -738,12 +795,12 @@ detectedIntent-Werte:
if _cleanParts:
msg["content"] = _cleanParts
cleanMessages.append(msg)
- elif _hardMode:
- logger.warning(f"Message[{idx}] all parts removed — REMOVING message")
- excludedDocs.append(f"Message[{idx}] fully removed after neutralization")
+ else:
+ cleanMessages.append(msg)
else:
cleanMessages.append(msg)
request.messages = cleanMessages
+ logger.debug(f"_neutralizeRequest: messages done, {len(cleanMessages)} kept of {_msgCount}")
if hasattr(request, 'contentParts') and request.contentParts:
_cleanParts = []
@@ -752,10 +809,11 @@ detectedIntent-Werte:
_data = getattr(_cp, 'data', '') or ''
if _tg in ('text', 'table') and _data:
try:
- _result = neutralSvc.processText(str(_data))
+ _result = await neutralSvc.processTextAsync(str(_data))
if _result and _result.get("neutralized_text"):
_cp.data = _result["neutralized_text"]
_wasNeutralized = True
+ _snapshots.append((f"Inhalt {_cpIdx+1} ({_tg})", _result["neutralized_text"], len(_result.get("mapping", {}))))
_cleanParts.append(_cp)
else:
if _hardMode:
@@ -778,7 +836,18 @@ detectedIntent-Werte:
else:
_cleanParts.append(_cp)
request.contentParts = _cleanParts
+ logger.debug(f"_neutralizeRequest: contentParts done, {len(_cleanParts)} kept")
+ if _snapshots and _wasNeutralized:
+ try:
+ neutralSvc.clearSnapshots()
+ for _label, _text, _phCount in _snapshots:
+ neutralSvc.saveSnapshot(_label, _text, _phCount)
+ logger.debug(f"_neutralizeRequest: saved {len(_snapshots)} snapshot(s)")
+ except Exception as _snapErr:
+ logger.warning(f"_neutralizeRequest: could not save snapshots: {_snapErr}")
+
+ logger.info(f"_neutralizeRequest complete: neutralized={_wasNeutralized}, excluded={len(excludedDocs)}")
return request, _wasNeutralized, excludedDocs
def _rehydrateResponse(self, responseText: str) -> str:
diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/registry.py b/modules/serviceCenter/services/serviceGeneration/renderers/registry.py
index adb83275..553c16a1 100644
--- a/modules/serviceCenter/services/serviceGeneration/renderers/registry.py
+++ b/modules/serviceCenter/services/serviceGeneration/renderers/registry.py
@@ -11,6 +11,7 @@ import logging
import importlib
from typing import Dict, Type, List, Optional, Tuple
from .documentRendererBaseTemplate import BaseRenderer
+from .codeRendererBaseTemplate import BaseCodeRenderer
logger = logging.getLogger(__name__)
@@ -52,9 +53,9 @@ class RendererRegistry:
for attrName in dir(module):
attr = getattr(module, attrName)
- if (isinstance(attr, type) and
- issubclass(attr, BaseRenderer) and
- attr != BaseRenderer and
+ if (isinstance(attr, type) and
+ issubclass(attr, BaseRenderer) and
+ attr not in (BaseRenderer, BaseCodeRenderer) and
hasattr(attr, 'getSupportedFormats')):
self._registerRendererClass(attr)
@@ -72,6 +73,8 @@ class RendererRegistry:
"""Register a renderer class keyed by (format, outputStyle)."""
try:
supportedFormats = rendererClass.getSupportedFormats()
+ if not supportedFormats:
+ return
priority = rendererClass.getPriority() if hasattr(rendererClass, 'getPriority') else 0
for formatName in supportedFormats:
diff --git a/modules/serviceCenter/services/serviceKnowledge/mainServiceKnowledge.py b/modules/serviceCenter/services/serviceKnowledge/mainServiceKnowledge.py
index 49774a38..9404a567 100644
--- a/modules/serviceCenter/services/serviceKnowledge/mainServiceKnowledge.py
+++ b/modules/serviceCenter/services/serviceKnowledge/mainServiceKnowledge.py
@@ -158,7 +158,7 @@ class KnowledgeService:
if not _textContent:
continue
try:
- _neutralResult = _neutralSvc.processText(_textContent)
+ _neutralResult = await _neutralSvc.processTextAsync(_textContent, fileId)
if _neutralResult and _neutralResult.get("neutralized_text"):
_obj["data"] = _neutralResult["neutralized_text"]
_neutralizedObjects.append(_obj)
diff --git a/modules/workflows/methods/methodContext/actions/neutralizeData.py b/modules/workflows/methods/methodContext/actions/neutralizeData.py
index bd032cac..b0fc5c24 100644
--- a/modules/workflows/methods/methodContext/actions/neutralizeData.py
+++ b/modules/workflows/methods/methodContext/actions/neutralizeData.py
@@ -169,7 +169,7 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
f"Neutralizing part {len(neutralizedParts) + 1} of document {i+1}"
)
- neutralizationResult = self.services.neutralization.processText(part.data)
+ neutralizationResult = await self.services.neutralization.processTextAsync(part.data)
if neutralizationResult and 'neutralized_text' in neutralizationResult:
neutralizedData = neutralizationResult['neutralized_text']