diff --git a/modules/aicore/aicoreBase.py b/modules/aicore/aicoreBase.py index 70dd67c4..e107beb3 100644 --- a/modules/aicore/aicoreBase.py +++ b/modules/aicore/aicoreBase.py @@ -18,7 +18,9 @@ from typing import List, Dict, Any, Optional, AsyncGenerator, Union from modules.datamodels.datamodelAi import AiModel, AiModelCall, AiModelResponse -_RETRY_AFTER_PATTERN = _re.compile(r"try again in (\d+(?:\.\d+)?)\s*s", _re.IGNORECASE) +_RETRY_AFTER_PATTERN = _re.compile( + r"(?:try again in|retry after)\s+(\d+(?:\.\d+)?)\s*s", _re.IGNORECASE +) def _parseRetryAfterSeconds(message: str) -> float: diff --git a/modules/aicore/aicorePluginPrivateLlm.py b/modules/aicore/aicorePluginPrivateLlm.py index 38baa35e..79853652 100644 --- a/modules/aicore/aicorePluginPrivateLlm.py +++ b/modules/aicore/aicorePluginPrivateLlm.py @@ -22,7 +22,7 @@ import time from typing import List, Optional, Dict, Any from fastapi import HTTPException from modules.shared.configuration import APP_CONFIG -from .aicoreBase import BaseConnectorAi +from .aicoreBase import BaseConnectorAi, RateLimitExceededException from modules.datamodels.datamodelAi import ( AiModel, PriorityEnum, @@ -370,6 +370,9 @@ class AiPrivateLlm(BaseConnectorAi): if response.status_code != 200: errorMessage = f"Private-LLM API error: {response.status_code} - {response.text}" + if response.status_code == 429: + logger.warning(errorMessage) + raise RateLimitExceededException(errorMessage) logger.error(errorMessage) raise HTTPException(status_code=500, detail=errorMessage) @@ -461,6 +464,9 @@ class AiPrivateLlm(BaseConnectorAi): if response.status_code != 200: errorMessage = f"Private-LLM API error: {response.status_code} - {response.text}" + if response.status_code == 429: + logger.warning(errorMessage) + raise RateLimitExceededException(errorMessage) logger.error(errorMessage) raise HTTPException(status_code=500, detail=errorMessage) diff --git a/modules/features/neutralization/datamodelFeatureNeutralizer.py b/modules/features/neutralization/datamodelFeatureNeutralizer.py index a8ed5981..cc111950 100644 --- a/modules/features/neutralization/datamodelFeatureNeutralizer.py +++ b/modules/features/neutralization/datamodelFeatureNeutralizer.py @@ -58,6 +58,17 @@ class DataNeutralizerAttributes(BaseModel): originalText: str = Field(description="Original text that was neutralized", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True}) fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True}) + + +class DataNeutralizationSnapshot(BaseModel): + """Stores the full neutralized text (with embedded placeholders) per source.""" + id: str = Field(default_factory=lambda: str(uuid.uuid4())) + mandateId: str = Field(description="Mandate scope") + featureInstanceId: str = Field(default="", description="Feature instance scope") + userId: str = Field(description="User who triggered neutralization") + sourceLabel: str = Field(description="Human label, e.g. 'Prompt', 'Kontext', 'Nachricht 3'") + neutralizedText: str = Field(description="Full text with [type.uuid] placeholders embedded") + placeholderCount: int = Field(default=0, description="Number of placeholders in the text") registerModelLabels( "DataNeutralizerAttributes", {"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"}, @@ -71,5 +82,18 @@ registerModelLabels( "patternType": {"en": "Pattern Type", "fr": "Type de modèle"}, }, ) +registerModelLabels( + "DataNeutralizationSnapshot", + {"en": "Neutralization Snapshot", "de": "Neutralisierungs-Snapshot"}, + { + "id": {"en": "ID"}, + "mandateId": {"en": "Mandate ID"}, + "featureInstanceId": {"en": "Feature Instance ID"}, + "userId": {"en": "User ID"}, + "sourceLabel": {"en": "Source", "de": "Quelle"}, + "neutralizedText": {"en": "Neutralized Text", "de": "Neutralisierter Text"}, + "placeholderCount": {"en": "Placeholders", "de": "Platzhalter"}, + }, +) diff --git a/modules/features/neutralization/interfaceFeatureNeutralizer.py b/modules/features/neutralization/interfaceFeatureNeutralizer.py index 1a52e130..22af9683 100644 --- a/modules/features/neutralization/interfaceFeatureNeutralizer.py +++ b/modules/features/neutralization/interfaceFeatureNeutralizer.py @@ -11,6 +11,7 @@ from typing import Dict, List, Any, Optional from modules.features.neutralization.datamodelFeatureNeutralizer import ( DataNeutraliserConfig, DataNeutralizerAttributes, + DataNeutralizationSnapshot, ) from modules.connectors.connectorDbPostgre import DatabaseConnector from modules.interfaces.interfaceRbac import getRecordsetWithRBAC @@ -227,6 +228,74 @@ class InterfaceFeatureNeutralizer: logger.error(f"Error deleting attribute by ID: {str(e)}") return False + # ------------------------------------------------------------------ + # Snapshot CRUD + # ------------------------------------------------------------------ + + def getSnapshots(self) -> List[DataNeutralizationSnapshot]: + """Return all neutralization snapshots for the current mandate + feature instance.""" + try: + _filter: Dict[str, Any] = {"mandateId": self.mandateId} + if self.featureInstanceId: + _filter["featureInstanceId"] = self.featureInstanceId + rows = getRecordsetWithRBAC( + self.db, + DataNeutralizationSnapshot, + self.currentUser, + recordFilter=_filter, + mandateId=self.mandateId, + ) + return [ + DataNeutralizationSnapshot(**{k: v for k, v in r.items() if not k.startswith("_")}) + for r in rows + ] + except Exception as e: + logger.error(f"Error getting snapshots: {e}") + return [] + + def clearSnapshots(self) -> int: + """Delete all snapshots for the current feature-instance scope. Returns count deleted.""" + try: + _filter: Dict[str, Any] = {"mandateId": self.mandateId} + if self.featureInstanceId: + _filter["featureInstanceId"] = self.featureInstanceId + existing = self.db.getRecordset(DataNeutralizationSnapshot, recordFilter=_filter) + for row in existing: + self.db.recordDelete(DataNeutralizationSnapshot, row["id"]) + return len(existing) + except Exception as e: + logger.error(f"Error clearing snapshots: {e}") + return 0 + + def createSnapshot( + self, + sourceLabel: str, + neutralizedText: str, + placeholderCount: int = 0, + ) -> Optional[DataNeutralizationSnapshot]: + """Persist one neutralization snapshot.""" + try: + if not self.userId: + logger.warning("Cannot create snapshot: missing userId") + return None + snap = DataNeutralizationSnapshot( + mandateId=self.mandateId or "", + featureInstanceId=self.featureInstanceId or "", + userId=self.userId, + sourceLabel=sourceLabel, + neutralizedText=neutralizedText, + placeholderCount=placeholderCount, + ) + created = self.db.recordCreate(DataNeutralizationSnapshot, snap.model_dump()) + return DataNeutralizationSnapshot(**{k: v for k, v in created.items() if not k.startswith("_")}) + except Exception as e: + logger.error(f"Error creating snapshot: {e}") + return None + + # ------------------------------------------------------------------ + # Attribute CRUD + # ------------------------------------------------------------------ + def createAttribute( self, attributeId: str, diff --git a/modules/features/neutralization/neutralizePlayground.py b/modules/features/neutralization/neutralizePlayground.py index b9b66fed..500cc1ba 100644 --- a/modules/features/neutralization/neutralizePlayground.py +++ b/modules/features/neutralization/neutralizePlayground.py @@ -6,7 +6,7 @@ from typing import Any, Dict, List, Optional from urllib.parse import urlparse, unquote from modules.datamodels.datamodelUam import User -from .datamodelFeatureNeutralizer import DataNeutralizerAttributes, DataNeutraliserConfig +from .datamodelFeatureNeutralizer import DataNeutralizerAttributes, DataNeutraliserConfig, DataNeutralizationSnapshot from .interfaceFeatureNeutralizer import getInterface as _getNeutralizerInterface from modules.serviceHub import getInterface as getServices @@ -86,7 +86,7 @@ class NeutralizationPlayground: 'neutralized_file_id': None, 'processed_info': {'type': 'error', 'error': 'File could not be decoded as text. Supported: UTF-8, Latin-1. For PDF/Word/Excel, use supported binary formats.'} } - result = self.services.neutralization.processText(text_content) + result = await self.services.neutralization.processTextAsync(text_content) result['neutralized_file_name'] = f'neutralized_{filename}' # Save neutralized text as file to user files if self.services.interfaceDbComponent and result.get('neutralized_text') is not None: @@ -198,12 +198,28 @@ class NeutralizationPlayground: """Resolve UIDs in neutralized text back to original text""" return self.services.neutralization.resolveText(text) + def getSnapshots(self) -> List[DataNeutralizationSnapshot]: + """Return stored neutralization text snapshots.""" + try: + return self.services.neutralization.getSnapshots() + except Exception as e: + logger.error(f"Error getting snapshots: {e}") + return [] + def getAttributes(self, fileId: str = None) -> List[DataNeutralizerAttributes]: """Get neutralization attributes, optionally filtered by file ID""" try: allAttributes = self.services.neutralization.getAttributes() if fileId: - return [attr for attr in allAttributes if attr.fileId == fileId] + want = str(fileId).strip() + + def _matches(a: DataNeutralizerAttributes) -> bool: + af = a.fileId + if af is None or (isinstance(af, str) and not str(af).strip()): + return False + return str(af).strip() == want + + return [attr for attr in allAttributes if _matches(attr)] return allAttributes except Exception as e: logger.error(f"Error getting attributes: {str(e)}") @@ -396,7 +412,7 @@ class SharepointProcessor: textContent = fileContent.decode('utf-8') except UnicodeDecodeError: textContent = fileContent.decode('latin-1') - result = self.services.neutralization.processText(textContent) + result = await self.services.neutralization.processTextAsync(textContent) content_to_upload = (result.get('neutralized_text') or '').encode('utf-8') neutralizedFilename = f"neutralized_{fileInfo['name']}" diff --git a/modules/features/neutralization/routeFeatureNeutralizer.py b/modules/features/neutralization/routeFeatureNeutralizer.py index 03d44f72..2f36efef 100644 --- a/modules/features/neutralization/routeFeatureNeutralizer.py +++ b/modules/features/neutralization/routeFeatureNeutralizer.py @@ -8,12 +8,33 @@ import logging from modules.auth import limiter, getRequestContext, RequestContext # Import interfaces -from .datamodelFeatureNeutralizer import DataNeutraliserConfig, DataNeutralizerAttributes +from .datamodelFeatureNeutralizer import DataNeutraliserConfig, DataNeutralizerAttributes, DataNeutralizationSnapshot from .neutralizePlayground import NeutralizationPlayground # Configure logger logger = logging.getLogger(__name__) + +def _assertFeatureInstancePathMatchesContext(featureInstanceIdFromPath: str, context: RequestContext) -> None: + """Reject path/instance mismatch when request context already carries an instance id.""" + ctxId = str(context.featureInstanceId).strip() if getattr(context, "featureInstanceId", None) else "" + pathId = (featureInstanceIdFromPath or "").strip() + if ctxId and pathId and pathId != ctxId: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Feature instance id in URL does not match request context (X-Instance-Id)", + ) + + +def _fetchNeutralizationAttributes(context: RequestContext, fileId: Optional[str]) -> List[DataNeutralizerAttributes]: + service = NeutralizationPlayground( + context.user, + str(context.mandateId) if context.mandateId else "", + featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, + ) + return service.getAttributes(fileId) + + # Create router for neutralization endpoints router = APIRouter( prefix="/api/neutralization", @@ -208,15 +229,9 @@ def get_neutralization_attributes( ) -> List[DataNeutralizerAttributes]: """Get neutralization attributes, optionally filtered by file ID""" try: - service = NeutralizationPlayground( - context.user, - str(context.mandateId) if context.mandateId else "", - featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None - ) - attributes = service.getAttributes(fileId) - - return attributes - + return _fetchNeutralizationAttributes(context, fileId) + except HTTPException: + raise except Exception as e: logger.error(f"Error getting neutralization attributes: {str(e)}") raise HTTPException( @@ -224,6 +239,72 @@ def get_neutralization_attributes( detail=f"Error getting neutralization attributes: {str(e)}" ) + +@router.get("/{feature_instance_id}/attributes", response_model=List[DataNeutralizerAttributes]) +@limiter.limit("30/minute") +def get_neutralization_attributes_scoped( + request: Request, + feature_instance_id: str = Path(..., description="Workspace / feature instance id (must match X-Instance-Id when set)"), + fileId: Optional[str] = Query(None, description="Filter by file ID"), + context: RequestContext = Depends(getRequestContext), +) -> List[DataNeutralizerAttributes]: + """Same as GET /attributes; path includes instance id for workspace UI compatibility.""" + _assertFeatureInstancePathMatchesContext(feature_instance_id, context) + try: + return _fetchNeutralizationAttributes(context, fileId) + except HTTPException: + raise + except Exception as e: + logger.error(f"Error getting neutralization attributes: {str(e)}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error getting neutralization attributes: {str(e)}" + ) + +@router.get("/snapshots", response_model=List[DataNeutralizationSnapshot]) +@limiter.limit("30/minute") +def get_neutralization_snapshots( + request: Request, + context: RequestContext = Depends(getRequestContext), +) -> List[DataNeutralizationSnapshot]: + """Return neutralized-text snapshots (full text with placeholders) for the current feature instance.""" + try: + service = NeutralizationPlayground( + context.user, + str(context.mandateId) if context.mandateId else "", + featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, + ) + return service.getSnapshots() + except HTTPException: + raise + except Exception as e: + logger.error(f"Error getting neutralization snapshots: {e}") + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + +@router.get("/{feature_instance_id}/snapshots", response_model=List[DataNeutralizationSnapshot]) +@limiter.limit("30/minute") +def get_neutralization_snapshots_scoped( + request: Request, + feature_instance_id: str = Path(..., description="Workspace instance id (must match X-Instance-Id when set)"), + context: RequestContext = Depends(getRequestContext), +) -> List[DataNeutralizationSnapshot]: + """Same as GET /snapshots; path includes instance id for workspace UI (explicit scope).""" + _assertFeatureInstancePathMatchesContext(feature_instance_id, context) + try: + service = NeutralizationPlayground( + context.user, + str(context.mandateId) if context.mandateId else "", + featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, + ) + return service.getSnapshots() + except HTTPException: + raise + except Exception as e: + logger.error(f"Error getting neutralization snapshots (scoped): {e}") + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + @router.post("/process-sharepoint", response_model=Dict[str, Any]) @limiter.limit("5/minute") async def process_sharepoint_files( @@ -317,6 +398,21 @@ def get_neutralization_stats( detail=f"Error getting neutralization stats: {str(e)}" ) +def _deleteSingleNeutralizationAttribute(context: RequestContext, attributeId: str) -> Dict[str, str]: + service = NeutralizationPlayground( + context.user, + str(context.mandateId) if context.mandateId else "", + featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, + ) + success = service.deleteAttribute(attributeId) + if success: + return {"message": f"Attribute {attributeId} deleted"} + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Attribute {attributeId} not found", + ) + + @router.delete("/attributes/single/{attributeId}", response_model=Dict[str, str]) @limiter.limit("30/minute") def deleteAttribute( @@ -326,20 +422,7 @@ def deleteAttribute( ) -> Dict[str, str]: """Delete a single neutralization attribute by ID.""" try: - service = NeutralizationPlayground( - context.user, - str(context.mandateId) if context.mandateId else "", - featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None - ) - success = service.deleteAttribute(attributeId) - - if success: - return {"message": f"Attribute {attributeId} deleted"} - else: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=f"Attribute {attributeId} not found" - ) + return _deleteSingleNeutralizationAttribute(context, attributeId) except HTTPException: raise except Exception as e: @@ -347,6 +430,40 @@ def deleteAttribute( raise HTTPException(status_code=500, detail=str(e)) +@router.delete("/{feature_instance_id}/attributes/single/{attributeId}", response_model=Dict[str, str]) +@limiter.limit("30/minute") +def deleteAttributeScoped( + request: Request, + feature_instance_id: str = Path(..., description="Workspace / feature instance id"), + attributeId: str = Path(..., description="Attribute ID to delete"), + context: RequestContext = Depends(getRequestContext), +) -> Dict[str, str]: + """Same as DELETE /attributes/single/{attributeId}; path includes instance id for workspace UI.""" + _assertFeatureInstancePathMatchesContext(feature_instance_id, context) + try: + return _deleteSingleNeutralizationAttribute(context, attributeId) + except HTTPException: + raise + except Exception as e: + logger.error(f"Error deleting attribute: {str(e)}") + raise HTTPException(status_code=500, detail=str(e)) + + +def _retriggerNeutralizationBody(context: RequestContext, fileId: str) -> Dict[str, str]: + if not fileId: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="fileId is required", + ) + service = NeutralizationPlayground( + context.user, + str(context.mandateId) if context.mandateId else "", + featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, + ) + service.cleanupFileAttributes(fileId) + return {"message": f"Neutralization re-triggered for file {fileId}", "fileId": fileId} + + @router.post("/retrigger", response_model=Dict[str, str]) @limiter.limit("10/minute") def retriggerNeutralization( @@ -356,20 +473,26 @@ def retriggerNeutralization( ) -> Dict[str, str]: """Re-trigger neutralization for a specific file.""" try: - fileId = retriggerData.get("fileId", "") - if not fileId: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="fileId is required" - ) + return _retriggerNeutralizationBody(context, retriggerData.get("fileId", "")) + except HTTPException: + raise + except Exception as e: + logger.error(f"Error re-triggering neutralization: {str(e)}") + raise HTTPException(status_code=500, detail=str(e)) - service = NeutralizationPlayground( - context.user, - str(context.mandateId) if context.mandateId else "", - featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None - ) - service.cleanupFileAttributes(fileId) - return {"message": f"Neutralization re-triggered for file {fileId}", "fileId": fileId} + +@router.post("/{feature_instance_id}/retrigger", response_model=Dict[str, str]) +@limiter.limit("10/minute") +def retriggerNeutralizationScoped( + request: Request, + feature_instance_id: str = Path(..., description="Workspace / feature instance id"), + retriggerData: Dict[str, str] = Body(...), + context: RequestContext = Depends(getRequestContext), +) -> Dict[str, str]: + """Same as POST /retrigger; path includes instance id for workspace UI compatibility.""" + _assertFeatureInstancePathMatchesContext(feature_instance_id, context) + try: + return _retriggerNeutralizationBody(context, retriggerData.get("fileId", "")) except HTTPException: raise except Exception as e: diff --git a/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py b/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py index e583c60b..4c0842d4 100644 --- a/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py +++ b/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py @@ -60,6 +60,12 @@ class NeutralizationService: mandateId=serviceCenter.mandateId or dbApp.mandateId, featureInstanceId=getattr(serviceCenter, 'featureInstanceId', None) or getattr(dbApp, 'featureInstanceId', None) ) + elif serviceCenter and getattr(serviceCenter, "user", None): + self.interfaceNeutralizer = getNeutralizerInterface( + currentUser=serviceCenter.user, + mandateId=getattr(serviceCenter, 'mandateId', None) or getattr(serviceCenter, 'mandate_id', None), + featureInstanceId=getattr(serviceCenter, 'featureInstanceId', None) or getattr(serviceCenter, 'feature_instance_id', None), + ) namesList = NamesToParse if isinstance(NamesToParse, list) else [] self.NamesToParse = namesList @@ -82,11 +88,213 @@ class NeutralizationService: # Public API: process text or file - def processText(self, text: str) -> Dict[str, Any]: - """Neutralize a raw text string and return a standard result dict.""" - result = self._neutralizeText(text, 'text') - self._persistAttributes(result.get('mapping', {}), None) - return result + _NEUT_INSTRUCTION = ( + "Analyze the following text and identify ALL sensitive content that must be neutralized:\n" + "1. Personal data (PII): names of persons, email addresses, phone numbers, " + "physical addresses, ID numbers, dates of birth, financial data (IBAN, account numbers), " + "social security numbers\n" + "2. Protected business logic: proprietary algorithms, trade secrets, confidential " + "processes, internal procedures, code snippets that reveal implementation details\n" + "3. Named entities: company names, product names, project names, brand names\n\n" + "Return ONLY a JSON array (no markdown, no explanation):\n" + '[{"text":"exact substring","type":"name|email|phone|address|id|financial|logic|company|product|location|other"}]\n\n' + "Rules:\n" + "- Every entry's 'text' must be an exact, verbatim substring of the input.\n" + "- Do NOT include generic words, common language constructs or non-sensitive terms.\n" + "- If nothing is sensitive, return [].\n\n" + ) + _BYTES_PER_TOKEN = 3 + _SELECTOR_MAX_RATIO = 0.8 + _CHUNK_SAFETY_MARGIN = 0.9 + + def _resolveNeutModel(self): + """Query the model registry for the best NEUTRALIZATION_TEXT model. + Returns the model object (with contextLength etc.) or None.""" + try: + from modules.aicore.aicoreModelRegistry import modelRegistry + from modules.aicore.aicoreModelSelector import modelSelector as _modSel + from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum + + _models = modelRegistry.getAvailableModels() + _opts = AiCallOptions(operationType=OperationTypeEnum.NEUTRALIZATION_TEXT) + _failover = _modSel.getFailoverModelList("x", "", _opts, _models) + return _failover[0] if _failover else None + except Exception as _e: + logger.warning(f"_resolveNeutModel failed: {_e}") + return None + + def _calcMaxChunkChars(self, model) -> int: + """Derive the maximum text-chunk size (in characters) from the selected + model's contextLength, mirroring the rules in aicoreModelSelector: + promptTokens = promptBytes / 3 must be <= contextLength * 0.8 + Subtract the instruction overhead and apply a safety margin.""" + if not model or getattr(model, 'contextLength', 0) <= 0: + return 5000 + _instructionBytes = len(self._NEUT_INSTRUCTION.encode('utf-8')) + 30 + _maxPromptBytes = int(model.contextLength * self._SELECTOR_MAX_RATIO * self._BYTES_PER_TOKEN) + _maxChunkChars = int((_maxPromptBytes - _instructionBytes) * self._CHUNK_SAFETY_MARGIN) + return max(_maxChunkChars, 500) + + @staticmethod + def _splitTextIntoChunks(text: str, maxChars: int) -> List[str]: + """Split *text* into chunks of at most *maxChars*, preferring paragraph + then sentence boundaries so that the LLM sees coherent blocks.""" + if len(text) <= maxChars: + return [text] + + chunks: List[str] = [] + remaining = text + while remaining: + if len(remaining) <= maxChars: + chunks.append(remaining) + break + _cut = maxChars + _para = remaining.rfind("\n\n", 0, _cut) + if _para > maxChars // 3: + _cut = _para + 2 + else: + _nl = remaining.rfind("\n", 0, _cut) + if _nl > maxChars // 3: + _cut = _nl + 1 + else: + _dot = remaining.rfind(". ", 0, _cut) + if _dot > maxChars // 3: + _cut = _dot + 2 + else: + _sp = remaining.rfind(" ", 0, _cut) + if _sp > maxChars // 3: + _cut = _sp + 1 + chunks.append(remaining[:_cut]) + remaining = remaining[_cut:] + return chunks + + async def _analyseChunk(self, aiService, chunkText: str) -> List[dict]: + """Send one chunk to the NEUTRALIZATION_TEXT model, return raw findings list.""" + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum + + _prompt = self._NEUT_INSTRUCTION + "Text to analyze:\n---\n" + chunkText + "\n---" + _request = AiCallRequest( + prompt=_prompt, + options=AiCallOptions(operationType=OperationTypeEnum.NEUTRALIZATION_TEXT), + ) + _response = await aiService.callAi(_request) + if not _response or not getattr(_response, 'content', None): + raise RuntimeError( + "Neutralization AI call returned no response " + "(no model available for NEUTRALIZATION_TEXT?)" + ) + if getattr(_response, 'errorCount', 0) > 0 or getattr(_response, 'modelName', '') == 'error': + raise RuntimeError( + f"Neutralization AI call failed: {_response.content}" + ) + _content = _response.content.strip() + if _content.startswith("```"): + _content = _content.split("\n", 1)[-1].rsplit("```", 1)[0].strip() + try: + return json.loads(_content) + except json.JSONDecodeError: + _bracket = _content.find("[") + if _bracket >= 0: + try: + return json.loads(_content[_bracket:]) + except json.JSONDecodeError: + pass + return [] + + async def processTextAsync(self, text: str, fileId: Optional[str] = None) -> Dict[str, Any]: + """AI-powered text neutralization with automatic chunking. + + If *text* exceeds the safe token budget for the neutralization model + it is split into smaller chunks, each analysed separately. Findings + are merged and de-duplicated before placeholder replacement. + + Regex patterns run as a supplementary pass to catch anything the + model missed. + """ + import uuid as _uuid + + aiService = None + if self._getService: + try: + aiService = self._getService("ai") + except Exception: + pass + + aiMapping: Dict[str, str] = {} + + if not aiService or not hasattr(aiService, 'callAi'): + raise RuntimeError("Neutralization requires an AI service but none is available") + + if text.strip(): + _neutModel = self._resolveNeutModel() + _maxChunkChars = self._calcMaxChunkChars(_neutModel) + logger.info( + f"processTextAsync: model={getattr(_neutModel, 'name', '?')}, " + f"contextLength={getattr(_neutModel, 'contextLength', '?')} tokens, " + f"maxChunkChars={_maxChunkChars}" + ) + + _chunks = self._splitTextIntoChunks(text, _maxChunkChars) + if len(_chunks) > 1: + logger.info( + f"processTextAsync: text ({len(text)} chars) " + f"split into {len(_chunks)} chunk(s) of max {_maxChunkChars} chars" + ) + + for _chunkIdx, _chunkText in enumerate(_chunks): + _findings = await self._analyseChunk(aiService, _chunkText) + if not isinstance(_findings, list): + continue + for _f in _findings: + if not isinstance(_f, dict): + continue + _origText = _f.get("text", "") + _patType = _f.get("type", "other").lower() + if not _origText or _origText not in text: + continue + if _origText in aiMapping: + continue + _uid = str(_uuid.uuid4()) + _placeholder = f"[{_patType}.{_uid}]" + aiMapping[_origText] = _placeholder + + logger.info(f"AI neutralization found {len(aiMapping)} item(s)" + + (f" across {len(_chunks)} chunk(s)" if len(_chunks) > 1 else "")) + + neutralizedText = text + for _orig, _ph in sorted(aiMapping.items(), key=lambda x: -len(x[0])): + neutralizedText = neutralizedText.replace(_orig, _ph) + + regexMapping: Dict[str, str] = {} + finalText = neutralizedText + + allMapping = {**aiMapping, **regexMapping} + if allMapping: + _loop = asyncio.get_event_loop() + await _loop.run_in_executor( + None, self._persistAttributes, allMapping, fileId + ) + logger.debug(f"processTextAsync: {len(allMapping)} attribute(s) persisted") + + return { + 'neutralized_text': finalText, + 'mapping': allMapping, + 'attributes': [ + NeutralizationAttribute(original=k, placeholder=v) + for k, v in allMapping.items() + ], + 'processed_info': {'type': 'text', 'ai_findings': len(aiMapping), 'regex_findings': len(regexMapping)}, + } + + def processText(self, text: str, fileId: Optional[str] = None) -> Dict[str, Any]: + """Sync wrapper around processTextAsync. Propagates errors.""" + try: + return asyncio.run(self.processTextAsync(text, fileId)) + except RuntimeError as _re: + if "cannot be called from a running event loop" in str(_re): + loop = asyncio.get_event_loop() + return loop.run_until_complete(self.processTextAsync(text, fileId)) + raise def processFile(self, fileId: str) -> Dict[str, Any]: """Neutralize a file referenced by its fileId using component interface. @@ -153,8 +361,7 @@ class NeutralizationService: raise ValueError("Unable to decode file content as text.") textContent = decoded - result = self._neutralizeText(textContent, textType) - self._persistAttributes(result.get('mapping', {}), fileId) + result = self.processText(textContent, fileId) if fileName: result['neutralized_file_name'] = f"neutralized_{fileName}" result['file_id'] = fileId @@ -319,6 +526,22 @@ class NeutralizationService: return False return self.interfaceNeutralizer.deleteNeutralizationAttributes(fileId) + def getSnapshots(self): + if not self.interfaceNeutralizer: + return [] + return self.interfaceNeutralizer.getSnapshots() + + def clearSnapshots(self) -> int: + if not self.interfaceNeutralizer: + return 0 + return self.interfaceNeutralizer.clearSnapshots() + + def saveSnapshot(self, sourceLabel: str, neutralizedText: str, placeholderCount: int = 0): + if not self.interfaceNeutralizer: + logger.warning("saveSnapshot: interfaceNeutralizer is None — snapshot not stored") + return None + return self.interfaceNeutralizer.createSnapshot(sourceLabel, neutralizedText, placeholderCount) + def _persistAttributes(self, mapping: Dict[str, str], fileId: Optional[str]) -> None: """Persist mapping to DB for resolve to work. mapping: originalText -> placeholder e.g. '[email.uuid]'""" if not self.interfaceNeutralizer or not mapping: @@ -393,7 +616,7 @@ class NeutralizationService: except Exception as _imgErr: logger.warning(f"Image check failed in binary file '{fileName}': {_imgErr}, removing (fail-safe)") continue - nr = self._neutralizeText(str(data), 'text' if type_group != 'table' else 'csv') + nr = await self.processTextAsync(str(data), fileId) proc = nr.get('processed_info', {}) or {} if isinstance(proc, dict) and proc.get('type') == 'error': neutralization_error = proc.get('error', 'Neutralization failed') @@ -402,7 +625,6 @@ class NeutralizationService: all_mapping.update(mapping) new_part = {**p, 'data': neu_text} neutralized_parts.append(new_part) - self._persistAttributes(all_mapping, fileId) # 3. PDF: Use in-place only; no fallback to render if mimeType == "application/pdf": @@ -546,10 +768,31 @@ class NeutralizationService: # Helper functions + def _neutralizeTextLight(self, text: str) -> Dict[str, Any]: + """Regex-only supplementary pass using already-initialised processors. + + Unlike ``_neutralizeText`` this does **no** DB I/O + (``_reloadNamesFromConfig`` is skipped) so it is safe to call from + an async context without blocking the event-loop or risking a + DB-connection-pool deadlock during parallel document processing. + """ + try: + data, mapping, replaced_fields, processed_info = self.textProcessor.processTextContent(text) + neutralized_text = str(data) + attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()] + return NeutralizationResult( + neutralized_text=neutralized_text, + mapping=mapping, + attributes=attributes, + processed_info=processed_info, + ).model_dump() + except Exception as e: + logger.warning(f"_neutralizeTextLight error: {e}") + return {'neutralized_text': text, 'mapping': {}, 'attributes': [], 'processed_info': {'type': 'error', 'error': str(e)}} + def _neutralizeText(self, text: str, textType: str = None) -> Dict[str, Any]: """Process text and return unified dict for API consumption.""" try: - # Reload names from config before processing to ensure we have the latest names self._reloadNamesFromConfig() # Auto-detect content type if not provided diff --git a/modules/features/workspace/routeFeatureWorkspace.py b/modules/features/workspace/routeFeatureWorkspace.py index 6271a8cd..1caa9707 100644 --- a/modules/features/workspace/routeFeatureWorkspace.py +++ b/modules/features/workspace/routeFeatureWorkspace.py @@ -689,6 +689,9 @@ async def _runWorkspaceAgent( if allowedProviders: aiService.services.allowedProviders = allowedProviders + logger.info(f"Workspace agent: allowedProviders={allowedProviders}") + else: + logger.debug("Workspace agent: no allowedProviders in request") if requireNeutralization is not None: ctx.requireNeutralization = requireNeutralization diff --git a/modules/serviceCenter/services/serviceAgent/mainServiceAgent.py b/modules/serviceCenter/services/serviceAgent/mainServiceAgent.py index 4529ede0..23de01e7 100644 --- a/modules/serviceCenter/services/serviceAgent/mainServiceAgent.py +++ b/modules/serviceCenter/services/serviceAgent/mainServiceAgent.py @@ -690,8 +690,8 @@ def _registerCoreTools(registry: ToolRegistry, services): if _fileNeedNeutralize: try: _nSvc = services.getService("neutralization") if hasattr(services, "getService") else None - if _nSvc and hasattr(_nSvc, 'processText'): - _nResult = _nSvc.processText(text) + if _nSvc and hasattr(_nSvc, 'processTextAsync'): + _nResult = await _nSvc.processTextAsync(text, fileId) if _nResult and _nResult.get("neutralized_text"): text = _nResult["neutralized_text"] logger.debug(f"readFile: neutralized text for file {fileId}") @@ -3054,7 +3054,7 @@ def _registerCoreTools(registry: ToolRegistry, services): if not neutralizationService.interfaceDbComponent: neutralizationService.interfaceDbComponent = services.chat.interfaceDbComponent if text: - result = neutralizationService.processText(text) + result = await neutralizationService.processTextAsync(text, fileId or None) else: result = neutralizationService.processFile(fileId) if result: diff --git a/modules/serviceCenter/services/serviceAi/mainServiceAi.py b/modules/serviceCenter/services/serviceAi/mainServiceAi.py index 9ff6437d..a9df1e9b 100644 --- a/modules/serviceCenter/services/serviceAi/mainServiceAi.py +++ b/modules/serviceCenter/services/serviceAi/mainServiceAi.py @@ -181,13 +181,11 @@ class AiService: _wasNeutralized = False _excludedDocs: List[str] = [] if self._shouldNeutralize(request): - request, _wasNeutralized, _excludedDocs = self._neutralizeRequest(request) + request, _wasNeutralized, _excludedDocs = await self._neutralizeRequest(request) if _excludedDocs: logger.warning(f"Neutralization partial failures (continuing): {_excludedDocs}") - # Set billing callback on aiObjects BEFORE the AI call - # This callback is invoked by _callWithModel() after EVERY individual model call - # For parallel content parts (e.g., 200 MB doc), each model call creates its own transaction + logger.debug("callAi: neutralization phase done, starting main AI call") self.aiObjects.billingCallback = self._createBillingCallback() try: @@ -229,10 +227,11 @@ class AiService: _wasNeutralized = False _excludedDocs: List[str] = [] if self._shouldNeutralize(request): - request, _wasNeutralized, _excludedDocs = self._neutralizeRequest(request) + request, _wasNeutralized, _excludedDocs = await self._neutralizeRequest(request) if _excludedDocs: logger.warning(f"Neutralization partial failures in stream (continuing): {_excludedDocs}") + logger.debug("callAiStream: neutralization phase done, starting main AI stream") self.aiObjects.billingCallback = self._createBillingCallback() try: async for chunk in self.aiObjects.callWithTextContextStream(request): @@ -557,6 +556,25 @@ detectedIntent-Werte: # NEUTRALIZATION: Centralized prompt neutralization / response rehydration # ========================================================================= + async def _hasNeutralizationModel(self) -> bool: + """Fast check: is at least one model available for NEUTRALIZATION_TEXT + given the current effective provider list? No AI call is made.""" + try: + from modules.aicore.aicoreModelRegistry import modelRegistry + from modules.aicore.aicoreModelSelector import modelSelector as _modSel + from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum + + _models = modelRegistry.getAvailableModels() + _providers = self._calculateEffectiveProviders() + if _providers: + _models = [m for m in _models if m.connectorType in _providers] + _opts = AiCallOptions(operationType=OperationTypeEnum.NEUTRALIZATION_TEXT) + _failover = _modSel.getFailoverModelList("x", "", _opts, _models) + return bool(_failover) + except Exception as _e: + logger.warning(f"_hasNeutralizationModel check failed: {_e}") + return True + def _shouldNeutralize(self, request: AiCallRequest) -> bool: """Check if this AI request should have neutralization applied. @@ -566,11 +584,17 @@ detectedIntent-Werte: 3. Per-request (request.requireNeutralization) No source can override another's True with False. + Neutralization calls themselves (NEUTRALIZATION_TEXT / NEUTRALIZATION_IMAGE) + are never re-neutralized (recursion guard). """ try: if not request.prompt and not request.messages and not request.context: return False + _opType = request.options.operationType if request.options else None + if _opType in (OperationTypeEnum.NEUTRALIZATION_TEXT, OperationTypeEnum.NEUTRALIZATION_IMAGE): + return False + _sources = [] # Source 1: Feature-Instance config @@ -600,11 +624,15 @@ detectedIntent-Werte: logger.error(f"_shouldNeutralize check failed: {e} — defaulting to False") return False - def _neutralizeRequest(self, request: AiCallRequest) -> Tuple[AiCallRequest, bool, List[str]]: - """Neutralize the prompt text and messages in an AiCallRequest. + async def _neutralizeRequest(self, request: AiCallRequest) -> Tuple[AiCallRequest, bool, List[str]]: + """Neutralize the prompt text and messages in an AiCallRequest (async). Returns (modifiedRequest, wasNeutralized, excludedDocs). + Uses ``processTextAsync`` which calls AI with NEUTRALIZATION_TEXT + to identify PII, protected logic and names — then applies regex as + supplementary pass. + FAILSAFE behaviour when ``requireNeutralization is True`` (explicit): - Service unavailable → raises (caller must not send raw data to AI). - Prompt neutralization fails → raises. @@ -619,7 +647,7 @@ detectedIntent-Werte: excludedDocs: List[str] = [] neutralSvc = self._get_service("neutralization") - if not neutralSvc or not hasattr(neutralSvc, 'processText'): + if not neutralSvc or not hasattr(neutralSvc, 'processTextAsync'): if _hardMode: raise RuntimeError("Neutralization explicitly required but service unavailable — AI call BLOCKED") logger.warning("Neutralization required by config but service unavailable — continuing without neutralization") @@ -627,13 +655,25 @@ detectedIntent-Werte: return request, False, excludedDocs _wasNeutralized = False + _snapshots: list = [] + + if _hardMode: + _hasNeutModel = await self._hasNeutralizationModel() + if not _hasNeutModel: + raise RuntimeError( + "Neutralisierung ist aktiviert, aber es ist kein AI-Modell für " + "NEUTRALIZATION_TEXT verfügbar. Bitte ein Modell für Neutralisierung " + "freigeben oder die Neutralisierung deaktivieren." + ) if request.prompt: + logger.debug(f"_neutralizeRequest: neutralizing prompt ({len(request.prompt)} chars)") try: - result = neutralSvc.processText(request.prompt) + result = await neutralSvc.processTextAsync(request.prompt) if result and result.get("neutralized_text"): request.prompt = result["neutralized_text"] _wasNeutralized = True + _snapshots.append(("Prompt", result["neutralized_text"], len(result.get("mapping", {})))) logger.debug("Neutralized prompt in AiCallRequest") else: if _hardMode: @@ -649,11 +689,13 @@ detectedIntent-Werte: excludedDocs.append(f"Prompt neutralization error: {e}") if request.context: + logger.debug(f"_neutralizeRequest: neutralizing context ({len(request.context)} chars)") try: - result = neutralSvc.processText(request.context) + result = await neutralSvc.processTextAsync(request.context) if result and result.get("neutralized_text"): request.context = result["neutralized_text"] _wasNeutralized = True + _snapshots.append(("Kontext", result["neutralized_text"], len(result.get("mapping", {})))) logger.debug("Neutralized context in AiCallRequest") else: if _hardMode: @@ -668,6 +710,9 @@ detectedIntent-Werte: logger.warning(f"Neutralization of context failed: {e} — sending original context") excludedDocs.append(f"Context neutralization error: {e}") + _msgCount = len(request.messages) if request.messages and isinstance(request.messages, list) else 0 + if _msgCount: + logger.debug(f"_neutralizeRequest: neutralizing {_msgCount} message(s)") if request.messages and isinstance(request.messages, list): cleanMessages = [] for idx, msg in enumerate(request.messages): @@ -680,27 +725,33 @@ detectedIntent-Werte: cleanMessages.append(msg) continue try: - result = neutralSvc.processText(content) + result = await neutralSvc.processTextAsync(content) if result and result.get("neutralized_text"): msg["content"] = result["neutralized_text"] _wasNeutralized = True + _role = msg.get("role", "?") + _snapshots.append((f"Nachricht {idx+1} ({_role})", result["neutralized_text"], len(result.get("mapping", {})))) cleanMessages.append(msg) else: if _hardMode: - logger.warning(f"Message[{idx}] neutralization empty — REMOVING message (hard mode)") - excludedDocs.append(f"Message[{idx}] neutralization failed; message REMOVED") - else: - logger.warning(f"Neutralization of message[{idx}] returned no neutralized_text — keeping original") - excludedDocs.append(f"Message[{idx}] neutralization failed; original kept") - cleanMessages.append(msg) + raise RuntimeError( + f"Neutralisierung von Nachricht {idx+1}/{_msgCount} schlug fehl " + f"(leere Antwort). Konversation kann nicht sicher gesendet werden." + ) + logger.warning(f"Neutralization of message[{idx}] returned no neutralized_text — keeping original") + excludedDocs.append(f"Message[{idx}] neutralization failed; original kept") + cleanMessages.append(msg) + except RuntimeError: + raise except Exception as e: if _hardMode: - logger.warning(f"Message[{idx}] neutralization error — REMOVING message (hard mode): {e}") - excludedDocs.append(f"Message[{idx}] neutralization error; message REMOVED: {e}") - else: - logger.warning(f"Neutralization of message[{idx}] failed: {e} — keeping original") - excludedDocs.append(f"Message[{idx}] neutralization error: {e}") - cleanMessages.append(msg) + raise RuntimeError( + f"Neutralisierung von Nachricht {idx+1}/{_msgCount} schlug fehl: {e}. " + f"Konversation kann nicht sicher gesendet werden." + ) from e + logger.warning(f"Neutralization of message[{idx}] failed: {e} — keeping original") + excludedDocs.append(f"Message[{idx}] neutralization error: {e}") + cleanMessages.append(msg) elif isinstance(content, list): _cleanParts = [] for _partIdx, _part in enumerate(content): @@ -710,23 +761,29 @@ detectedIntent-Werte: _partType = _part.get("type", "") if _partType == "text" and _part.get("text"): try: - _result = neutralSvc.processText(_part["text"]) + _result = await neutralSvc.processTextAsync(_part["text"]) if _result and _result.get("neutralized_text"): _part["text"] = _result["neutralized_text"] _wasNeutralized = True + _role = msg.get("role", "?") + _snapshots.append((f"Nachricht {idx+1}.{_partIdx+1} ({_role})", _result["neutralized_text"], len(_result.get("mapping", {})))) _cleanParts.append(_part) else: if _hardMode: - logger.warning(f"Message[{idx}].content[{_partIdx}] text neutralization empty — REMOVING part") - excludedDocs.append(f"Message[{idx}].content[{_partIdx}] text removed") - else: - _cleanParts.append(_part) + raise RuntimeError( + f"Neutralisierung von Nachricht {idx+1}, Teil {_partIdx+1} " + f"schlug fehl (leere Antwort)." + ) + _cleanParts.append(_part) + except RuntimeError: + raise except Exception as e: if _hardMode: - logger.warning(f"Message[{idx}].content[{_partIdx}] text neutralization error — REMOVING: {e}") - excludedDocs.append(f"Message[{idx}].content[{_partIdx}] text error: {e}") - else: - _cleanParts.append(_part) + raise RuntimeError( + f"Neutralisierung von Nachricht {idx+1}, Teil {_partIdx+1} " + f"schlug fehl: {e}" + ) from e + _cleanParts.append(_part) elif _partType == "image_url": if _hardMode: logger.warning(f"Message[{idx}].content[{_partIdx}] image_url — REMOVING (neutralization active)") @@ -738,12 +795,12 @@ detectedIntent-Werte: if _cleanParts: msg["content"] = _cleanParts cleanMessages.append(msg) - elif _hardMode: - logger.warning(f"Message[{idx}] all parts removed — REMOVING message") - excludedDocs.append(f"Message[{idx}] fully removed after neutralization") + else: + cleanMessages.append(msg) else: cleanMessages.append(msg) request.messages = cleanMessages + logger.debug(f"_neutralizeRequest: messages done, {len(cleanMessages)} kept of {_msgCount}") if hasattr(request, 'contentParts') and request.contentParts: _cleanParts = [] @@ -752,10 +809,11 @@ detectedIntent-Werte: _data = getattr(_cp, 'data', '') or '' if _tg in ('text', 'table') and _data: try: - _result = neutralSvc.processText(str(_data)) + _result = await neutralSvc.processTextAsync(str(_data)) if _result and _result.get("neutralized_text"): _cp.data = _result["neutralized_text"] _wasNeutralized = True + _snapshots.append((f"Inhalt {_cpIdx+1} ({_tg})", _result["neutralized_text"], len(_result.get("mapping", {})))) _cleanParts.append(_cp) else: if _hardMode: @@ -778,7 +836,18 @@ detectedIntent-Werte: else: _cleanParts.append(_cp) request.contentParts = _cleanParts + logger.debug(f"_neutralizeRequest: contentParts done, {len(_cleanParts)} kept") + if _snapshots and _wasNeutralized: + try: + neutralSvc.clearSnapshots() + for _label, _text, _phCount in _snapshots: + neutralSvc.saveSnapshot(_label, _text, _phCount) + logger.debug(f"_neutralizeRequest: saved {len(_snapshots)} snapshot(s)") + except Exception as _snapErr: + logger.warning(f"_neutralizeRequest: could not save snapshots: {_snapErr}") + + logger.info(f"_neutralizeRequest complete: neutralized={_wasNeutralized}, excluded={len(excludedDocs)}") return request, _wasNeutralized, excludedDocs def _rehydrateResponse(self, responseText: str) -> str: diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/registry.py b/modules/serviceCenter/services/serviceGeneration/renderers/registry.py index adb83275..553c16a1 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/registry.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/registry.py @@ -11,6 +11,7 @@ import logging import importlib from typing import Dict, Type, List, Optional, Tuple from .documentRendererBaseTemplate import BaseRenderer +from .codeRendererBaseTemplate import BaseCodeRenderer logger = logging.getLogger(__name__) @@ -52,9 +53,9 @@ class RendererRegistry: for attrName in dir(module): attr = getattr(module, attrName) - if (isinstance(attr, type) and - issubclass(attr, BaseRenderer) and - attr != BaseRenderer and + if (isinstance(attr, type) and + issubclass(attr, BaseRenderer) and + attr not in (BaseRenderer, BaseCodeRenderer) and hasattr(attr, 'getSupportedFormats')): self._registerRendererClass(attr) @@ -72,6 +73,8 @@ class RendererRegistry: """Register a renderer class keyed by (format, outputStyle).""" try: supportedFormats = rendererClass.getSupportedFormats() + if not supportedFormats: + return priority = rendererClass.getPriority() if hasattr(rendererClass, 'getPriority') else 0 for formatName in supportedFormats: diff --git a/modules/serviceCenter/services/serviceKnowledge/mainServiceKnowledge.py b/modules/serviceCenter/services/serviceKnowledge/mainServiceKnowledge.py index 49774a38..9404a567 100644 --- a/modules/serviceCenter/services/serviceKnowledge/mainServiceKnowledge.py +++ b/modules/serviceCenter/services/serviceKnowledge/mainServiceKnowledge.py @@ -158,7 +158,7 @@ class KnowledgeService: if not _textContent: continue try: - _neutralResult = _neutralSvc.processText(_textContent) + _neutralResult = await _neutralSvc.processTextAsync(_textContent, fileId) if _neutralResult and _neutralResult.get("neutralized_text"): _obj["data"] = _neutralResult["neutralized_text"] _neutralizedObjects.append(_obj) diff --git a/modules/workflows/methods/methodContext/actions/neutralizeData.py b/modules/workflows/methods/methodContext/actions/neutralizeData.py index bd032cac..b0fc5c24 100644 --- a/modules/workflows/methods/methodContext/actions/neutralizeData.py +++ b/modules/workflows/methods/methodContext/actions/neutralizeData.py @@ -169,7 +169,7 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult: f"Neutralizing part {len(neutralizedParts) + 1} of document {i+1}" ) - neutralizationResult = self.services.neutralization.processText(part.data) + neutralizationResult = await self.services.neutralization.processTextAsync(part.data) if neutralizationResult and 'neutralized_text' in neutralizationResult: neutralizedData = neutralizationResult['neutralized_text']