streamlined neutralization flow
This commit is contained in:
parent
7e88005721
commit
e0a09ae6b1
13 changed files with 655 additions and 97 deletions
|
|
@ -18,7 +18,9 @@ from typing import List, Dict, Any, Optional, AsyncGenerator, Union
|
|||
from modules.datamodels.datamodelAi import AiModel, AiModelCall, AiModelResponse
|
||||
|
||||
|
||||
_RETRY_AFTER_PATTERN = _re.compile(r"try again in (\d+(?:\.\d+)?)\s*s", _re.IGNORECASE)
|
||||
_RETRY_AFTER_PATTERN = _re.compile(
|
||||
r"(?:try again in|retry after)\s+(\d+(?:\.\d+)?)\s*s", _re.IGNORECASE
|
||||
)
|
||||
|
||||
|
||||
def _parseRetryAfterSeconds(message: str) -> float:
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import time
|
|||
from typing import List, Optional, Dict, Any
|
||||
from fastapi import HTTPException
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
from .aicoreBase import BaseConnectorAi
|
||||
from .aicoreBase import BaseConnectorAi, RateLimitExceededException
|
||||
from modules.datamodels.datamodelAi import (
|
||||
AiModel,
|
||||
PriorityEnum,
|
||||
|
|
@ -370,6 +370,9 @@ class AiPrivateLlm(BaseConnectorAi):
|
|||
|
||||
if response.status_code != 200:
|
||||
errorMessage = f"Private-LLM API error: {response.status_code} - {response.text}"
|
||||
if response.status_code == 429:
|
||||
logger.warning(errorMessage)
|
||||
raise RateLimitExceededException(errorMessage)
|
||||
logger.error(errorMessage)
|
||||
raise HTTPException(status_code=500, detail=errorMessage)
|
||||
|
||||
|
|
@ -461,6 +464,9 @@ class AiPrivateLlm(BaseConnectorAi):
|
|||
|
||||
if response.status_code != 200:
|
||||
errorMessage = f"Private-LLM API error: {response.status_code} - {response.text}"
|
||||
if response.status_code == 429:
|
||||
logger.warning(errorMessage)
|
||||
raise RateLimitExceededException(errorMessage)
|
||||
logger.error(errorMessage)
|
||||
raise HTTPException(status_code=500, detail=errorMessage)
|
||||
|
||||
|
|
|
|||
|
|
@ -58,6 +58,17 @@ class DataNeutralizerAttributes(BaseModel):
|
|||
originalText: str = Field(description="Original text that was neutralized", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True})
|
||||
fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
|
||||
patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True})
|
||||
|
||||
|
||||
class DataNeutralizationSnapshot(BaseModel):
|
||||
"""Stores the full neutralized text (with embedded placeholders) per source."""
|
||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
||||
mandateId: str = Field(description="Mandate scope")
|
||||
featureInstanceId: str = Field(default="", description="Feature instance scope")
|
||||
userId: str = Field(description="User who triggered neutralization")
|
||||
sourceLabel: str = Field(description="Human label, e.g. 'Prompt', 'Kontext', 'Nachricht 3'")
|
||||
neutralizedText: str = Field(description="Full text with [type.uuid] placeholders embedded")
|
||||
placeholderCount: int = Field(default=0, description="Number of placeholders in the text")
|
||||
registerModelLabels(
|
||||
"DataNeutralizerAttributes",
|
||||
{"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"},
|
||||
|
|
@ -71,5 +82,18 @@ registerModelLabels(
|
|||
"patternType": {"en": "Pattern Type", "fr": "Type de modèle"},
|
||||
},
|
||||
)
|
||||
registerModelLabels(
|
||||
"DataNeutralizationSnapshot",
|
||||
{"en": "Neutralization Snapshot", "de": "Neutralisierungs-Snapshot"},
|
||||
{
|
||||
"id": {"en": "ID"},
|
||||
"mandateId": {"en": "Mandate ID"},
|
||||
"featureInstanceId": {"en": "Feature Instance ID"},
|
||||
"userId": {"en": "User ID"},
|
||||
"sourceLabel": {"en": "Source", "de": "Quelle"},
|
||||
"neutralizedText": {"en": "Neutralized Text", "de": "Neutralisierter Text"},
|
||||
"placeholderCount": {"en": "Placeholders", "de": "Platzhalter"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ from typing import Dict, List, Any, Optional
|
|||
from modules.features.neutralization.datamodelFeatureNeutralizer import (
|
||||
DataNeutraliserConfig,
|
||||
DataNeutralizerAttributes,
|
||||
DataNeutralizationSnapshot,
|
||||
)
|
||||
from modules.connectors.connectorDbPostgre import DatabaseConnector
|
||||
from modules.interfaces.interfaceRbac import getRecordsetWithRBAC
|
||||
|
|
@ -227,6 +228,74 @@ class InterfaceFeatureNeutralizer:
|
|||
logger.error(f"Error deleting attribute by ID: {str(e)}")
|
||||
return False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Snapshot CRUD
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def getSnapshots(self) -> List[DataNeutralizationSnapshot]:
|
||||
"""Return all neutralization snapshots for the current mandate + feature instance."""
|
||||
try:
|
||||
_filter: Dict[str, Any] = {"mandateId": self.mandateId}
|
||||
if self.featureInstanceId:
|
||||
_filter["featureInstanceId"] = self.featureInstanceId
|
||||
rows = getRecordsetWithRBAC(
|
||||
self.db,
|
||||
DataNeutralizationSnapshot,
|
||||
self.currentUser,
|
||||
recordFilter=_filter,
|
||||
mandateId=self.mandateId,
|
||||
)
|
||||
return [
|
||||
DataNeutralizationSnapshot(**{k: v for k, v in r.items() if not k.startswith("_")})
|
||||
for r in rows
|
||||
]
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting snapshots: {e}")
|
||||
return []
|
||||
|
||||
def clearSnapshots(self) -> int:
|
||||
"""Delete all snapshots for the current feature-instance scope. Returns count deleted."""
|
||||
try:
|
||||
_filter: Dict[str, Any] = {"mandateId": self.mandateId}
|
||||
if self.featureInstanceId:
|
||||
_filter["featureInstanceId"] = self.featureInstanceId
|
||||
existing = self.db.getRecordset(DataNeutralizationSnapshot, recordFilter=_filter)
|
||||
for row in existing:
|
||||
self.db.recordDelete(DataNeutralizationSnapshot, row["id"])
|
||||
return len(existing)
|
||||
except Exception as e:
|
||||
logger.error(f"Error clearing snapshots: {e}")
|
||||
return 0
|
||||
|
||||
def createSnapshot(
|
||||
self,
|
||||
sourceLabel: str,
|
||||
neutralizedText: str,
|
||||
placeholderCount: int = 0,
|
||||
) -> Optional[DataNeutralizationSnapshot]:
|
||||
"""Persist one neutralization snapshot."""
|
||||
try:
|
||||
if not self.userId:
|
||||
logger.warning("Cannot create snapshot: missing userId")
|
||||
return None
|
||||
snap = DataNeutralizationSnapshot(
|
||||
mandateId=self.mandateId or "",
|
||||
featureInstanceId=self.featureInstanceId or "",
|
||||
userId=self.userId,
|
||||
sourceLabel=sourceLabel,
|
||||
neutralizedText=neutralizedText,
|
||||
placeholderCount=placeholderCount,
|
||||
)
|
||||
created = self.db.recordCreate(DataNeutralizationSnapshot, snap.model_dump())
|
||||
return DataNeutralizationSnapshot(**{k: v for k, v in created.items() if not k.startswith("_")})
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating snapshot: {e}")
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Attribute CRUD
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def createAttribute(
|
||||
self,
|
||||
attributeId: str,
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ from typing import Any, Dict, List, Optional
|
|||
from urllib.parse import urlparse, unquote
|
||||
|
||||
from modules.datamodels.datamodelUam import User
|
||||
from .datamodelFeatureNeutralizer import DataNeutralizerAttributes, DataNeutraliserConfig
|
||||
from .datamodelFeatureNeutralizer import DataNeutralizerAttributes, DataNeutraliserConfig, DataNeutralizationSnapshot
|
||||
from .interfaceFeatureNeutralizer import getInterface as _getNeutralizerInterface
|
||||
from modules.serviceHub import getInterface as getServices
|
||||
|
||||
|
|
@ -86,7 +86,7 @@ class NeutralizationPlayground:
|
|||
'neutralized_file_id': None,
|
||||
'processed_info': {'type': 'error', 'error': 'File could not be decoded as text. Supported: UTF-8, Latin-1. For PDF/Word/Excel, use supported binary formats.'}
|
||||
}
|
||||
result = self.services.neutralization.processText(text_content)
|
||||
result = await self.services.neutralization.processTextAsync(text_content)
|
||||
result['neutralized_file_name'] = f'neutralized_{filename}'
|
||||
# Save neutralized text as file to user files
|
||||
if self.services.interfaceDbComponent and result.get('neutralized_text') is not None:
|
||||
|
|
@ -198,12 +198,28 @@ class NeutralizationPlayground:
|
|||
"""Resolve UIDs in neutralized text back to original text"""
|
||||
return self.services.neutralization.resolveText(text)
|
||||
|
||||
def getSnapshots(self) -> List[DataNeutralizationSnapshot]:
|
||||
"""Return stored neutralization text snapshots."""
|
||||
try:
|
||||
return self.services.neutralization.getSnapshots()
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting snapshots: {e}")
|
||||
return []
|
||||
|
||||
def getAttributes(self, fileId: str = None) -> List[DataNeutralizerAttributes]:
|
||||
"""Get neutralization attributes, optionally filtered by file ID"""
|
||||
try:
|
||||
allAttributes = self.services.neutralization.getAttributes()
|
||||
if fileId:
|
||||
return [attr for attr in allAttributes if attr.fileId == fileId]
|
||||
want = str(fileId).strip()
|
||||
|
||||
def _matches(a: DataNeutralizerAttributes) -> bool:
|
||||
af = a.fileId
|
||||
if af is None or (isinstance(af, str) and not str(af).strip()):
|
||||
return False
|
||||
return str(af).strip() == want
|
||||
|
||||
return [attr for attr in allAttributes if _matches(attr)]
|
||||
return allAttributes
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting attributes: {str(e)}")
|
||||
|
|
@ -396,7 +412,7 @@ class SharepointProcessor:
|
|||
textContent = fileContent.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
textContent = fileContent.decode('latin-1')
|
||||
result = self.services.neutralization.processText(textContent)
|
||||
result = await self.services.neutralization.processTextAsync(textContent)
|
||||
content_to_upload = (result.get('neutralized_text') or '').encode('utf-8')
|
||||
|
||||
neutralizedFilename = f"neutralized_{fileInfo['name']}"
|
||||
|
|
|
|||
|
|
@ -8,12 +8,33 @@ import logging
|
|||
from modules.auth import limiter, getRequestContext, RequestContext
|
||||
|
||||
# Import interfaces
|
||||
from .datamodelFeatureNeutralizer import DataNeutraliserConfig, DataNeutralizerAttributes
|
||||
from .datamodelFeatureNeutralizer import DataNeutraliserConfig, DataNeutralizerAttributes, DataNeutralizationSnapshot
|
||||
from .neutralizePlayground import NeutralizationPlayground
|
||||
|
||||
# Configure logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _assertFeatureInstancePathMatchesContext(featureInstanceIdFromPath: str, context: RequestContext) -> None:
|
||||
"""Reject path/instance mismatch when request context already carries an instance id."""
|
||||
ctxId = str(context.featureInstanceId).strip() if getattr(context, "featureInstanceId", None) else ""
|
||||
pathId = (featureInstanceIdFromPath or "").strip()
|
||||
if ctxId and pathId and pathId != ctxId:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Feature instance id in URL does not match request context (X-Instance-Id)",
|
||||
)
|
||||
|
||||
|
||||
def _fetchNeutralizationAttributes(context: RequestContext, fileId: Optional[str]) -> List[DataNeutralizerAttributes]:
|
||||
service = NeutralizationPlayground(
|
||||
context.user,
|
||||
str(context.mandateId) if context.mandateId else "",
|
||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||||
)
|
||||
return service.getAttributes(fileId)
|
||||
|
||||
|
||||
# Create router for neutralization endpoints
|
||||
router = APIRouter(
|
||||
prefix="/api/neutralization",
|
||||
|
|
@ -208,15 +229,9 @@ def get_neutralization_attributes(
|
|||
) -> List[DataNeutralizerAttributes]:
|
||||
"""Get neutralization attributes, optionally filtered by file ID"""
|
||||
try:
|
||||
service = NeutralizationPlayground(
|
||||
context.user,
|
||||
str(context.mandateId) if context.mandateId else "",
|
||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
|
||||
)
|
||||
attributes = service.getAttributes(fileId)
|
||||
|
||||
return attributes
|
||||
|
||||
return _fetchNeutralizationAttributes(context, fileId)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting neutralization attributes: {str(e)}")
|
||||
raise HTTPException(
|
||||
|
|
@ -224,6 +239,72 @@ def get_neutralization_attributes(
|
|||
detail=f"Error getting neutralization attributes: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{feature_instance_id}/attributes", response_model=List[DataNeutralizerAttributes])
|
||||
@limiter.limit("30/minute")
|
||||
def get_neutralization_attributes_scoped(
|
||||
request: Request,
|
||||
feature_instance_id: str = Path(..., description="Workspace / feature instance id (must match X-Instance-Id when set)"),
|
||||
fileId: Optional[str] = Query(None, description="Filter by file ID"),
|
||||
context: RequestContext = Depends(getRequestContext),
|
||||
) -> List[DataNeutralizerAttributes]:
|
||||
"""Same as GET /attributes; path includes instance id for workspace UI compatibility."""
|
||||
_assertFeatureInstancePathMatchesContext(feature_instance_id, context)
|
||||
try:
|
||||
return _fetchNeutralizationAttributes(context, fileId)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting neutralization attributes: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Error getting neutralization attributes: {str(e)}"
|
||||
)
|
||||
|
||||
@router.get("/snapshots", response_model=List[DataNeutralizationSnapshot])
|
||||
@limiter.limit("30/minute")
|
||||
def get_neutralization_snapshots(
|
||||
request: Request,
|
||||
context: RequestContext = Depends(getRequestContext),
|
||||
) -> List[DataNeutralizationSnapshot]:
|
||||
"""Return neutralized-text snapshots (full text with placeholders) for the current feature instance."""
|
||||
try:
|
||||
service = NeutralizationPlayground(
|
||||
context.user,
|
||||
str(context.mandateId) if context.mandateId else "",
|
||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||||
)
|
||||
return service.getSnapshots()
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting neutralization snapshots: {e}")
|
||||
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/{feature_instance_id}/snapshots", response_model=List[DataNeutralizationSnapshot])
|
||||
@limiter.limit("30/minute")
|
||||
def get_neutralization_snapshots_scoped(
|
||||
request: Request,
|
||||
feature_instance_id: str = Path(..., description="Workspace instance id (must match X-Instance-Id when set)"),
|
||||
context: RequestContext = Depends(getRequestContext),
|
||||
) -> List[DataNeutralizationSnapshot]:
|
||||
"""Same as GET /snapshots; path includes instance id for workspace UI (explicit scope)."""
|
||||
_assertFeatureInstancePathMatchesContext(feature_instance_id, context)
|
||||
try:
|
||||
service = NeutralizationPlayground(
|
||||
context.user,
|
||||
str(context.mandateId) if context.mandateId else "",
|
||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||||
)
|
||||
return service.getSnapshots()
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting neutralization snapshots (scoped): {e}")
|
||||
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/process-sharepoint", response_model=Dict[str, Any])
|
||||
@limiter.limit("5/minute")
|
||||
async def process_sharepoint_files(
|
||||
|
|
@ -317,6 +398,21 @@ def get_neutralization_stats(
|
|||
detail=f"Error getting neutralization stats: {str(e)}"
|
||||
)
|
||||
|
||||
def _deleteSingleNeutralizationAttribute(context: RequestContext, attributeId: str) -> Dict[str, str]:
|
||||
service = NeutralizationPlayground(
|
||||
context.user,
|
||||
str(context.mandateId) if context.mandateId else "",
|
||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||||
)
|
||||
success = service.deleteAttribute(attributeId)
|
||||
if success:
|
||||
return {"message": f"Attribute {attributeId} deleted"}
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Attribute {attributeId} not found",
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/attributes/single/{attributeId}", response_model=Dict[str, str])
|
||||
@limiter.limit("30/minute")
|
||||
def deleteAttribute(
|
||||
|
|
@ -326,20 +422,7 @@ def deleteAttribute(
|
|||
) -> Dict[str, str]:
|
||||
"""Delete a single neutralization attribute by ID."""
|
||||
try:
|
||||
service = NeutralizationPlayground(
|
||||
context.user,
|
||||
str(context.mandateId) if context.mandateId else "",
|
||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
|
||||
)
|
||||
success = service.deleteAttribute(attributeId)
|
||||
|
||||
if success:
|
||||
return {"message": f"Attribute {attributeId} deleted"}
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Attribute {attributeId} not found"
|
||||
)
|
||||
return _deleteSingleNeutralizationAttribute(context, attributeId)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
|
@ -347,6 +430,40 @@ def deleteAttribute(
|
|||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.delete("/{feature_instance_id}/attributes/single/{attributeId}", response_model=Dict[str, str])
|
||||
@limiter.limit("30/minute")
|
||||
def deleteAttributeScoped(
|
||||
request: Request,
|
||||
feature_instance_id: str = Path(..., description="Workspace / feature instance id"),
|
||||
attributeId: str = Path(..., description="Attribute ID to delete"),
|
||||
context: RequestContext = Depends(getRequestContext),
|
||||
) -> Dict[str, str]:
|
||||
"""Same as DELETE /attributes/single/{attributeId}; path includes instance id for workspace UI."""
|
||||
_assertFeatureInstancePathMatchesContext(feature_instance_id, context)
|
||||
try:
|
||||
return _deleteSingleNeutralizationAttribute(context, attributeId)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting attribute: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
def _retriggerNeutralizationBody(context: RequestContext, fileId: str) -> Dict[str, str]:
|
||||
if not fileId:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="fileId is required",
|
||||
)
|
||||
service = NeutralizationPlayground(
|
||||
context.user,
|
||||
str(context.mandateId) if context.mandateId else "",
|
||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||||
)
|
||||
service.cleanupFileAttributes(fileId)
|
||||
return {"message": f"Neutralization re-triggered for file {fileId}", "fileId": fileId}
|
||||
|
||||
|
||||
@router.post("/retrigger", response_model=Dict[str, str])
|
||||
@limiter.limit("10/minute")
|
||||
def retriggerNeutralization(
|
||||
|
|
@ -356,20 +473,26 @@ def retriggerNeutralization(
|
|||
) -> Dict[str, str]:
|
||||
"""Re-trigger neutralization for a specific file."""
|
||||
try:
|
||||
fileId = retriggerData.get("fileId", "")
|
||||
if not fileId:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="fileId is required"
|
||||
)
|
||||
return _retriggerNeutralizationBody(context, retriggerData.get("fileId", ""))
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error re-triggering neutralization: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
service = NeutralizationPlayground(
|
||||
context.user,
|
||||
str(context.mandateId) if context.mandateId else "",
|
||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
|
||||
)
|
||||
service.cleanupFileAttributes(fileId)
|
||||
return {"message": f"Neutralization re-triggered for file {fileId}", "fileId": fileId}
|
||||
|
||||
@router.post("/{feature_instance_id}/retrigger", response_model=Dict[str, str])
|
||||
@limiter.limit("10/minute")
|
||||
def retriggerNeutralizationScoped(
|
||||
request: Request,
|
||||
feature_instance_id: str = Path(..., description="Workspace / feature instance id"),
|
||||
retriggerData: Dict[str, str] = Body(...),
|
||||
context: RequestContext = Depends(getRequestContext),
|
||||
) -> Dict[str, str]:
|
||||
"""Same as POST /retrigger; path includes instance id for workspace UI compatibility."""
|
||||
_assertFeatureInstancePathMatchesContext(feature_instance_id, context)
|
||||
try:
|
||||
return _retriggerNeutralizationBody(context, retriggerData.get("fileId", ""))
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -60,6 +60,12 @@ class NeutralizationService:
|
|||
mandateId=serviceCenter.mandateId or dbApp.mandateId,
|
||||
featureInstanceId=getattr(serviceCenter, 'featureInstanceId', None) or getattr(dbApp, 'featureInstanceId', None)
|
||||
)
|
||||
elif serviceCenter and getattr(serviceCenter, "user", None):
|
||||
self.interfaceNeutralizer = getNeutralizerInterface(
|
||||
currentUser=serviceCenter.user,
|
||||
mandateId=getattr(serviceCenter, 'mandateId', None) or getattr(serviceCenter, 'mandate_id', None),
|
||||
featureInstanceId=getattr(serviceCenter, 'featureInstanceId', None) or getattr(serviceCenter, 'feature_instance_id', None),
|
||||
)
|
||||
|
||||
namesList = NamesToParse if isinstance(NamesToParse, list) else []
|
||||
self.NamesToParse = namesList
|
||||
|
|
@ -82,11 +88,213 @@ class NeutralizationService:
|
|||
|
||||
# Public API: process text or file
|
||||
|
||||
def processText(self, text: str) -> Dict[str, Any]:
|
||||
"""Neutralize a raw text string and return a standard result dict."""
|
||||
result = self._neutralizeText(text, 'text')
|
||||
self._persistAttributes(result.get('mapping', {}), None)
|
||||
return result
|
||||
_NEUT_INSTRUCTION = (
|
||||
"Analyze the following text and identify ALL sensitive content that must be neutralized:\n"
|
||||
"1. Personal data (PII): names of persons, email addresses, phone numbers, "
|
||||
"physical addresses, ID numbers, dates of birth, financial data (IBAN, account numbers), "
|
||||
"social security numbers\n"
|
||||
"2. Protected business logic: proprietary algorithms, trade secrets, confidential "
|
||||
"processes, internal procedures, code snippets that reveal implementation details\n"
|
||||
"3. Named entities: company names, product names, project names, brand names\n\n"
|
||||
"Return ONLY a JSON array (no markdown, no explanation):\n"
|
||||
'[{"text":"exact substring","type":"name|email|phone|address|id|financial|logic|company|product|location|other"}]\n\n'
|
||||
"Rules:\n"
|
||||
"- Every entry's 'text' must be an exact, verbatim substring of the input.\n"
|
||||
"- Do NOT include generic words, common language constructs or non-sensitive terms.\n"
|
||||
"- If nothing is sensitive, return [].\n\n"
|
||||
)
|
||||
_BYTES_PER_TOKEN = 3
|
||||
_SELECTOR_MAX_RATIO = 0.8
|
||||
_CHUNK_SAFETY_MARGIN = 0.9
|
||||
|
||||
def _resolveNeutModel(self):
|
||||
"""Query the model registry for the best NEUTRALIZATION_TEXT model.
|
||||
Returns the model object (with contextLength etc.) or None."""
|
||||
try:
|
||||
from modules.aicore.aicoreModelRegistry import modelRegistry
|
||||
from modules.aicore.aicoreModelSelector import modelSelector as _modSel
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
||||
|
||||
_models = modelRegistry.getAvailableModels()
|
||||
_opts = AiCallOptions(operationType=OperationTypeEnum.NEUTRALIZATION_TEXT)
|
||||
_failover = _modSel.getFailoverModelList("x", "", _opts, _models)
|
||||
return _failover[0] if _failover else None
|
||||
except Exception as _e:
|
||||
logger.warning(f"_resolveNeutModel failed: {_e}")
|
||||
return None
|
||||
|
||||
def _calcMaxChunkChars(self, model) -> int:
|
||||
"""Derive the maximum text-chunk size (in characters) from the selected
|
||||
model's contextLength, mirroring the rules in aicoreModelSelector:
|
||||
promptTokens = promptBytes / 3 must be <= contextLength * 0.8
|
||||
Subtract the instruction overhead and apply a safety margin."""
|
||||
if not model or getattr(model, 'contextLength', 0) <= 0:
|
||||
return 5000
|
||||
_instructionBytes = len(self._NEUT_INSTRUCTION.encode('utf-8')) + 30
|
||||
_maxPromptBytes = int(model.contextLength * self._SELECTOR_MAX_RATIO * self._BYTES_PER_TOKEN)
|
||||
_maxChunkChars = int((_maxPromptBytes - _instructionBytes) * self._CHUNK_SAFETY_MARGIN)
|
||||
return max(_maxChunkChars, 500)
|
||||
|
||||
@staticmethod
|
||||
def _splitTextIntoChunks(text: str, maxChars: int) -> List[str]:
|
||||
"""Split *text* into chunks of at most *maxChars*, preferring paragraph
|
||||
then sentence boundaries so that the LLM sees coherent blocks."""
|
||||
if len(text) <= maxChars:
|
||||
return [text]
|
||||
|
||||
chunks: List[str] = []
|
||||
remaining = text
|
||||
while remaining:
|
||||
if len(remaining) <= maxChars:
|
||||
chunks.append(remaining)
|
||||
break
|
||||
_cut = maxChars
|
||||
_para = remaining.rfind("\n\n", 0, _cut)
|
||||
if _para > maxChars // 3:
|
||||
_cut = _para + 2
|
||||
else:
|
||||
_nl = remaining.rfind("\n", 0, _cut)
|
||||
if _nl > maxChars // 3:
|
||||
_cut = _nl + 1
|
||||
else:
|
||||
_dot = remaining.rfind(". ", 0, _cut)
|
||||
if _dot > maxChars // 3:
|
||||
_cut = _dot + 2
|
||||
else:
|
||||
_sp = remaining.rfind(" ", 0, _cut)
|
||||
if _sp > maxChars // 3:
|
||||
_cut = _sp + 1
|
||||
chunks.append(remaining[:_cut])
|
||||
remaining = remaining[_cut:]
|
||||
return chunks
|
||||
|
||||
async def _analyseChunk(self, aiService, chunkText: str) -> List[dict]:
|
||||
"""Send one chunk to the NEUTRALIZATION_TEXT model, return raw findings list."""
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
|
||||
|
||||
_prompt = self._NEUT_INSTRUCTION + "Text to analyze:\n---\n" + chunkText + "\n---"
|
||||
_request = AiCallRequest(
|
||||
prompt=_prompt,
|
||||
options=AiCallOptions(operationType=OperationTypeEnum.NEUTRALIZATION_TEXT),
|
||||
)
|
||||
_response = await aiService.callAi(_request)
|
||||
if not _response or not getattr(_response, 'content', None):
|
||||
raise RuntimeError(
|
||||
"Neutralization AI call returned no response "
|
||||
"(no model available for NEUTRALIZATION_TEXT?)"
|
||||
)
|
||||
if getattr(_response, 'errorCount', 0) > 0 or getattr(_response, 'modelName', '') == 'error':
|
||||
raise RuntimeError(
|
||||
f"Neutralization AI call failed: {_response.content}"
|
||||
)
|
||||
_content = _response.content.strip()
|
||||
if _content.startswith("```"):
|
||||
_content = _content.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
|
||||
try:
|
||||
return json.loads(_content)
|
||||
except json.JSONDecodeError:
|
||||
_bracket = _content.find("[")
|
||||
if _bracket >= 0:
|
||||
try:
|
||||
return json.loads(_content[_bracket:])
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
return []
|
||||
|
||||
async def processTextAsync(self, text: str, fileId: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""AI-powered text neutralization with automatic chunking.
|
||||
|
||||
If *text* exceeds the safe token budget for the neutralization model
|
||||
it is split into smaller chunks, each analysed separately. Findings
|
||||
are merged and de-duplicated before placeholder replacement.
|
||||
|
||||
Regex patterns run as a supplementary pass to catch anything the
|
||||
model missed.
|
||||
"""
|
||||
import uuid as _uuid
|
||||
|
||||
aiService = None
|
||||
if self._getService:
|
||||
try:
|
||||
aiService = self._getService("ai")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
aiMapping: Dict[str, str] = {}
|
||||
|
||||
if not aiService or not hasattr(aiService, 'callAi'):
|
||||
raise RuntimeError("Neutralization requires an AI service but none is available")
|
||||
|
||||
if text.strip():
|
||||
_neutModel = self._resolveNeutModel()
|
||||
_maxChunkChars = self._calcMaxChunkChars(_neutModel)
|
||||
logger.info(
|
||||
f"processTextAsync: model={getattr(_neutModel, 'name', '?')}, "
|
||||
f"contextLength={getattr(_neutModel, 'contextLength', '?')} tokens, "
|
||||
f"maxChunkChars={_maxChunkChars}"
|
||||
)
|
||||
|
||||
_chunks = self._splitTextIntoChunks(text, _maxChunkChars)
|
||||
if len(_chunks) > 1:
|
||||
logger.info(
|
||||
f"processTextAsync: text ({len(text)} chars) "
|
||||
f"split into {len(_chunks)} chunk(s) of max {_maxChunkChars} chars"
|
||||
)
|
||||
|
||||
for _chunkIdx, _chunkText in enumerate(_chunks):
|
||||
_findings = await self._analyseChunk(aiService, _chunkText)
|
||||
if not isinstance(_findings, list):
|
||||
continue
|
||||
for _f in _findings:
|
||||
if not isinstance(_f, dict):
|
||||
continue
|
||||
_origText = _f.get("text", "")
|
||||
_patType = _f.get("type", "other").lower()
|
||||
if not _origText or _origText not in text:
|
||||
continue
|
||||
if _origText in aiMapping:
|
||||
continue
|
||||
_uid = str(_uuid.uuid4())
|
||||
_placeholder = f"[{_patType}.{_uid}]"
|
||||
aiMapping[_origText] = _placeholder
|
||||
|
||||
logger.info(f"AI neutralization found {len(aiMapping)} item(s)"
|
||||
+ (f" across {len(_chunks)} chunk(s)" if len(_chunks) > 1 else ""))
|
||||
|
||||
neutralizedText = text
|
||||
for _orig, _ph in sorted(aiMapping.items(), key=lambda x: -len(x[0])):
|
||||
neutralizedText = neutralizedText.replace(_orig, _ph)
|
||||
|
||||
regexMapping: Dict[str, str] = {}
|
||||
finalText = neutralizedText
|
||||
|
||||
allMapping = {**aiMapping, **regexMapping}
|
||||
if allMapping:
|
||||
_loop = asyncio.get_event_loop()
|
||||
await _loop.run_in_executor(
|
||||
None, self._persistAttributes, allMapping, fileId
|
||||
)
|
||||
logger.debug(f"processTextAsync: {len(allMapping)} attribute(s) persisted")
|
||||
|
||||
return {
|
||||
'neutralized_text': finalText,
|
||||
'mapping': allMapping,
|
||||
'attributes': [
|
||||
NeutralizationAttribute(original=k, placeholder=v)
|
||||
for k, v in allMapping.items()
|
||||
],
|
||||
'processed_info': {'type': 'text', 'ai_findings': len(aiMapping), 'regex_findings': len(regexMapping)},
|
||||
}
|
||||
|
||||
def processText(self, text: str, fileId: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Sync wrapper around processTextAsync. Propagates errors."""
|
||||
try:
|
||||
return asyncio.run(self.processTextAsync(text, fileId))
|
||||
except RuntimeError as _re:
|
||||
if "cannot be called from a running event loop" in str(_re):
|
||||
loop = asyncio.get_event_loop()
|
||||
return loop.run_until_complete(self.processTextAsync(text, fileId))
|
||||
raise
|
||||
|
||||
def processFile(self, fileId: str) -> Dict[str, Any]:
|
||||
"""Neutralize a file referenced by its fileId using component interface.
|
||||
|
|
@ -153,8 +361,7 @@ class NeutralizationService:
|
|||
raise ValueError("Unable to decode file content as text.")
|
||||
textContent = decoded
|
||||
|
||||
result = self._neutralizeText(textContent, textType)
|
||||
self._persistAttributes(result.get('mapping', {}), fileId)
|
||||
result = self.processText(textContent, fileId)
|
||||
if fileName:
|
||||
result['neutralized_file_name'] = f"neutralized_{fileName}"
|
||||
result['file_id'] = fileId
|
||||
|
|
@ -319,6 +526,22 @@ class NeutralizationService:
|
|||
return False
|
||||
return self.interfaceNeutralizer.deleteNeutralizationAttributes(fileId)
|
||||
|
||||
def getSnapshots(self):
|
||||
if not self.interfaceNeutralizer:
|
||||
return []
|
||||
return self.interfaceNeutralizer.getSnapshots()
|
||||
|
||||
def clearSnapshots(self) -> int:
|
||||
if not self.interfaceNeutralizer:
|
||||
return 0
|
||||
return self.interfaceNeutralizer.clearSnapshots()
|
||||
|
||||
def saveSnapshot(self, sourceLabel: str, neutralizedText: str, placeholderCount: int = 0):
|
||||
if not self.interfaceNeutralizer:
|
||||
logger.warning("saveSnapshot: interfaceNeutralizer is None — snapshot not stored")
|
||||
return None
|
||||
return self.interfaceNeutralizer.createSnapshot(sourceLabel, neutralizedText, placeholderCount)
|
||||
|
||||
def _persistAttributes(self, mapping: Dict[str, str], fileId: Optional[str]) -> None:
|
||||
"""Persist mapping to DB for resolve to work. mapping: originalText -> placeholder e.g. '[email.uuid]'"""
|
||||
if not self.interfaceNeutralizer or not mapping:
|
||||
|
|
@ -393,7 +616,7 @@ class NeutralizationService:
|
|||
except Exception as _imgErr:
|
||||
logger.warning(f"Image check failed in binary file '{fileName}': {_imgErr}, removing (fail-safe)")
|
||||
continue
|
||||
nr = self._neutralizeText(str(data), 'text' if type_group != 'table' else 'csv')
|
||||
nr = await self.processTextAsync(str(data), fileId)
|
||||
proc = nr.get('processed_info', {}) or {}
|
||||
if isinstance(proc, dict) and proc.get('type') == 'error':
|
||||
neutralization_error = proc.get('error', 'Neutralization failed')
|
||||
|
|
@ -402,7 +625,6 @@ class NeutralizationService:
|
|||
all_mapping.update(mapping)
|
||||
new_part = {**p, 'data': neu_text}
|
||||
neutralized_parts.append(new_part)
|
||||
self._persistAttributes(all_mapping, fileId)
|
||||
|
||||
# 3. PDF: Use in-place only; no fallback to render
|
||||
if mimeType == "application/pdf":
|
||||
|
|
@ -546,10 +768,31 @@ class NeutralizationService:
|
|||
|
||||
# Helper functions
|
||||
|
||||
def _neutralizeTextLight(self, text: str) -> Dict[str, Any]:
|
||||
"""Regex-only supplementary pass using already-initialised processors.
|
||||
|
||||
Unlike ``_neutralizeText`` this does **no** DB I/O
|
||||
(``_reloadNamesFromConfig`` is skipped) so it is safe to call from
|
||||
an async context without blocking the event-loop or risking a
|
||||
DB-connection-pool deadlock during parallel document processing.
|
||||
"""
|
||||
try:
|
||||
data, mapping, replaced_fields, processed_info = self.textProcessor.processTextContent(text)
|
||||
neutralized_text = str(data)
|
||||
attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()]
|
||||
return NeutralizationResult(
|
||||
neutralized_text=neutralized_text,
|
||||
mapping=mapping,
|
||||
attributes=attributes,
|
||||
processed_info=processed_info,
|
||||
).model_dump()
|
||||
except Exception as e:
|
||||
logger.warning(f"_neutralizeTextLight error: {e}")
|
||||
return {'neutralized_text': text, 'mapping': {}, 'attributes': [], 'processed_info': {'type': 'error', 'error': str(e)}}
|
||||
|
||||
def _neutralizeText(self, text: str, textType: str = None) -> Dict[str, Any]:
|
||||
"""Process text and return unified dict for API consumption."""
|
||||
try:
|
||||
# Reload names from config before processing to ensure we have the latest names
|
||||
self._reloadNamesFromConfig()
|
||||
|
||||
# Auto-detect content type if not provided
|
||||
|
|
|
|||
|
|
@ -689,6 +689,9 @@ async def _runWorkspaceAgent(
|
|||
|
||||
if allowedProviders:
|
||||
aiService.services.allowedProviders = allowedProviders
|
||||
logger.info(f"Workspace agent: allowedProviders={allowedProviders}")
|
||||
else:
|
||||
logger.debug("Workspace agent: no allowedProviders in request")
|
||||
if requireNeutralization is not None:
|
||||
ctx.requireNeutralization = requireNeutralization
|
||||
|
||||
|
|
|
|||
|
|
@ -690,8 +690,8 @@ def _registerCoreTools(registry: ToolRegistry, services):
|
|||
if _fileNeedNeutralize:
|
||||
try:
|
||||
_nSvc = services.getService("neutralization") if hasattr(services, "getService") else None
|
||||
if _nSvc and hasattr(_nSvc, 'processText'):
|
||||
_nResult = _nSvc.processText(text)
|
||||
if _nSvc and hasattr(_nSvc, 'processTextAsync'):
|
||||
_nResult = await _nSvc.processTextAsync(text, fileId)
|
||||
if _nResult and _nResult.get("neutralized_text"):
|
||||
text = _nResult["neutralized_text"]
|
||||
logger.debug(f"readFile: neutralized text for file {fileId}")
|
||||
|
|
@ -3054,7 +3054,7 @@ def _registerCoreTools(registry: ToolRegistry, services):
|
|||
if not neutralizationService.interfaceDbComponent:
|
||||
neutralizationService.interfaceDbComponent = services.chat.interfaceDbComponent
|
||||
if text:
|
||||
result = neutralizationService.processText(text)
|
||||
result = await neutralizationService.processTextAsync(text, fileId or None)
|
||||
else:
|
||||
result = neutralizationService.processFile(fileId)
|
||||
if result:
|
||||
|
|
|
|||
|
|
@ -181,13 +181,11 @@ class AiService:
|
|||
_wasNeutralized = False
|
||||
_excludedDocs: List[str] = []
|
||||
if self._shouldNeutralize(request):
|
||||
request, _wasNeutralized, _excludedDocs = self._neutralizeRequest(request)
|
||||
request, _wasNeutralized, _excludedDocs = await self._neutralizeRequest(request)
|
||||
if _excludedDocs:
|
||||
logger.warning(f"Neutralization partial failures (continuing): {_excludedDocs}")
|
||||
|
||||
# Set billing callback on aiObjects BEFORE the AI call
|
||||
# This callback is invoked by _callWithModel() after EVERY individual model call
|
||||
# For parallel content parts (e.g., 200 MB doc), each model call creates its own transaction
|
||||
logger.debug("callAi: neutralization phase done, starting main AI call")
|
||||
self.aiObjects.billingCallback = self._createBillingCallback()
|
||||
|
||||
try:
|
||||
|
|
@ -229,10 +227,11 @@ class AiService:
|
|||
_wasNeutralized = False
|
||||
_excludedDocs: List[str] = []
|
||||
if self._shouldNeutralize(request):
|
||||
request, _wasNeutralized, _excludedDocs = self._neutralizeRequest(request)
|
||||
request, _wasNeutralized, _excludedDocs = await self._neutralizeRequest(request)
|
||||
if _excludedDocs:
|
||||
logger.warning(f"Neutralization partial failures in stream (continuing): {_excludedDocs}")
|
||||
|
||||
logger.debug("callAiStream: neutralization phase done, starting main AI stream")
|
||||
self.aiObjects.billingCallback = self._createBillingCallback()
|
||||
try:
|
||||
async for chunk in self.aiObjects.callWithTextContextStream(request):
|
||||
|
|
@ -557,6 +556,25 @@ detectedIntent-Werte:
|
|||
# NEUTRALIZATION: Centralized prompt neutralization / response rehydration
|
||||
# =========================================================================
|
||||
|
||||
async def _hasNeutralizationModel(self) -> bool:
|
||||
"""Fast check: is at least one model available for NEUTRALIZATION_TEXT
|
||||
given the current effective provider list? No AI call is made."""
|
||||
try:
|
||||
from modules.aicore.aicoreModelRegistry import modelRegistry
|
||||
from modules.aicore.aicoreModelSelector import modelSelector as _modSel
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
||||
|
||||
_models = modelRegistry.getAvailableModels()
|
||||
_providers = self._calculateEffectiveProviders()
|
||||
if _providers:
|
||||
_models = [m for m in _models if m.connectorType in _providers]
|
||||
_opts = AiCallOptions(operationType=OperationTypeEnum.NEUTRALIZATION_TEXT)
|
||||
_failover = _modSel.getFailoverModelList("x", "", _opts, _models)
|
||||
return bool(_failover)
|
||||
except Exception as _e:
|
||||
logger.warning(f"_hasNeutralizationModel check failed: {_e}")
|
||||
return True
|
||||
|
||||
def _shouldNeutralize(self, request: AiCallRequest) -> bool:
|
||||
"""Check if this AI request should have neutralization applied.
|
||||
|
||||
|
|
@ -566,11 +584,17 @@ detectedIntent-Werte:
|
|||
3. Per-request (request.requireNeutralization)
|
||||
|
||||
No source can override another's True with False.
|
||||
Neutralization calls themselves (NEUTRALIZATION_TEXT / NEUTRALIZATION_IMAGE)
|
||||
are never re-neutralized (recursion guard).
|
||||
"""
|
||||
try:
|
||||
if not request.prompt and not request.messages and not request.context:
|
||||
return False
|
||||
|
||||
_opType = request.options.operationType if request.options else None
|
||||
if _opType in (OperationTypeEnum.NEUTRALIZATION_TEXT, OperationTypeEnum.NEUTRALIZATION_IMAGE):
|
||||
return False
|
||||
|
||||
_sources = []
|
||||
|
||||
# Source 1: Feature-Instance config
|
||||
|
|
@ -600,11 +624,15 @@ detectedIntent-Werte:
|
|||
logger.error(f"_shouldNeutralize check failed: {e} — defaulting to False")
|
||||
return False
|
||||
|
||||
def _neutralizeRequest(self, request: AiCallRequest) -> Tuple[AiCallRequest, bool, List[str]]:
|
||||
"""Neutralize the prompt text and messages in an AiCallRequest.
|
||||
async def _neutralizeRequest(self, request: AiCallRequest) -> Tuple[AiCallRequest, bool, List[str]]:
|
||||
"""Neutralize the prompt text and messages in an AiCallRequest (async).
|
||||
|
||||
Returns (modifiedRequest, wasNeutralized, excludedDocs).
|
||||
|
||||
Uses ``processTextAsync`` which calls AI with NEUTRALIZATION_TEXT
|
||||
to identify PII, protected logic and names — then applies regex as
|
||||
supplementary pass.
|
||||
|
||||
FAILSAFE behaviour when ``requireNeutralization is True`` (explicit):
|
||||
- Service unavailable → raises (caller must not send raw data to AI).
|
||||
- Prompt neutralization fails → raises.
|
||||
|
|
@ -619,7 +647,7 @@ detectedIntent-Werte:
|
|||
excludedDocs: List[str] = []
|
||||
|
||||
neutralSvc = self._get_service("neutralization")
|
||||
if not neutralSvc or not hasattr(neutralSvc, 'processText'):
|
||||
if not neutralSvc or not hasattr(neutralSvc, 'processTextAsync'):
|
||||
if _hardMode:
|
||||
raise RuntimeError("Neutralization explicitly required but service unavailable — AI call BLOCKED")
|
||||
logger.warning("Neutralization required by config but service unavailable — continuing without neutralization")
|
||||
|
|
@ -627,13 +655,25 @@ detectedIntent-Werte:
|
|||
return request, False, excludedDocs
|
||||
|
||||
_wasNeutralized = False
|
||||
_snapshots: list = []
|
||||
|
||||
if _hardMode:
|
||||
_hasNeutModel = await self._hasNeutralizationModel()
|
||||
if not _hasNeutModel:
|
||||
raise RuntimeError(
|
||||
"Neutralisierung ist aktiviert, aber es ist kein AI-Modell für "
|
||||
"NEUTRALIZATION_TEXT verfügbar. Bitte ein Modell für Neutralisierung "
|
||||
"freigeben oder die Neutralisierung deaktivieren."
|
||||
)
|
||||
|
||||
if request.prompt:
|
||||
logger.debug(f"_neutralizeRequest: neutralizing prompt ({len(request.prompt)} chars)")
|
||||
try:
|
||||
result = neutralSvc.processText(request.prompt)
|
||||
result = await neutralSvc.processTextAsync(request.prompt)
|
||||
if result and result.get("neutralized_text"):
|
||||
request.prompt = result["neutralized_text"]
|
||||
_wasNeutralized = True
|
||||
_snapshots.append(("Prompt", result["neutralized_text"], len(result.get("mapping", {}))))
|
||||
logger.debug("Neutralized prompt in AiCallRequest")
|
||||
else:
|
||||
if _hardMode:
|
||||
|
|
@ -649,11 +689,13 @@ detectedIntent-Werte:
|
|||
excludedDocs.append(f"Prompt neutralization error: {e}")
|
||||
|
||||
if request.context:
|
||||
logger.debug(f"_neutralizeRequest: neutralizing context ({len(request.context)} chars)")
|
||||
try:
|
||||
result = neutralSvc.processText(request.context)
|
||||
result = await neutralSvc.processTextAsync(request.context)
|
||||
if result and result.get("neutralized_text"):
|
||||
request.context = result["neutralized_text"]
|
||||
_wasNeutralized = True
|
||||
_snapshots.append(("Kontext", result["neutralized_text"], len(result.get("mapping", {}))))
|
||||
logger.debug("Neutralized context in AiCallRequest")
|
||||
else:
|
||||
if _hardMode:
|
||||
|
|
@ -668,6 +710,9 @@ detectedIntent-Werte:
|
|||
logger.warning(f"Neutralization of context failed: {e} — sending original context")
|
||||
excludedDocs.append(f"Context neutralization error: {e}")
|
||||
|
||||
_msgCount = len(request.messages) if request.messages and isinstance(request.messages, list) else 0
|
||||
if _msgCount:
|
||||
logger.debug(f"_neutralizeRequest: neutralizing {_msgCount} message(s)")
|
||||
if request.messages and isinstance(request.messages, list):
|
||||
cleanMessages = []
|
||||
for idx, msg in enumerate(request.messages):
|
||||
|
|
@ -680,24 +725,30 @@ detectedIntent-Werte:
|
|||
cleanMessages.append(msg)
|
||||
continue
|
||||
try:
|
||||
result = neutralSvc.processText(content)
|
||||
result = await neutralSvc.processTextAsync(content)
|
||||
if result and result.get("neutralized_text"):
|
||||
msg["content"] = result["neutralized_text"]
|
||||
_wasNeutralized = True
|
||||
_role = msg.get("role", "?")
|
||||
_snapshots.append((f"Nachricht {idx+1} ({_role})", result["neutralized_text"], len(result.get("mapping", {}))))
|
||||
cleanMessages.append(msg)
|
||||
else:
|
||||
if _hardMode:
|
||||
logger.warning(f"Message[{idx}] neutralization empty — REMOVING message (hard mode)")
|
||||
excludedDocs.append(f"Message[{idx}] neutralization failed; message REMOVED")
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f"Neutralisierung von Nachricht {idx+1}/{_msgCount} schlug fehl "
|
||||
f"(leere Antwort). Konversation kann nicht sicher gesendet werden."
|
||||
)
|
||||
logger.warning(f"Neutralization of message[{idx}] returned no neutralized_text — keeping original")
|
||||
excludedDocs.append(f"Message[{idx}] neutralization failed; original kept")
|
||||
cleanMessages.append(msg)
|
||||
except RuntimeError:
|
||||
raise
|
||||
except Exception as e:
|
||||
if _hardMode:
|
||||
logger.warning(f"Message[{idx}] neutralization error — REMOVING message (hard mode): {e}")
|
||||
excludedDocs.append(f"Message[{idx}] neutralization error; message REMOVED: {e}")
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f"Neutralisierung von Nachricht {idx+1}/{_msgCount} schlug fehl: {e}. "
|
||||
f"Konversation kann nicht sicher gesendet werden."
|
||||
) from e
|
||||
logger.warning(f"Neutralization of message[{idx}] failed: {e} — keeping original")
|
||||
excludedDocs.append(f"Message[{idx}] neutralization error: {e}")
|
||||
cleanMessages.append(msg)
|
||||
|
|
@ -710,22 +761,28 @@ detectedIntent-Werte:
|
|||
_partType = _part.get("type", "")
|
||||
if _partType == "text" and _part.get("text"):
|
||||
try:
|
||||
_result = neutralSvc.processText(_part["text"])
|
||||
_result = await neutralSvc.processTextAsync(_part["text"])
|
||||
if _result and _result.get("neutralized_text"):
|
||||
_part["text"] = _result["neutralized_text"]
|
||||
_wasNeutralized = True
|
||||
_role = msg.get("role", "?")
|
||||
_snapshots.append((f"Nachricht {idx+1}.{_partIdx+1} ({_role})", _result["neutralized_text"], len(_result.get("mapping", {}))))
|
||||
_cleanParts.append(_part)
|
||||
else:
|
||||
if _hardMode:
|
||||
logger.warning(f"Message[{idx}].content[{_partIdx}] text neutralization empty — REMOVING part")
|
||||
excludedDocs.append(f"Message[{idx}].content[{_partIdx}] text removed")
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f"Neutralisierung von Nachricht {idx+1}, Teil {_partIdx+1} "
|
||||
f"schlug fehl (leere Antwort)."
|
||||
)
|
||||
_cleanParts.append(_part)
|
||||
except RuntimeError:
|
||||
raise
|
||||
except Exception as e:
|
||||
if _hardMode:
|
||||
logger.warning(f"Message[{idx}].content[{_partIdx}] text neutralization error — REMOVING: {e}")
|
||||
excludedDocs.append(f"Message[{idx}].content[{_partIdx}] text error: {e}")
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f"Neutralisierung von Nachricht {idx+1}, Teil {_partIdx+1} "
|
||||
f"schlug fehl: {e}"
|
||||
) from e
|
||||
_cleanParts.append(_part)
|
||||
elif _partType == "image_url":
|
||||
if _hardMode:
|
||||
|
|
@ -738,12 +795,12 @@ detectedIntent-Werte:
|
|||
if _cleanParts:
|
||||
msg["content"] = _cleanParts
|
||||
cleanMessages.append(msg)
|
||||
elif _hardMode:
|
||||
logger.warning(f"Message[{idx}] all parts removed — REMOVING message")
|
||||
excludedDocs.append(f"Message[{idx}] fully removed after neutralization")
|
||||
else:
|
||||
cleanMessages.append(msg)
|
||||
else:
|
||||
cleanMessages.append(msg)
|
||||
request.messages = cleanMessages
|
||||
logger.debug(f"_neutralizeRequest: messages done, {len(cleanMessages)} kept of {_msgCount}")
|
||||
|
||||
if hasattr(request, 'contentParts') and request.contentParts:
|
||||
_cleanParts = []
|
||||
|
|
@ -752,10 +809,11 @@ detectedIntent-Werte:
|
|||
_data = getattr(_cp, 'data', '') or ''
|
||||
if _tg in ('text', 'table') and _data:
|
||||
try:
|
||||
_result = neutralSvc.processText(str(_data))
|
||||
_result = await neutralSvc.processTextAsync(str(_data))
|
||||
if _result and _result.get("neutralized_text"):
|
||||
_cp.data = _result["neutralized_text"]
|
||||
_wasNeutralized = True
|
||||
_snapshots.append((f"Inhalt {_cpIdx+1} ({_tg})", _result["neutralized_text"], len(_result.get("mapping", {}))))
|
||||
_cleanParts.append(_cp)
|
||||
else:
|
||||
if _hardMode:
|
||||
|
|
@ -778,7 +836,18 @@ detectedIntent-Werte:
|
|||
else:
|
||||
_cleanParts.append(_cp)
|
||||
request.contentParts = _cleanParts
|
||||
logger.debug(f"_neutralizeRequest: contentParts done, {len(_cleanParts)} kept")
|
||||
|
||||
if _snapshots and _wasNeutralized:
|
||||
try:
|
||||
neutralSvc.clearSnapshots()
|
||||
for _label, _text, _phCount in _snapshots:
|
||||
neutralSvc.saveSnapshot(_label, _text, _phCount)
|
||||
logger.debug(f"_neutralizeRequest: saved {len(_snapshots)} snapshot(s)")
|
||||
except Exception as _snapErr:
|
||||
logger.warning(f"_neutralizeRequest: could not save snapshots: {_snapErr}")
|
||||
|
||||
logger.info(f"_neutralizeRequest complete: neutralized={_wasNeutralized}, excluded={len(excludedDocs)}")
|
||||
return request, _wasNeutralized, excludedDocs
|
||||
|
||||
def _rehydrateResponse(self, responseText: str) -> str:
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import logging
|
|||
import importlib
|
||||
from typing import Dict, Type, List, Optional, Tuple
|
||||
from .documentRendererBaseTemplate import BaseRenderer
|
||||
from .codeRendererBaseTemplate import BaseCodeRenderer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -54,7 +55,7 @@ class RendererRegistry:
|
|||
attr = getattr(module, attrName)
|
||||
if (isinstance(attr, type) and
|
||||
issubclass(attr, BaseRenderer) and
|
||||
attr != BaseRenderer and
|
||||
attr not in (BaseRenderer, BaseCodeRenderer) and
|
||||
hasattr(attr, 'getSupportedFormats')):
|
||||
self._registerRendererClass(attr)
|
||||
|
||||
|
|
@ -72,6 +73,8 @@ class RendererRegistry:
|
|||
"""Register a renderer class keyed by (format, outputStyle)."""
|
||||
try:
|
||||
supportedFormats = rendererClass.getSupportedFormats()
|
||||
if not supportedFormats:
|
||||
return
|
||||
priority = rendererClass.getPriority() if hasattr(rendererClass, 'getPriority') else 0
|
||||
|
||||
for formatName in supportedFormats:
|
||||
|
|
|
|||
|
|
@ -158,7 +158,7 @@ class KnowledgeService:
|
|||
if not _textContent:
|
||||
continue
|
||||
try:
|
||||
_neutralResult = _neutralSvc.processText(_textContent)
|
||||
_neutralResult = await _neutralSvc.processTextAsync(_textContent, fileId)
|
||||
if _neutralResult and _neutralResult.get("neutralized_text"):
|
||||
_obj["data"] = _neutralResult["neutralized_text"]
|
||||
_neutralizedObjects.append(_obj)
|
||||
|
|
|
|||
|
|
@ -169,7 +169,7 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
f"Neutralizing part {len(neutralizedParts) + 1} of document {i+1}"
|
||||
)
|
||||
|
||||
neutralizationResult = self.services.neutralization.processText(part.data)
|
||||
neutralizationResult = await self.services.neutralization.processTextAsync(part.data)
|
||||
|
||||
if neutralizationResult and 'neutralized_text' in neutralizationResult:
|
||||
neutralizedData = neutralizationResult['neutralized_text']
|
||||
|
|
|
|||
Loading…
Reference in a new issue