gateway/modules/features/neutralization/routeFeatureNeutralizer.py
2026-04-10 12:33:27 +02:00

536 lines
21 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
from fastapi import APIRouter, HTTPException, Depends, Path, Request, status, Query, Body, File, UploadFile
from typing import List, Dict, Any, Optional
import logging
# Import auth module
from modules.auth import limiter, getRequestContext, RequestContext
# Import interfaces
from .datamodelFeatureNeutralizer import DataNeutraliserConfig, DataNeutralizerAttributes, DataNeutralizationSnapshot
from .neutralizePlayground import NeutralizationPlayground
from modules.shared.i18nRegistry import apiRouteContext
# Message helper obtained from the i18n registry for this route module. It is
# used below to wrap the static, user-facing `detail` strings of HTTP errors
# (presumably so they can be translated -- confirm against i18nRegistry).
routeApiMsg = apiRouteContext("routeFeatureNeutralizer")
# Configure logger
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
def _assertFeatureInstancePathMatchesContext(featureInstanceIdFromPath: str, context: RequestContext) -> None:
    """Raise HTTP 403 when the URL instance id disagrees with the request context.

    Both ids are compared as stripped strings. The check is a no-op when
    either the context carries no (truthy) ``featureInstanceId`` or the path
    value is empty, so only a genuine mismatch between two non-empty ids is
    rejected.

    Args:
        featureInstanceIdFromPath: Instance id taken from the URL path.
        context: Request context; its ``featureInstanceId`` attribute is
            read defensively because it may be missing.

    Raises:
        HTTPException: 403 if both ids are non-empty and differ.
    """
    rawContextId = getattr(context, "featureInstanceId", None)
    contextInstanceId = str(rawContextId).strip() if rawContextId else ""
    pathInstanceId = (featureInstanceIdFromPath or "").strip()

    # Nothing to compare unless both sides supplied an id.
    if not contextInstanceId or not pathInstanceId:
        return
    if pathInstanceId == contextInstanceId:
        return

    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail=routeApiMsg("Feature instance id in URL does not match request context (X-Instance-Id)"),
    )
def _fetchNeutralizationAttributes(context: RequestContext, fileId: Optional[str]) -> List[DataNeutralizerAttributes]:
    """Build a playground service from the request context and return its attributes.

    Args:
        context: Request context supplying user, mandate id and feature
            instance id. A falsy mandate id becomes ``""``; a falsy feature
            instance id becomes ``None``.
        fileId: Optional file id forwarded unchanged to ``getAttributes``.

    Returns:
        Whatever ``NeutralizationPlayground.getAttributes(fileId)`` returns.
    """
    user = context.user
    mandateId = str(context.mandateId) if context.mandateId else ""
    instanceId = str(context.featureInstanceId) if context.featureInstanceId else None
    playground = NeutralizationPlayground(user, mandateId, featureInstanceId=instanceId)
    return playground.getAttributes(fileId)
# Router for all data-neutralization endpoints. Every route below is mounted
# under /api/neutralization and shares the same documented error responses.
router = APIRouter(
    prefix="/api/neutralization",
    tags=["Data Neutralisation"],
    responses={
        404: {"description": "Not found"},
        400: {"description": "Bad request"},
        401: {"description": "Unauthorized"},
        403: {"description": "Forbidden"},
        500: {"description": "Internal server error"},
    },
)
@router.get("/config", response_model=DataNeutraliserConfig)
@limiter.limit("30/minute")
def get_neutralization_config(
    request: Request,
    context: RequestContext = Depends(getRequestContext)
) -> DataNeutraliserConfig:
    """Return the neutralization config for the current context.

    When the service has no stored config (falsy result), a default config
    scoped to the context's mandate, feature instance and user is returned
    instead of a 404.

    Raises:
        HTTPException: re-raised unchanged if raised inside the handler;
            500 for any other exception.
    """
    try:
        mandate_id = str(context.mandateId) if context.mandateId else ""
        feature_instance_id = str(context.featureInstanceId) if context.featureInstanceId else ""
        playground = NeutralizationPlayground(
            context.user,
            mandate_id,
            featureInstanceId=feature_instance_id or None,
        )
        stored_config = playground.getConfig()
        if stored_config:
            return stored_config
        # No stored config: fall back to defaults rather than answering 404.
        # The default carries the mandate and instance ids of the context.
        return DataNeutraliserConfig(
            mandateId=mandate_id,
            featureInstanceId=feature_instance_id,
            userId=context.user.id,
            enabled=True,
            namesToParse="",
            sharepointSourcePath="",
            sharepointTargetPath="",
        )
    except HTTPException:
        raise
    except Exception as e:
        failure = f"Error getting neutralization config: {str(e)}"
        logger.error(failure)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failure,
        )
@router.post("/config", response_model=DataNeutraliserConfig)
@limiter.limit("10/minute")
def save_neutralization_config(
    request: Request,
    config_data: Dict[str, Any] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> DataNeutraliserConfig:
    """Save or update data neutralization configuration.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        config_data: Raw JSON body, forwarded unchanged to the service's
            ``saveConfig``.
        context: Request context supplying user, mandate id and feature
            instance id.

    Returns:
        The config object returned by ``saveConfig``.

    Raises:
        HTTPException: Any HTTPException raised while saving is propagated
            unchanged; every other exception is reported as HTTP 500.
    """
    try:
        service = NeutralizationPlayground(
            context.user,
            str(context.mandateId) if context.mandateId else "",
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
        )
        return service.saveConfig(config_data)
    except HTTPException:
        # Keep deliberate HTTP errors (e.g. 400/403 from lower layers) intact
        # instead of rewrapping them as 500, like the other handlers here.
        raise
    except Exception as e:
        logger.error(f"Error saving neutralization config: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error saving neutralization config: {str(e)}"
        )
@router.post("/neutralize-file")
@limiter.limit("20/minute")
async def neutralize_file(
    request: Request,
    file: UploadFile = File(..., description="File to neutralize (PDF, DOCX, XLSX, PPTX, TXT, CSV, JSON)"),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Neutralize an uploaded file and return the service's result dict.

    The upload is rejected with HTTP 400 when its file name is missing or
    blank, or when the file content is empty. Otherwise the raw bytes and
    file name are handed to ``processUploadedFileAsync``; the result is
    logged (keys plus presence of base64/text payloads) and returned as-is.

    Raises:
        HTTPException: 400 for a missing name or empty file; 500 for any
            unexpected exception.
    """
    try:
        upload_name = file.filename
        if not (upload_name and upload_name.strip()):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("File name is required"),
            )

        payload = await file.read()
        if not payload:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("File is empty"),
            )

        playground = NeutralizationPlayground(
            context.user,
            str(context.mandateId) if context.mandateId else "",
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        outcome = await playground.processUploadedFileAsync(payload, file.filename or "file")

        # Log only the shape of the result, never the neutralized content.
        result_keys = list(outcome.keys())
        has_base64 = bool(outcome.get('neutralized_file_base64'))
        has_text = outcome.get('neutralized_text') is not None
        logger.info(f"Neutralize file result keys: {result_keys}, has_base64={has_base64}, has_text={has_text}")
        return outcome
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error neutralizing file: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error neutralizing file: {str(e)}",
        )
@router.post("/neutralize-text", response_model=Dict[str, Any])
@limiter.limit("20/minute")
def neutralize_text(
    request: Request,
    text_data: Dict[str, Any] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Neutralize the text supplied in the request body.

    The body is read for ``"text"`` (required, must be truthy) and
    ``"fileId"`` (optional); both are passed to the service's
    ``neutralizeText`` and its result is returned unchanged.

    Raises:
        HTTPException: 400 when no text is supplied; 500 for any
            unexpected exception.
    """
    try:
        raw_text = text_data.get("text", "")
        source_file_id = text_data.get("fileId")
        if not raw_text:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("Text content is required"),
            )

        mandate_id = str(context.mandateId) if context.mandateId else ""
        instance_id = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandate_id, featureInstanceId=instance_id)
        return playground.neutralizeText(raw_text, source_file_id)
    except HTTPException:
        raise
    except Exception as e:
        failure = f"Error neutralizing text: {str(e)}"
        logger.error(failure)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failure,
        )
@router.post("/resolve-text", response_model=Dict[str, str])
@limiter.limit("20/minute")
def resolve_text(
    request: Request,
    text_data: Dict[str, str] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, str]:
    """Resolve placeholders in neutralized text back to the original text.

    Reads ``"text"`` from the body (required, must be truthy), passes it to
    the service's ``resolveText`` and wraps the result under the
    ``"resolved_text"`` key.

    Raises:
        HTTPException: 400 when no text is supplied; 500 for any
            unexpected exception.
    """
    try:
        neutralized = text_data.get("text", "")
        if not neutralized:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("Text content is required"),
            )

        mandate_id = str(context.mandateId) if context.mandateId else ""
        instance_id = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandate_id, featureInstanceId=instance_id)
        return {"resolved_text": playground.resolveText(neutralized)}
    except HTTPException:
        raise
    except Exception as e:
        failure = f"Error resolving text: {str(e)}"
        logger.error(failure)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failure,
        )
@router.get("/attributes", response_model=List[DataNeutralizerAttributes])
@limiter.limit("30/minute")
def get_neutralization_attributes(
    request: Request,
    fileId: Optional[str] = Query(None, description="Filter by file ID"),
    context: RequestContext = Depends(getRequestContext)
) -> List[DataNeutralizerAttributes]:
    """List neutralization attributes, optionally restricted to one file.

    Delegates to ``_fetchNeutralizationAttributes`` with the request context
    and the optional ``fileId`` query parameter.

    Raises:
        HTTPException: re-raised unchanged if raised while fetching;
            500 for any other exception.
    """
    try:
        attributes = _fetchNeutralizationAttributes(context, fileId)
    except HTTPException:
        raise
    except Exception as e:
        failure = f"Error getting neutralization attributes: {str(e)}"
        logger.error(failure)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failure,
        )
    return attributes
@router.get("/{feature_instance_id}/attributes", response_model=List[DataNeutralizerAttributes])
@limiter.limit("30/minute")
def get_neutralization_attributes_scoped(
    request: Request,
    feature_instance_id: str = Path(..., description="Workspace / feature instance id (must match X-Instance-Id when set)"),
    fileId: Optional[str] = Query(None, description="Filter by file ID"),
    context: RequestContext = Depends(getRequestContext),
) -> List[DataNeutralizerAttributes]:
    """Instance-scoped variant of GET /attributes.

    The instance id in the path is first checked against the request
    context (HTTP 403 on mismatch); the attributes themselves are then
    fetched from the context exactly as in the unscoped route.

    Raises:
        HTTPException: 403 on path/context mismatch; 500 for any
            unexpected exception while fetching.
    """
    # Validate outside the try block: the 403 must never be rewrapped.
    _assertFeatureInstancePathMatchesContext(feature_instance_id, context)
    try:
        attributes = _fetchNeutralizationAttributes(context, fileId)
    except HTTPException:
        raise
    except Exception as e:
        failure = f"Error getting neutralization attributes: {str(e)}"
        logger.error(failure)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failure,
        )
    return attributes
@router.get("/snapshots", response_model=List[DataNeutralizationSnapshot])
@limiter.limit("30/minute")
def get_neutralization_snapshots(
    request: Request,
    context: RequestContext = Depends(getRequestContext),
) -> List[DataNeutralizationSnapshot]:
    """Return the neutralized-text snapshots for the current feature instance.

    Builds the playground service from the request context and returns the
    result of its ``getSnapshots`` call.

    Raises:
        HTTPException: re-raised unchanged if raised by the service; 500
            (with the exception text as detail) for any other exception.
    """
    try:
        mandate_id = str(context.mandateId) if context.mandateId else ""
        instance_id = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandate_id, featureInstanceId=instance_id)
        return playground.getSnapshots()
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting neutralization snapshots: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e),
        )
@router.get("/{feature_instance_id}/snapshots", response_model=List[DataNeutralizationSnapshot])
@limiter.limit("30/minute")
def get_neutralization_snapshots_scoped(
    request: Request,
    feature_instance_id: str = Path(..., description="Workspace instance id (must match X-Instance-Id when set)"),
    context: RequestContext = Depends(getRequestContext),
) -> List[DataNeutralizationSnapshot]:
    """Instance-scoped variant of GET /snapshots.

    The instance id in the path is checked against the request context
    (HTTP 403 on mismatch); the snapshots are then read using the context's
    own ids, as in the unscoped route.

    Raises:
        HTTPException: 403 on path/context mismatch; 500 (with the
            exception text as detail) for any unexpected exception.
    """
    # Validate outside the try block: the 403 must never be rewrapped.
    _assertFeatureInstancePathMatchesContext(feature_instance_id, context)
    try:
        mandate_id = str(context.mandateId) if context.mandateId else ""
        instance_id = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandate_id, featureInstanceId=instance_id)
        return playground.getSnapshots()
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting neutralization snapshots (scoped): {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e),
        )
@router.post("/process-sharepoint", response_model=Dict[str, Any])
@limiter.limit("5/minute")
async def process_sharepoint_files(
    request: Request,
    paths_data: Dict[str, str] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Run SharePoint neutralization from a source path into a target path.

    Reads ``"sourcePath"`` and ``"targetPath"`` from the body (both
    required) and awaits the service's ``processSharepointFiles`` with
    them, returning its result unchanged.

    Raises:
        HTTPException: 400 when either path is missing or empty; 500 for
            any unexpected exception.
    """
    try:
        source_path = paths_data.get("sourcePath", "")
        target_path = paths_data.get("targetPath", "")
        if not (source_path and target_path):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("Both source and target paths are required"),
            )

        mandate_id = str(context.mandateId) if context.mandateId else ""
        instance_id = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandate_id, featureInstanceId=instance_id)
        return await playground.processSharepointFiles(source_path, target_path)
    except HTTPException:
        raise
    except Exception as e:
        failure = f"Error processing SharePoint files: {str(e)}"
        logger.error(failure)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failure,
        )
@router.post("/batch-process", response_model=Dict[str, Any])
@limiter.limit("10/minute")
def batch_process_files(
    request: Request,
    files_data: List[Dict[str, Any]] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Neutralize several files in one call.

    The body must be a non-empty list; it is forwarded unchanged to the
    service's ``batchNeutralizeFiles`` and the result is returned as-is.

    Raises:
        HTTPException: 400 when the list is empty; 500 for any unexpected
            exception.
    """
    try:
        if not files_data:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("Files data is required"),
            )

        mandate_id = str(context.mandateId) if context.mandateId else ""
        instance_id = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandate_id, featureInstanceId=instance_id)
        return playground.batchNeutralizeFiles(files_data)
    except HTTPException:
        raise
    except Exception as e:
        failure = f"Error batch processing files: {str(e)}"
        logger.error(failure)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failure,
        )
@router.get("/stats", response_model=Dict[str, Any])
@limiter.limit("30/minute")
def get_neutralization_stats(
    request: Request,
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Get neutralization processing statistics.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        context: Request context supplying user, mandate id and feature
            instance id.

    Returns:
        The value returned by the service's ``getProcessingStats``.

    Raises:
        HTTPException: Any HTTPException raised while collecting stats is
            propagated unchanged; every other exception is reported as
            HTTP 500.
    """
    try:
        service = NeutralizationPlayground(
            context.user,
            str(context.mandateId) if context.mandateId else "",
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
        )
        return service.getProcessingStats()
    except HTTPException:
        # Keep deliberate HTTP errors from lower layers intact instead of
        # rewrapping them as 500, like the other handlers in this router.
        raise
    except Exception as e:
        logger.error(f"Error getting neutralization stats: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error getting neutralization stats: {str(e)}"
        )
def _deleteSingleNeutralizationAttribute(context: RequestContext, attributeId: str) -> Dict[str, str]:
    """Delete one attribute through the playground service.

    Args:
        context: Request context supplying user, mandate id and feature
            instance id.
        attributeId: Id passed to the service's ``deleteAttribute``.

    Returns:
        A ``{"message": ...}`` dict confirming the deletion.

    Raises:
        HTTPException: 404 when ``deleteAttribute`` returns a falsy value.
    """
    mandateId = str(context.mandateId) if context.mandateId else ""
    instanceId = str(context.featureInstanceId) if context.featureInstanceId else None
    playground = NeutralizationPlayground(context.user, mandateId, featureInstanceId=instanceId)

    if not playground.deleteAttribute(attributeId):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Attribute {attributeId} not found",
        )
    return {"message": f"Attribute {attributeId} deleted"}
@router.delete("/attributes/single/{attributeId}", response_model=Dict[str, str])
@limiter.limit("30/minute")
def deleteAttribute(
    request: Request,
    attributeId: str = Path(..., description="Attribute ID to delete"),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, str]:
    """Delete a single neutralization attribute identified by its id.

    Delegates to ``_deleteSingleNeutralizationAttribute``.

    Raises:
        HTTPException: propagated unchanged from the helper (e.g. its 404);
            500 (with the exception text as detail) for any other exception.
    """
    try:
        confirmation = _deleteSingleNeutralizationAttribute(context, attributeId)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error deleting attribute: {str(e)}")
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
    return confirmation
@router.delete("/{feature_instance_id}/attributes/single/{attributeId}", response_model=Dict[str, str])
@limiter.limit("30/minute")
def deleteAttributeScoped(
    request: Request,
    feature_instance_id: str = Path(..., description="Workspace / feature instance id"),
    attributeId: str = Path(..., description="Attribute ID to delete"),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, str]:
    """Instance-scoped variant of DELETE /attributes/single/{attributeId}.

    The instance id in the path is checked against the request context
    (HTTP 403 on mismatch) before delegating to
    ``_deleteSingleNeutralizationAttribute``.

    Raises:
        HTTPException: 403 on path/context mismatch; propagated unchanged
            from the helper (e.g. its 404); 500 for any other exception.
    """
    # Validate outside the try block: the 403 must never be rewrapped.
    _assertFeatureInstancePathMatchesContext(feature_instance_id, context)
    try:
        confirmation = _deleteSingleNeutralizationAttribute(context, attributeId)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error deleting attribute: {str(e)}")
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
    return confirmation
def _retriggerNeutralizationBody(context: RequestContext, fileId: str) -> Dict[str, str]:
    """Shared implementation of the retrigger endpoints.

    Calls the service's ``cleanupFileAttributes`` for the file and returns a
    confirmation dict. The return value of the cleanup call is not
    inspected here.

    Args:
        context: Request context supplying user, mandate id and feature
            instance id.
        fileId: Id of the file to retrigger; must be truthy.

    Returns:
        Dict with a ``"message"`` and the echoed ``"fileId"``.

    Raises:
        HTTPException: 400 when ``fileId`` is empty.
    """
    if fileId:
        mandateId = str(context.mandateId) if context.mandateId else ""
        instanceId = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandateId, featureInstanceId=instanceId)
        playground.cleanupFileAttributes(fileId)
        return {"message": f"Neutralization re-triggered for file {fileId}", "fileId": fileId}

    raise HTTPException(
        status_code=status.HTTP_400_BAD_REQUEST,
        detail=routeApiMsg("fileId is required"),
    )
@router.post("/retrigger", response_model=Dict[str, str])
@limiter.limit("10/minute")
def retriggerNeutralization(
    request: Request,
    retriggerData: Dict[str, str] = Body(...),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, str]:
    """Re-trigger neutralization for the file named in the request body.

    Reads ``"fileId"`` from the body (defaulting to ``""``) and delegates to
    ``_retriggerNeutralizationBody``.

    Raises:
        HTTPException: propagated unchanged from the helper (e.g. its 400);
            500 (with the exception text as detail) for any other exception.
    """
    try:
        targetFileId = retriggerData.get("fileId", "")
        return _retriggerNeutralizationBody(context, targetFileId)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error re-triggering neutralization: {str(e)}")
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
@router.post("/{feature_instance_id}/retrigger", response_model=Dict[str, str])
@limiter.limit("10/minute")
def retriggerNeutralizationScoped(
    request: Request,
    feature_instance_id: str = Path(..., description="Workspace / feature instance id"),
    retriggerData: Dict[str, str] = Body(...),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, str]:
    """Instance-scoped variant of POST /retrigger.

    The instance id in the path is checked against the request context
    (HTTP 403 on mismatch) before delegating to
    ``_retriggerNeutralizationBody`` with the body's ``"fileId"``.

    Raises:
        HTTPException: 403 on path/context mismatch; propagated unchanged
            from the helper (e.g. its 400); 500 for any other exception.
    """
    # Validate outside the try block: the 403 must never be rewrapped.
    _assertFeatureInstancePathMatchesContext(feature_instance_id, context)
    try:
        targetFileId = retriggerData.get("fileId", "")
        return _retriggerNeutralizationBody(context, targetFileId)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error re-triggering neutralization: {str(e)}")
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
@router.delete("/attributes/{fileId}", response_model=Dict[str, str])
@limiter.limit("10/minute")
def cleanup_file_attributes(
    request: Request,
    fileId: str = Path(..., description="File ID to cleanup attributes for"),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, str]:
    """Remove the neutralization attributes recorded for one file.

    Calls the service's ``cleanupFileAttributes`` and answers with a
    confirmation message when it returns a truthy value.

    Raises:
        HTTPException: 500 when the cleanup returns a falsy value or any
            unexpected exception occurs.
    """
    try:
        mandate_id = str(context.mandateId) if context.mandateId else ""
        instance_id = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandate_id, featureInstanceId=instance_id)

        if not playground.cleanupFileAttributes(fileId):
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=routeApiMsg("Failed to cleanup file attributes"),
            )
        return {"message": f"Successfully cleaned up attributes for file {fileId}"}
    except HTTPException:
        raise
    except Exception as e:
        failure = f"Error cleaning up file attributes: {str(e)}"
        logger.error(failure)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failure,
        )