# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
|
|
from fastapi import APIRouter, HTTPException, Depends, Path, Request, status, Query, Body, File, UploadFile
|
|
from typing import List, Dict, Any, Optional
|
|
import logging
|
|
|
|
# Import auth module
|
|
from modules.auth import limiter, getRequestContext, RequestContext
|
|
|
|
# Import interfaces
|
|
from .datamodelFeatureNeutralizer import DataNeutraliserConfig, DataNeutralizerAttributes, DataNeutralizationSnapshot
|
|
from .neutralizePlayground import NeutralizationPlayground
|
|
from modules.shared.i18nRegistry import apiRouteContext
|
|
# Message helper bound to this route module's registry context. It wraps the
# `detail` text of the HTTP errors raised below (presumably for translation,
# given it comes from i18nRegistry -- confirm against that module).
routeApiMsg = apiRouteContext("routeFeatureNeutralizer")

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _assertFeatureInstancePathMatchesContext(featureInstanceIdFromPath: str, context: RequestContext) -> None:
    """Ensure the instance id in the URL agrees with the one on the request context.

    The comparison is made on whitespace-stripped values and only applies when
    both ids are non-empty; if either one is missing the call simply returns.

    Args:
        featureInstanceIdFromPath: Instance id taken from the route path.
        context: Request context that may carry a ``featureInstanceId``.

    Raises:
        HTTPException: 403 when both ids are present and differ.
    """
    contextValue = getattr(context, "featureInstanceId", None)
    idFromContext = str(contextValue).strip() if contextValue else ""
    idFromPath = (featureInstanceIdFromPath or "").strip()

    # Nothing to compare unless both sides supplied an id.
    if not idFromContext or not idFromPath:
        return
    if idFromPath == idFromContext:
        return

    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail=routeApiMsg("Feature instance id in URL does not match request context (X-Instance-Id)"),
    )
|
|
|
|
|
|
def _fetchNeutralizationAttributes(context: RequestContext, fileId: Optional[str]) -> List[DataNeutralizerAttributes]:
    """Build a playground service for the request context and return its attributes.

    Args:
        context: Request context supplying user, mandate id and feature instance id.
        fileId: Optional file id passed through to ``getAttributes``.

    Returns:
        Whatever ``NeutralizationPlayground.getAttributes(fileId)`` returns.
    """
    # A missing mandate id becomes "", a missing instance id becomes None.
    mandateKey = str(context.mandateId) if context.mandateId else ""
    instanceKey = str(context.featureInstanceId) if context.featureInstanceId else None
    playground = NeutralizationPlayground(context.user, mandateKey, featureInstanceId=instanceKey)
    return playground.getAttributes(fileId)
|
|
|
|
|
|
# Create router for neutralization endpoints
|
|
# Router for all neutralization endpoints; every route below is mounted under
# /api/neutralization and shares these documented response descriptions.
router = APIRouter(
    prefix="/api/neutralization",
    tags=["Data Neutralisation"],
    responses={
        404: {"description": "Not found"},
        400: {"description": "Bad request"},
        401: {"description": "Unauthorized"},
        403: {"description": "Forbidden"},
        500: {"description": "Internal server error"},
    },
)
|
|
|
|
@router.get("/config", response_model=DataNeutraliserConfig)
@limiter.limit("30/minute")
def get_neutralization_config(
    request: Request,
    context: RequestContext = Depends(getRequestContext)
) -> DataNeutraliserConfig:
    """Return the neutralization config, or a default config when none is found.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        context: Request context supplying user, mandate id and feature instance id.

    Returns:
        The config from ``NeutralizationPlayground.getConfig()`` when it is
        truthy; otherwise a freshly built, enabled default with empty paths.

    Raises:
        HTTPException: Re-raised unchanged if raised while loading; any other
            error is reported as a 500 with the error text in ``detail``.
    """
    try:
        mandateKey = str(context.mandateId) if context.mandateId else ""
        instanceKey = str(context.featureInstanceId) if context.featureInstanceId else ""
        playground = NeutralizationPlayground(
            context.user,
            mandateKey,
            featureInstanceId=instanceKey or None,
        )

        storedConfig = playground.getConfig()
        if storedConfig:
            return storedConfig

        # No config yet: answer with defaults instead of a 404. The default
        # carries the mandate and instance ids so it is instance-scoped.
        return DataNeutraliserConfig(
            mandateId=mandateKey,
            featureInstanceId=instanceKey,
            userId=context.user.id,
            enabled=True,
            namesToParse="",
            sharepointSourcePath="",
            sharepointTargetPath="",
        )
    except HTTPException:
        raise
    except Exception as e:
        failureText = f"Error getting neutralization config: {str(e)}"
        logger.error(failureText)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failureText,
        )
|
|
|
|
@router.post("/config", response_model=DataNeutraliserConfig)
@limiter.limit("10/minute")
def save_neutralization_config(
    request: Request,
    config_data: Dict[str, Any] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> DataNeutraliserConfig:
    """Save or update the data neutralization configuration.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        config_data: Raw JSON body handed to ``NeutralizationPlayground.saveConfig``.
        context: Request context supplying user, mandate id and feature instance id.

    Returns:
        The value returned by ``saveConfig``.

    Raises:
        HTTPException: Re-raised unchanged when raised while saving, so the
            original status code survives; any other error is reported as a
            500 with the error text in ``detail``.
    """
    try:
        service = NeutralizationPlayground(
            context.user,
            str(context.mandateId) if context.mandateId else "",
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
        )
        config = service.saveConfig(config_data)

        return config

    except HTTPException:
        # Same convention as the sibling endpoints: do not rewrap a
        # deliberate HTTP error (e.g. 400/403) as a generic 500.
        raise
    except Exception as e:
        logger.error(f"Error saving neutralization config: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error saving neutralization config: {str(e)}"
        )
|
|
|
|
@router.post("/neutralize-file")
@limiter.limit("20/minute")
async def neutralize_file(
    request: Request,
    file: UploadFile = File(..., description="File to neutralize (PDF, DOCX, XLSX, PPTX, TXT, CSV, JSON)"),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Accept an uploaded file and run it through the neutralization service.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        file: The uploaded file; it must have a non-blank name and content.
        context: Request context supplying user, mandate id and feature instance id.

    Returns:
        The dict returned by ``processUploadedFileAsync``. The log line below
        inspects its ``neutralized_file_base64`` and ``neutralized_text`` keys.

    Raises:
        HTTPException: 400 for a missing/blank file name or empty content;
            500 (with the error text) for any other failure.
    """
    try:
        uploadName = file.filename
        if not (uploadName and uploadName.strip()):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("File name is required"),
            )

        payload = await file.read()
        if not payload:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("File is empty"),
            )

        mandateKey = str(context.mandateId) if context.mandateId else ""
        instanceKey = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandateKey, featureInstanceId=instanceKey)

        outcome = await playground.processUploadedFileAsync(payload, file.filename or "file")

        # Log only the shape of the result, not its contents.
        resultKeys = list(outcome.keys())
        hasBase64 = bool(outcome.get('neutralized_file_base64'))
        hasText = outcome.get('neutralized_text') is not None
        logger.info(f"Neutralize file result keys: {resultKeys}, has_base64={hasBase64}, has_text={hasText}")
        return outcome
    except HTTPException:
        raise
    except Exception as e:
        failureText = f"Error neutralizing file: {str(e)}"
        logger.error(failureText, exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failureText,
        )
|
|
|
|
|
|
@router.post("/neutralize-text", response_model=Dict[str, Any])
@limiter.limit("20/minute")
def neutralize_text(
    request: Request,
    text_data: Dict[str, Any] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Neutralize the text supplied in the request body.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        text_data: JSON body; ``text`` is required, ``fileId`` is optional.
        context: Request context supplying user, mandate id and feature instance id.

    Returns:
        The value returned by ``NeutralizationPlayground.neutralizeText``.

    Raises:
        HTTPException: 400 when ``text`` is missing or empty; 500 (with the
            error text) for any other failure.
    """
    try:
        rawText = text_data.get("text", "")
        sourceFileId = text_data.get("fileId")

        if not rawText:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("Text content is required"),
            )

        mandateKey = str(context.mandateId) if context.mandateId else ""
        instanceKey = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandateKey, featureInstanceId=instanceKey)
        return playground.neutralizeText(rawText, sourceFileId)
    except HTTPException:
        raise
    except Exception as e:
        failureText = f"Error neutralizing text: {str(e)}"
        logger.error(failureText)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failureText,
        )
|
|
|
|
@router.post("/resolve-text", response_model=Dict[str, str])
@limiter.limit("20/minute")
def resolve_text(
    request: Request,
    text_data: Dict[str, str] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, str]:
    """Resolve the placeholders in neutralized text back to the original text.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        text_data: JSON body; its ``text`` entry is required.
        context: Request context supplying user, mandate id and feature instance id.

    Returns:
        ``{"resolved_text": ...}`` holding the result of ``resolveText``.

    Raises:
        HTTPException: 400 when ``text`` is missing or empty; 500 (with the
            error text) for any other failure.
    """
    try:
        neutralizedText = text_data.get("text", "")
        if not neutralizedText:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("Text content is required"),
            )

        mandateKey = str(context.mandateId) if context.mandateId else ""
        instanceKey = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandateKey, featureInstanceId=instanceKey)
        return {"resolved_text": playground.resolveText(neutralizedText)}
    except HTTPException:
        raise
    except Exception as e:
        failureText = f"Error resolving text: {str(e)}"
        logger.error(failureText)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failureText,
        )
|
|
|
|
@router.get("/attributes", response_model=List[DataNeutralizerAttributes])
@limiter.limit("30/minute")
def get_neutralization_attributes(
    request: Request,
    fileId: Optional[str] = Query(None, description="Filter by file ID"),
    context: RequestContext = Depends(getRequestContext)
) -> List[DataNeutralizerAttributes]:
    """List neutralization attributes, optionally narrowed to one file id.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        fileId: Optional ``fileId`` query parameter passed on to the lookup.
        context: Request context supplying user, mandate id and feature instance id.

    Returns:
        The result of ``_fetchNeutralizationAttributes(context, fileId)``.

    Raises:
        HTTPException: Re-raised unchanged; any other error becomes a 500
            with the error text in ``detail``.
    """
    try:
        attributes = _fetchNeutralizationAttributes(context, fileId)
    except HTTPException:
        raise
    except Exception as e:
        failureText = f"Error getting neutralization attributes: {str(e)}"
        logger.error(failureText)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failureText,
        )
    return attributes
|
|
|
|
|
|
@router.get("/{feature_instance_id}/attributes", response_model=List[DataNeutralizerAttributes])
@limiter.limit("30/minute")
def get_neutralization_attributes_scoped(
    request: Request,
    feature_instance_id: str = Path(..., description="Workspace / feature instance id (must match X-Instance-Id when set)"),
    fileId: Optional[str] = Query(None, description="Filter by file ID"),
    context: RequestContext = Depends(getRequestContext),
) -> List[DataNeutralizerAttributes]:
    """Instance-scoped variant of GET /attributes, for the workspace UI.

    The path's instance id is only validated against the request context; the
    lookup itself uses the context, exactly like the unscoped route.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        feature_instance_id: Instance id from the URL path.
        fileId: Optional ``fileId`` query parameter passed on to the lookup.
        context: Request context supplying user, mandate id and feature instance id.

    Raises:
        HTTPException: 403 when the path id contradicts the context id; 500
            (with the error text) for any unexpected failure.
    """
    # Validation happens outside the try block, so its 403 is never rewrapped.
    _assertFeatureInstancePathMatchesContext(feature_instance_id, context)

    try:
        attributes = _fetchNeutralizationAttributes(context, fileId)
    except HTTPException:
        raise
    except Exception as e:
        failureText = f"Error getting neutralization attributes: {str(e)}"
        logger.error(failureText)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failureText,
        )
    return attributes
|
|
|
|
@router.get("/snapshots", response_model=List[DataNeutralizationSnapshot])
@limiter.limit("30/minute")
def get_neutralization_snapshots(
    request: Request,
    context: RequestContext = Depends(getRequestContext),
) -> List[DataNeutralizationSnapshot]:
    """List the neutralized-text snapshots for the current feature instance.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        context: Request context supplying user, mandate id and feature instance id.

    Returns:
        The result of ``NeutralizationPlayground.getSnapshots()``.

    Raises:
        HTTPException: Re-raised unchanged; any other error becomes a 500
            whose ``detail`` is the bare error text.
    """
    try:
        mandateKey = str(context.mandateId) if context.mandateId else ""
        instanceKey = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandateKey, featureInstanceId=instanceKey)
        return playground.getSnapshots()
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting neutralization snapshots: {e}")
        reason = str(e)
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=reason)
|
|
|
|
|
|
@router.get("/{feature_instance_id}/snapshots", response_model=List[DataNeutralizationSnapshot])
@limiter.limit("30/minute")
def get_neutralization_snapshots_scoped(
    request: Request,
    feature_instance_id: str = Path(..., description="Workspace instance id (must match X-Instance-Id when set)"),
    context: RequestContext = Depends(getRequestContext),
) -> List[DataNeutralizationSnapshot]:
    """Instance-scoped variant of GET /snapshots, for the workspace UI.

    The path's instance id is only validated against the request context; the
    service itself is built from the context values.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        feature_instance_id: Instance id from the URL path.
        context: Request context supplying user, mandate id and feature instance id.

    Raises:
        HTTPException: 403 when the path id contradicts the context id; 500
            (with the bare error text) for any unexpected failure.
    """
    # Validation happens outside the try block, so its 403 is never rewrapped.
    _assertFeatureInstancePathMatchesContext(feature_instance_id, context)

    try:
        mandateKey = str(context.mandateId) if context.mandateId else ""
        instanceKey = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandateKey, featureInstanceId=instanceKey)
        return playground.getSnapshots()
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting neutralization snapshots (scoped): {e}")
        reason = str(e)
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=reason)
|
|
|
|
|
|
@router.post("/process-sharepoint", response_model=Dict[str, Any])
@limiter.limit("5/minute")
async def process_sharepoint_files(
    request: Request,
    paths_data: Dict[str, str] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Neutralize files from a SharePoint source path into a target path.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        paths_data: JSON body; both ``sourcePath`` and ``targetPath`` are required.
        context: Request context supplying user, mandate id and feature instance id.

    Returns:
        The awaited result of ``NeutralizationPlayground.processSharepointFiles``.

    Raises:
        HTTPException: 400 when either path is missing or empty; 500 (with
            the error text) for any other failure.
    """
    try:
        sourceLocation = paths_data.get("sourcePath", "")
        targetLocation = paths_data.get("targetPath", "")

        if not (sourceLocation and targetLocation):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("Both source and target paths are required"),
            )

        mandateKey = str(context.mandateId) if context.mandateId else ""
        instanceKey = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandateKey, featureInstanceId=instanceKey)
        return await playground.processSharepointFiles(sourceLocation, targetLocation)
    except HTTPException:
        raise
    except Exception as e:
        failureText = f"Error processing SharePoint files: {str(e)}"
        logger.error(failureText)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failureText,
        )
|
|
|
|
@router.post("/batch-process", response_model=Dict[str, Any])
@limiter.limit("10/minute")
def batch_process_files(
    request: Request,
    files_data: List[Dict[str, Any]] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Run neutralization over a batch of files supplied in the request body.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        files_data: Non-empty list of per-file dicts, handed unchanged to
            ``batchNeutralizeFiles``.
        context: Request context supplying user, mandate id and feature instance id.

    Returns:
        The value returned by ``NeutralizationPlayground.batchNeutralizeFiles``.

    Raises:
        HTTPException: 400 when the list is empty; 500 (with the error text)
            for any other failure.
    """
    try:
        if not files_data:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("Files data is required"),
            )

        mandateKey = str(context.mandateId) if context.mandateId else ""
        instanceKey = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandateKey, featureInstanceId=instanceKey)
        return playground.batchNeutralizeFiles(files_data)
    except HTTPException:
        raise
    except Exception as e:
        failureText = f"Error batch processing files: {str(e)}"
        logger.error(failureText)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failureText,
        )
|
|
|
|
@router.get("/stats", response_model=Dict[str, Any])
@limiter.limit("30/minute")
def get_neutralization_stats(
    request: Request,
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Get neutralization processing statistics.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        context: Request context supplying user, mandate id and feature instance id.

    Returns:
        The value returned by ``NeutralizationPlayground.getProcessingStats()``.

    Raises:
        HTTPException: Re-raised unchanged when raised while collecting the
            stats, so the original status code survives; any other error is
            reported as a 500 with the error text in ``detail``.
    """
    try:
        service = NeutralizationPlayground(
            context.user,
            str(context.mandateId) if context.mandateId else "",
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
        )
        stats = service.getProcessingStats()

        return stats

    except HTTPException:
        # Same convention as the sibling endpoints: do not rewrap a
        # deliberate HTTP error as a generic 500.
        raise
    except Exception as e:
        logger.error(f"Error getting neutralization stats: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error getting neutralization stats: {str(e)}"
        )
|
|
|
|
def _deleteSingleNeutralizationAttribute(context: RequestContext, attributeId: str) -> Dict[str, str]:
    """Delete one attribute through the playground service.

    Args:
        context: Request context supplying user, mandate id and feature instance id.
        attributeId: Id handed to ``NeutralizationPlayground.deleteAttribute``.

    Returns:
        A ``{"message": ...}`` dict confirming the deletion.

    Raises:
        HTTPException: 404 when ``deleteAttribute`` returns a falsy value.
    """
    mandateKey = str(context.mandateId) if context.mandateId else ""
    instanceKey = str(context.featureInstanceId) if context.featureInstanceId else None
    playground = NeutralizationPlayground(context.user, mandateKey, featureInstanceId=instanceKey)

    if not playground.deleteAttribute(attributeId):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Attribute {attributeId} not found",
        )
    return {"message": f"Attribute {attributeId} deleted"}
|
|
|
|
|
|
@router.delete("/attributes/single/{attributeId}", response_model=Dict[str, str])
@limiter.limit("30/minute")
def deleteAttribute(
    request: Request,
    attributeId: str = Path(..., description="Attribute ID to delete"),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, str]:
    """Remove a single neutralization attribute identified by its id.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        attributeId: Attribute id from the URL path.
        context: Request context supplying user, mandate id and feature instance id.

    Returns:
        The confirmation dict from ``_deleteSingleNeutralizationAttribute``.

    Raises:
        HTTPException: Re-raised unchanged (including the helper's 404); any
            other error becomes a 500 whose ``detail`` is the bare error text.
    """
    try:
        confirmation = _deleteSingleNeutralizationAttribute(context, attributeId)
    except HTTPException:
        raise
    except Exception as e:
        reason = str(e)
        logger.error(f"Error deleting attribute: {reason}")
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=reason)
    return confirmation
|
|
|
|
|
|
@router.delete("/{feature_instance_id}/attributes/single/{attributeId}", response_model=Dict[str, str])
@limiter.limit("30/minute")
def deleteAttributeScoped(
    request: Request,
    feature_instance_id: str = Path(..., description="Workspace / feature instance id"),
    attributeId: str = Path(..., description="Attribute ID to delete"),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, str]:
    """Instance-scoped variant of DELETE /attributes/single/{attributeId}.

    The path's instance id is only validated against the request context; the
    deletion itself is driven by the context.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        feature_instance_id: Instance id from the URL path.
        attributeId: Attribute id from the URL path.
        context: Request context supplying user, mandate id and feature instance id.

    Raises:
        HTTPException: 403 when the path id contradicts the context id; the
            helper's 404 is re-raised unchanged; any other error becomes a 500
            whose ``detail`` is the bare error text.
    """
    # Validation happens outside the try block, so its 403 is never rewrapped.
    _assertFeatureInstancePathMatchesContext(feature_instance_id, context)

    try:
        confirmation = _deleteSingleNeutralizationAttribute(context, attributeId)
    except HTTPException:
        raise
    except Exception as e:
        reason = str(e)
        logger.error(f"Error deleting attribute: {reason}")
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=reason)
    return confirmation
|
|
|
|
|
|
def _retriggerNeutralizationBody(context: RequestContext, fileId: str) -> Dict[str, str]:
    """Shared body of the retrigger routes: clear a file's stored attributes.

    Args:
        context: Request context supplying user, mandate id and feature instance id.
        fileId: Id of the file whose attributes are cleaned up; must be non-empty.

    Returns:
        A dict with a confirmation ``message`` and the ``fileId`` echoed back.

    Raises:
        HTTPException: 400 when ``fileId`` is empty.
    """
    if not fileId:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=routeApiMsg("fileId is required"),
        )

    mandateKey = str(context.mandateId) if context.mandateId else ""
    instanceKey = str(context.featureInstanceId) if context.featureInstanceId else None
    playground = NeutralizationPlayground(context.user, mandateKey, featureInstanceId=instanceKey)

    # NOTE(review): the return value of cleanupFileAttributes is ignored here,
    # whereas the DELETE /attributes/{fileId} route treats a falsy result as a
    # failure -- confirm that reporting success regardless is intended.
    playground.cleanupFileAttributes(fileId)
    return {"message": f"Neutralization re-triggered for file {fileId}", "fileId": fileId}
|
|
|
|
|
|
@router.post("/retrigger", response_model=Dict[str, str])
@limiter.limit("10/minute")
def retriggerNeutralization(
    request: Request,
    retriggerData: Dict[str, str] = Body(...),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, str]:
    """Re-trigger neutralization for the file named in the request body.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        retriggerData: JSON body; its ``fileId`` entry identifies the file.
        context: Request context supplying user, mandate id and feature instance id.

    Returns:
        The confirmation dict from ``_retriggerNeutralizationBody``.

    Raises:
        HTTPException: Re-raised unchanged (including the helper's 400 for a
            missing ``fileId``); any other error becomes a 500 whose
            ``detail`` is the bare error text.
    """
    try:
        requestedFileId = retriggerData.get("fileId", "")
        return _retriggerNeutralizationBody(context, requestedFileId)
    except HTTPException:
        raise
    except Exception as e:
        reason = str(e)
        logger.error(f"Error re-triggering neutralization: {reason}")
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=reason)
|
|
|
|
|
|
@router.post("/{feature_instance_id}/retrigger", response_model=Dict[str, str])
@limiter.limit("10/minute")
def retriggerNeutralizationScoped(
    request: Request,
    feature_instance_id: str = Path(..., description="Workspace / feature instance id"),
    retriggerData: Dict[str, str] = Body(...),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, str]:
    """Instance-scoped variant of POST /retrigger, for the workspace UI.

    The path's instance id is only validated against the request context; the
    retrigger itself is driven by the context.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        feature_instance_id: Instance id from the URL path.
        retriggerData: JSON body; its ``fileId`` entry identifies the file.
        context: Request context supplying user, mandate id and feature instance id.

    Raises:
        HTTPException: 403 when the path id contradicts the context id; the
            helper's 400 is re-raised unchanged; any other error becomes a 500
            whose ``detail`` is the bare error text.
    """
    # Validation happens outside the try block, so its 403 is never rewrapped.
    _assertFeatureInstancePathMatchesContext(feature_instance_id, context)

    try:
        requestedFileId = retriggerData.get("fileId", "")
        return _retriggerNeutralizationBody(context, requestedFileId)
    except HTTPException:
        raise
    except Exception as e:
        reason = str(e)
        logger.error(f"Error re-triggering neutralization: {reason}")
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=reason)
|
|
|
|
|
|
@router.delete("/attributes/{fileId}", response_model=Dict[str, str])
@limiter.limit("10/minute")
def cleanup_file_attributes(
    request: Request,
    fileId: str = Path(..., description="File ID to cleanup attributes for"),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, str]:
    """Remove the neutralization attributes stored for one file.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        fileId: File id from the URL path.
        context: Request context supplying user, mandate id and feature instance id.

    Returns:
        A ``{"message": ...}`` dict confirming the cleanup.

    Raises:
        HTTPException: 500 when ``cleanupFileAttributes`` returns a falsy
            value, and 500 (with the error text) for any unexpected failure.
    """
    try:
        mandateKey = str(context.mandateId) if context.mandateId else ""
        instanceKey = str(context.featureInstanceId) if context.featureInstanceId else None
        playground = NeutralizationPlayground(context.user, mandateKey, featureInstanceId=instanceKey)

        if not playground.cleanupFileAttributes(fileId):
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=routeApiMsg("Failed to cleanup file attributes"),
            )
        return {"message": f"Successfully cleaned up attributes for file {fileId}"}
    except HTTPException:
        raise
    except Exception as e:
        failureText = f"Error cleaning up file attributes: {str(e)}"
        logger.error(failureText)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failureText,
        )
|