# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
from fastapi import APIRouter, HTTPException, Depends, Path, Request, status, Query, Body, File, UploadFile
from typing import List, Dict, Any, Optional
import logging

# Import auth module
from modules.auth import limiter, getRequestContext, RequestContext

# Import interfaces
from .datamodelFeatureNeutralizer import DataNeutraliserConfig, DataNeutralizerAttributes, DataNeutralizationSnapshot
from .neutralizePlayground import NeutralizationPlayground
from modules.shared.i18nRegistry import apiRouteContext

routeApiMsg = apiRouteContext("routeFeatureNeutralizer")

# Configure logger
logger = logging.getLogger(__name__)

# NOTE: every handler takes an otherwise unused ``request: Request`` parameter.
# Presumably the ``limiter.limit`` decorator needs it to identify the caller --
# confirm against the limiter implementation before removing it anywhere.
#
# NOTE: handler docstrings are kept as one-liners on purpose; FastAPI publishes
# them as the OpenAPI description of the route. Extra detail lives in comments.


def _assertFeatureInstancePathMatchesContext(featureInstanceIdFromPath: str, context: RequestContext) -> None:
    """Reject path/instance mismatch when request context already carries an instance id.

    Both ids are compared as whitespace-stripped strings. The check is skipped
    when either side is empty, so a request without an instance id in its
    context is not rejected here.

    Args:
        featureInstanceIdFromPath: Instance id taken from the URL path.
        context: Request context; its ``featureInstanceId`` is read if present.

    Raises:
        HTTPException: 403 when both ids are non-empty and differ.
    """
    ctxId = str(context.featureInstanceId).strip() if getattr(context, "featureInstanceId", None) else ""
    pathId = (featureInstanceIdFromPath or "").strip()
    if ctxId and pathId and pathId != ctxId:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=routeApiMsg("Feature instance id in URL does not match request context (X-Instance-Id)"),
        )


def _buildNeutralizationService(context: RequestContext) -> NeutralizationPlayground:
    """Create the NeutralizationPlayground service for the given request context.

    A missing mandate id is passed as ``""`` and a missing feature instance id
    as ``None``; present ids are converted with ``str()``.

    Args:
        context: Request context providing ``user``, ``mandateId`` and
            ``featureInstanceId``.

    Returns:
        A new service instance bound to the caller's user/mandate/instance.
    """
    return NeutralizationPlayground(
        context.user,
        str(context.mandateId) if context.mandateId else "",
        featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
    )


def _fetchNeutralizationAttributes(context: RequestContext, fileId: Optional[str]) -> List[DataNeutralizerAttributes]:
    """Return the service's attributes for ``fileId`` (``None`` is passed through as-is).

    Shared by the plain and the instance-scoped ``/attributes`` routes.
    """
    service = _buildNeutralizationService(context)
    return service.getAttributes(fileId)


# Create router for neutralization endpoints
router = APIRouter(
    prefix="/api/neutralization",
    tags=["Data Neutralisation"],
    responses={
        404: {"description": "Not found"},
        400: {"description": "Bad request"},
        401: {"description": "Unauthorized"},
        403: {"description": "Forbidden"},
        500: {"description": "Internal server error"}
    }
)


@router.get("/config", response_model=DataNeutraliserConfig)
@limiter.limit("30/minute")
def get_neutralization_config(
    request: Request,
    context: RequestContext = Depends(getRequestContext)
) -> DataNeutraliserConfig:
    """Get data neutralization configuration"""
    try:
        # The ids are computed once because they are reused for the default config below.
        mandate_id = str(context.mandateId) if context.mandateId else ""
        feature_instance_id = str(context.featureInstanceId) if context.featureInstanceId else ""
        service = NeutralizationPlayground(
            context.user,
            mandate_id,
            featureInstanceId=feature_instance_id or None
        )
        config = service.getConfig()

        if not config:
            # Return default config instead of 404 (requires mandateId and featureInstanceId for instance-scoped config)
            return DataNeutraliserConfig(
                mandateId=mandate_id,
                featureInstanceId=feature_instance_id,
                userId=context.user.id,
                enabled=True,
                namesToParse="",
                sharepointSourcePath="",
                sharepointTargetPath=""
            )

        return config

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting neutralization config: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error getting neutralization config: {str(e)}"
        )


@router.post("/config", response_model=DataNeutraliserConfig)
@limiter.limit("10/minute")
def save_neutralization_config(
    request: Request,
    config_data: Dict[str, Any] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> DataNeutraliserConfig:
    """Save or update data neutralization configuration"""
    try:
        service = _buildNeutralizationService(context)
        config = service.saveConfig(config_data)

        return config

    except HTTPException:
        # Pass deliberate HTTP errors through unchanged instead of rewriting
        # them to a 500 (same policy as the other handlers in this module).
        raise
    except Exception as e:
        logger.error(f"Error saving neutralization config: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error saving neutralization config: {str(e)}"
        )


@router.post("/neutralize-file")
@limiter.limit("20/minute")
async def neutralize_file(
    request: Request,
    file: UploadFile = File(..., description="File to neutralize (PDF, DOCX, XLSX, PPTX, TXT, CSV, JSON)"),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Upload and neutralize a file. Returns neutralized text or base64-encoded file for download."""
    try:
        # Reject uploads without a usable (non-blank) file name.
        if not file.filename or not file.filename.strip():
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("File name is required")
            )

        # The whole upload is read into memory before processing.
        content = await file.read()
        if not content:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("File is empty")
            )

        service = _buildNeutralizationService(context)
        result = await service.processUploadedFileAsync(content, file.filename or "file")
        # Log only the result's shape (keys / presence flags), not its content.
        logger.info(f"Neutralize file result keys: {list(result.keys())}, has_base64={bool(result.get('neutralized_file_base64'))}, has_text={result.get('neutralized_text') is not None}")
        return result

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error neutralizing file: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error neutralizing file: {str(e)}"
        )


@router.post("/neutralize-text", response_model=Dict[str, Any])
@limiter.limit("20/minute")
def neutralize_text(
    request: Request,
    text_data: Dict[str, Any] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Neutralize text content"""
    try:
        # Body keys read here: "text" (required, non-empty) and "fileId" (optional).
        text = text_data.get("text", "")
        file_id = text_data.get("fileId")

        if not text:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("Text content is required")
            )

        service = _buildNeutralizationService(context)
        result = service.neutralizeText(text, file_id)

        return result

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error neutralizing text: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error neutralizing text: {str(e)}"
        )


@router.post("/resolve-text", response_model=Dict[str, str])
@limiter.limit("20/minute")
def resolve_text(
    request: Request,
    text_data: Dict[str, str] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, str]:
    """Resolve UIDs in neutralized text back to original text"""
    try:
        text = text_data.get("text", "")

        if not text:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("Text content is required")
            )

        service = _buildNeutralizationService(context)
        resolved_text = service.resolveText(text)

        return {"resolved_text": resolved_text}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error resolving text: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error resolving text: {str(e)}"
        )


@router.get("/attributes", response_model=List[DataNeutralizerAttributes])
@limiter.limit("30/minute")
def get_neutralization_attributes(
    request: Request,
    fileId: Optional[str] = Query(None, description="Filter by file ID"),
    context: RequestContext = Depends(getRequestContext)
) -> List[DataNeutralizerAttributes]:
    """Get neutralization attributes, optionally filtered by file ID"""
    try:
        return _fetchNeutralizationAttributes(context, fileId)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting neutralization attributes: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error getting neutralization attributes: {str(e)}"
        )


@router.get("/{feature_instance_id}/attributes", response_model=List[DataNeutralizerAttributes])
@limiter.limit("30/minute")
def get_neutralization_attributes_scoped(
    request: Request,
    feature_instance_id: str = Path(..., description="Workspace / feature instance id (must match X-Instance-Id when set)"),
    fileId: Optional[str] = Query(None, description="Filter by file ID"),
    context: RequestContext = Depends(getRequestContext),
) -> List[DataNeutralizerAttributes]:
    """Same as GET /attributes; path includes instance id for workspace UI compatibility."""
    # The path id is only validated; the service is still built from the context.
    _assertFeatureInstancePathMatchesContext(feature_instance_id, context)
    try:
        return _fetchNeutralizationAttributes(context, fileId)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting neutralization attributes: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error getting neutralization attributes: {str(e)}"
        )


@router.get("/snapshots", response_model=List[DataNeutralizationSnapshot])
@limiter.limit("30/minute")
def get_neutralization_snapshots(
    request: Request,
    context: RequestContext = Depends(getRequestContext),
) -> List[DataNeutralizationSnapshot]:
    """Return neutralized-text snapshots (full text with placeholders) for the current feature instance."""
    try:
        service = _buildNeutralizationService(context)
        return service.getSnapshots()
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting neutralization snapshots: {e}")
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))


@router.get("/{feature_instance_id}/snapshots", response_model=List[DataNeutralizationSnapshot])
@limiter.limit("30/minute")
def get_neutralization_snapshots_scoped(
    request: Request,
    feature_instance_id: str = Path(..., description="Workspace instance id (must match X-Instance-Id when set)"),
    context: RequestContext = Depends(getRequestContext),
) -> List[DataNeutralizationSnapshot]:
    """Same as GET /snapshots; path includes instance id for workspace UI (explicit scope)."""
    # The path id is only validated; the service is still built from the context.
    _assertFeatureInstancePathMatchesContext(feature_instance_id, context)
    try:
        service = _buildNeutralizationService(context)
        return service.getSnapshots()
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting neutralization snapshots (scoped): {e}")
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))


@router.post("/process-sharepoint", response_model=Dict[str, Any])
@limiter.limit("5/minute")
async def process_sharepoint_files(
    request: Request,
    paths_data: Dict[str, str] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Process files from SharePoint source path and store neutralized files in target path"""
    try:
        # Body keys read here: "sourcePath" and "targetPath"; both must be non-empty.
        source_path = paths_data.get("sourcePath", "")
        target_path = paths_data.get("targetPath", "")

        if not source_path or not target_path:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("Both source and target paths are required")
            )

        service = _buildNeutralizationService(context)
        result = await service.processSharepointFiles(source_path, target_path)

        return result

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error processing SharePoint files: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error processing SharePoint files: {str(e)}"
        )


@router.post("/batch-process", response_model=Dict[str, Any])
@limiter.limit("10/minute")
def batch_process_files(
    request: Request,
    files_data: List[Dict[str, Any]] = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Process multiple files for neutralization"""
    try:
        # An empty list is rejected up front rather than forwarded to the service.
        if not files_data:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg("Files data is required")
            )

        service = _buildNeutralizationService(context)
        result = service.batchNeutralizeFiles(files_data)

        return result

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error batch processing files: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error batch processing files: {str(e)}"
        )


@router.get("/stats", response_model=Dict[str, Any])
@limiter.limit("30/minute")
def get_neutralization_stats(
    request: Request,
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Get neutralization processing statistics"""
    try:
        service = _buildNeutralizationService(context)
        stats = service.getProcessingStats()

        return stats

    except HTTPException:
        # Pass deliberate HTTP errors through unchanged instead of rewriting
        # them to a 500 (same policy as the other handlers in this module).
        raise
    except Exception as e:
        logger.error(f"Error getting neutralization stats: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error getting neutralization stats: {str(e)}"
        )


def _deleteSingleNeutralizationAttribute(context: RequestContext, attributeId: str) -> Dict[str, str]:
    """Delete one attribute through the service and build the response payload.

    Shared by the plain and the instance-scoped single-attribute DELETE routes.

    Args:
        context: Request context used to build the service.
        attributeId: Id forwarded to ``service.deleteAttribute``.

    Returns:
        ``{"message": ...}`` when the service reports success.

    Raises:
        HTTPException: 404 when the service returns a falsy result.
    """
    service = _buildNeutralizationService(context)
    success = service.deleteAttribute(attributeId)
    if success:
        return {"message": f"Attribute {attributeId} deleted"}
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Attribute {attributeId} not found",
    )


@router.delete("/attributes/single/{attributeId}", response_model=Dict[str, str])
@limiter.limit("30/minute")
def deleteAttribute(
    request: Request,
    attributeId: str = Path(..., description="Attribute ID to delete"),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, str]:
    """Delete a single neutralization attribute by ID."""
    try:
        return _deleteSingleNeutralizationAttribute(context, attributeId)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error deleting attribute: {str(e)}")
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))


@router.delete("/{feature_instance_id}/attributes/single/{attributeId}", response_model=Dict[str, str])
@limiter.limit("30/minute")
def deleteAttributeScoped(
    request: Request,
    feature_instance_id: str = Path(..., description="Workspace / feature instance id"),
    attributeId: str = Path(..., description="Attribute ID to delete"),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, str]:
    """Same as DELETE /attributes/single/{attributeId}; path includes instance id for workspace UI."""
    # The path id is only validated; the service is still built from the context.
    _assertFeatureInstancePathMatchesContext(feature_instance_id, context)
    try:
        return _deleteSingleNeutralizationAttribute(context, attributeId)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error deleting attribute: {str(e)}")
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))


def _retriggerNeutralizationBody(context: RequestContext, fileId: str) -> Dict[str, str]:
    """Validate ``fileId``, clean up its attributes and build the response payload.

    Shared by the plain and the instance-scoped ``/retrigger`` routes.

    Args:
        context: Request context used to build the service.
        fileId: File id from the request body; must be non-empty.

    Returns:
        A dict with a ``message`` and the echoed ``fileId``.

    Raises:
        HTTPException: 400 when ``fileId`` is empty.
    """
    if not fileId:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=routeApiMsg("fileId is required"),
        )
    service = _buildNeutralizationService(context)
    # NOTE(review): only the attribute cleanup happens here and its result is
    # ignored; presumably the actual re-run is triggered elsewhere -- confirm.
    service.cleanupFileAttributes(fileId)
    return {"message": f"Neutralization re-triggered for file {fileId}", "fileId": fileId}


@router.post("/retrigger", response_model=Dict[str, str])
@limiter.limit("10/minute")
def retriggerNeutralization(
    request: Request,
    retriggerData: Dict[str, str] = Body(...),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, str]:
    """Re-trigger neutralization for a specific file."""
    try:
        return _retriggerNeutralizationBody(context, retriggerData.get("fileId", ""))
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error re-triggering neutralization: {str(e)}")
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))


@router.post("/{feature_instance_id}/retrigger", response_model=Dict[str, str])
@limiter.limit("10/minute")
def retriggerNeutralizationScoped(
    request: Request,
    feature_instance_id: str = Path(..., description="Workspace / feature instance id"),
    retriggerData: Dict[str, str] = Body(...),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, str]:
    """Same as POST /retrigger; path includes instance id for workspace UI compatibility."""
    # The path id is only validated; the service is still built from the context.
    _assertFeatureInstancePathMatchesContext(feature_instance_id, context)
    try:
        return _retriggerNeutralizationBody(context, retriggerData.get("fileId", ""))
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error re-triggering neutralization: {str(e)}")
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))


@router.delete("/attributes/{fileId}", response_model=Dict[str, str])
@limiter.limit("10/minute")
def cleanup_file_attributes(
    request: Request,
    fileId: str = Path(..., description="File ID to cleanup attributes for"),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, str]:
    """Clean up neutralization attributes for a specific file"""
    try:
        service = _buildNeutralizationService(context)
        success = service.cleanupFileAttributes(fileId)

        # A falsy service result is reported as a 500, not a 404.
        if not success:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=routeApiMsg("Failed to cleanup file attributes")
            )

        return {"message": f"Successfully cleaned up attributes for file {fileId}"}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error cleaning up file attributes: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error cleaning up file attributes: {str(e)}"
        )