gateway/modules/routes/routeDataFiles.py
2025-09-17 02:12:34 +02:00

617 lines
22 KiB
Python

from fastapi import APIRouter, HTTPException, Depends, File, UploadFile, Form, Path, Request, status, Query, Response, Body
from fastapi.responses import JSONResponse, FileResponse
from typing import List, Dict, Any, Optional, Union
import logging
from datetime import datetime, timezone
from dataclasses import dataclass
import io
import inspect
import importlib
import os
from pydantic import BaseModel
# Import auth module
from modules.security.auth import limiter, getCurrentUser
# Import interfaces
import modules.interfaces.interfaceComponentObjects as interfaceComponentObjects
from modules.interfaces.interfaceComponentModel import FileItem, FilePreview
from modules.shared.attributeUtils import getModelAttributeDefinitions, AttributeResponse, AttributeDefinition
from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
from modules.services.serviceNeutralization import NeutralizationService
# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Attribute definitions derived from the FileItem model
fileAttributes = getModelAttributeDefinitions(FileItem)

# Router carrying every file endpoint; all paths are served below /api/files
router = APIRouter(
    prefix="/api/files",
    tags=["Manage Files"],
    # Response descriptions shared by all routes of this router
    responses={
        statusCode: {"description": description}
        for statusCode, description in (
            (404, "Not found"),
            (400, "Bad request"),
            (401, "Unauthorized"),
            (403, "Forbidden"),
            (500, "Internal server error"),
        )
    },
)
@router.get("/list", response_model=List[FileItem])
@limiter.limit("30/minute")
async def get_files(
    request: Request,
    currentUser: User = Depends(getCurrentUser)
) -> List[FileItem]:
    """Return every file the management interface exposes for the current user.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        Whatever ``getAllFiles()`` yields, passed through unchanged.

    Raises:
        HTTPException: 500 when anything goes wrong while listing the files.
    """
    try:
        # The result is handed back as-is; it is expected to already consist
        # of FileItem objects (metadata only), so no conversion happens here.
        return interfaceComponentObjects.getInterface(currentUser).getAllFiles()
    except Exception as e:
        logger.error(f"Error getting files: {str(e)}")
        raise HTTPException(
            detail=f"Failed to get files: {str(e)}",
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
@router.post("/upload", status_code=status.HTTP_201_CREATED)
@limiter.limit("10/minute")
async def upload_file(
    request: Request,
    file: UploadFile = File(...),
    workflowId: Optional[str] = Form(None),
    currentUser: User = Depends(getCurrentUser)
) -> JSONResponse:
    """Upload a file and store it through the management interface.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        file: The multipart upload.
        workflowId: Optional workflow to associate with the stored file.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        JSONResponse with the keys ``message``, ``file``, ``duplicateType``,
        ``originalFileName``, ``storedFileName`` and ``isDuplicate``.

    Raises:
        HTTPException: 413 when the upload exceeds the configured size limit,
            500 for storage errors and any other failure.
    """
    # Add fileName property to UploadFile for consistency with backend model
    file.fileName = file.filename

    try:
        managementInterface = interfaceComponentObjects.getInterface(currentUser)

        # Read the complete upload into memory
        fileContent = await file.read()

        # Check size limits (configured in MB, compared in bytes)
        maxSizeMb = interfaceComponentObjects.APP_CONFIG.get("File_Management_MAX_UPLOAD_SIZE_MB")
        maxSize = int(maxSizeMb) * 1024 * 1024
        if len(fileContent) > maxSize:
            raise HTTPException(
                status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                detail=f"File too large. Maximum size: {maxSizeMb}MB"
            )

        # Persist the file; the interface also reports how duplicates were handled
        fileItem, duplicateType = managementInterface.saveUploadedFile(fileContent, file.filename)

        # Determine response message based on duplicate type
        if duplicateType == "exact_duplicate":
            message = f"File '{file.filename}' already exists with identical content. Reusing existing file."
        elif duplicateType == "name_conflict":
            message = f"File '{file.filename}' already exists with different content. Uploaded as '{fileItem.fileName}'."
        else:  # new_file
            message = "File uploaded successfully"

        # If workflowId is provided, store it and mirror it on the returned item
        if workflowId:
            managementInterface.updateFile(fileItem.id, {"workflowId": workflowId})
            fileItem.workflowId = workflowId

        # NOTE(review): a Response returned directly is not rewritten by the
        # decorator's status_code, so this presumably goes out as 200 rather
        # than 201 - confirm which one clients rely on before changing it.
        return JSONResponse({
            "message": message,
            "file": fileItem.to_dict(),
            "duplicateType": duplicateType,
            "originalFileName": file.filename,
            "storedFileName": fileItem.fileName,
            "isDuplicate": duplicateType != "new_file"
        })
    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 413 above) through unchanged;
        # without this the generic handler below would turn them into a 500.
        raise
    except interfaceComponentObjects.FileStorageError as e:
        logger.error(f"Error during file upload (storage): {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
    except Exception as e:
        logger.error(f"Error during file upload: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error during file upload: {str(e)}"
        )
# NOTE: "/stats" must be registered before "/{fileId}". Routes are matched in
# registration order, so a later "/stats" would be captured by the
# "/{fileId}" route with fileId == "stats" and never be reached.
@router.get("/stats", response_model=Dict[str, Any])
@limiter.limit("30/minute")
async def get_file_stats(
    request: Request,
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Return statistics about the stored files.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        Dict with ``totalFiles`` (count), ``totalSizeBytes`` (sum of the
        ``fileSize`` attributes) and ``fileTypes`` (count per leading part of
        the ``mimeType``, i.e. the text before the first "/").

    Raises:
        HTTPException: 500 when the statistics cannot be computed.
    """
    try:
        managementInterface = interfaceComponentObjects.getInterface(currentUser)

        # Get all files - metadata only
        allFiles = managementInterface.getAllFiles()

        # A missing size counts as 0 instead of breaking the whole sum
        totalSize = sum((file.fileSize or 0) for file in allFiles)

        # Group by the top-level part of the MIME type; files without a MIME
        # type are collected under "unknown" instead of raising
        fileTypes: Dict[str, int] = {}
        for file in allFiles:
            fileType = (file.mimeType or "unknown").split("/")[0]
            fileTypes[fileType] = fileTypes.get(fileType, 0) + 1

        return {
            "totalFiles": len(allFiles),
            "totalSizeBytes": totalSize,
            "fileTypes": fileTypes
        }
    except Exception as e:
        logger.error(f"Error retrieving file statistics: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error retrieving file statistics: {str(e)}"
        )


@router.get("/{fileId}", response_model=FileItem)
@limiter.limit("30/minute")
async def get_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file"),
    currentUser: User = Depends(getCurrentUser)
) -> FileItem:
    """Get a single file by its ID.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        fileId: ID of the file to look up.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        The object returned by ``getFile(fileId)``.

    Raises:
        HTTPException: 404 when the file is missing, 403 on a permission
            error, 500 for other file errors and unexpected failures.
    """
    try:
        managementInterface = interfaceComponentObjects.getInterface(currentUser)

        fileData = managementInterface.getFile(fileId)
        if not fileData:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )
        return fileData
    except HTTPException:
        # Keep the 404 raised above; the generic handler at the bottom would
        # otherwise convert it into a 500.
        raise
    except interfaceComponentObjects.FileNotFoundError as e:
        logger.warning(f"File not found: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(e)
        )
    except interfaceComponentObjects.FilePermissionError as e:
        logger.warning(f"No permission for file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=str(e)
        )
    except interfaceComponentObjects.FileError as e:
        logger.error(f"Error retrieving file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
    except Exception as e:
        logger.error(f"Unexpected error retrieving file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error retrieving file: {str(e)}"
        )


@router.put("/{fileId}", response_model=FileItem)
@limiter.limit("10/minute")
async def update_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to update"),
    file_info: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> FileItem:
    """Update the stored information of a file.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        fileId: ID of the file to update.
        file_info: Fields to change, passed unmodified to ``updateFile``.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        The file as returned by ``getFile`` after the update.

    Raises:
        HTTPException: 404 when the file does not exist, 403 when the
            interface denies modification, 500 when the update fails.
    """
    try:
        managementInterface = interfaceComponentObjects.getInterface(currentUser)

        # The file must exist before anything else is checked
        if not managementInterface.getFile(fileId):
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )

        # Check if user has access to the file using the interface's permission system
        if not managementInterface._canModify("files", fileId):
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Not authorized to update this file"
            )

        # Apply the update; a falsy result is treated as failure
        if not managementInterface.updateFile(fileId, file_info):
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to update file"
            )

        # Re-read so the response reflects the stored state
        return managementInterface.getFile(fileId)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )


@router.delete("/{fileId}", response_model=Dict[str, Any])
@limiter.limit("10/minute")
async def delete_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to delete"),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Delete a file.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        fileId: ID of the file to delete.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        Dict with a confirmation ``message``.

    Raises:
        HTTPException: 404 when the file does not exist, 500 when
            ``deleteFile`` reports failure. Exceptions raised by the
            interface itself are not caught here and propagate.
    """
    managementInterface = interfaceComponentObjects.getInterface(currentUser)

    # Check if the file exists
    if not managementInterface.getFile(fileId):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"File with ID {fileId} not found"
        )

    if not managementInterface.deleteFile(fileId):
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Error deleting the file"
        )

    return {"message": f"File with ID {fileId} successfully deleted"}
@router.get("/{fileId}/download")
@limiter.limit("30/minute")
async def download_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to download"),
    currentUser: User = Depends(getCurrentUser)
) -> Response:
    """Send a stored file back to the client as an attachment.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        fileId: ID of the file to download.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        Response carrying the file content, the file's MIME type and a
        ``Content-Disposition`` header with the percent-encoded file name.

    Raises:
        HTTPException: 404 when the file or its content is missing,
            500 for any other failure.
    """
    try:
        fileStore = interfaceComponentObjects.getInterface(currentUser)

        # Metadata first: it supplies the file name and MIME type
        fileMeta = fileStore.getFile(fileId)
        if not fileMeta:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )

        # NOTE(review): any falsy content is reported as 404, which presumably
        # includes an empty (0-byte) payload - confirm that this is intended.
        payload = fileStore.getFileData(fileId)
        if not payload:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File content not found for ID {fileId}"
            )

        # Percent-encode the name so non-ASCII characters survive in the
        # filename* form of the Content-Disposition header
        from urllib.parse import quote
        dispositionHeaders = {
            "Content-Disposition": f"attachment; filename*=UTF-8''{quote(fileMeta.fileName)}"
        }
        return Response(
            content=payload,
            media_type=fileMeta.mimeType,
            headers=dispositionHeaders
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error downloading file: {str(e)}")
        raise HTTPException(
            detail=f"Error downloading file: {str(e)}",
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
        )
@router.get("/{fileId}/preview", response_model=FilePreview)
@limiter.limit("30/minute")
async def preview_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to preview"),
    currentUser: User = Depends(getCurrentUser)
) -> FilePreview:
    """Return the preview of a file's content.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        fileId: ID of the file to preview.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        The result of ``getFileContent(fileId)``.

    Raises:
        HTTPException: 404 when no preview is returned, 500 otherwise.
    """
    try:
        # getFileContent is the interface method that produces the preview
        filePreview = interfaceComponentObjects.getInterface(currentUser).getFileContent(fileId)
        if filePreview:
            return filePreview
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"File with ID {fileId} not found or no content available"
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error previewing file: {str(e)}")
        raise HTTPException(
            detail=f"Error previewing file: {str(e)}",
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
        )
# Data Neutralization endpoints
@router.get("/neutralization/config", response_model=DataNeutraliserConfig)
@limiter.limit("30/minute")
async def get_neutralization_config(
    request: Request,
    currentUser: User = Depends(getCurrentUser)
) -> DataNeutraliserConfig:
    """Return the data neutralization configuration for the current user.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        The stored configuration, or a default one (enabled, empty name list
        and empty SharePoint paths) when the service returns nothing.

    Raises:
        HTTPException: 500 when the configuration cannot be read.
    """
    try:
        storedConfig = NeutralizationService(currentUser).get_config()
        if storedConfig:
            return storedConfig

        # Nothing stored yet: answer with defaults instead of a 404
        return DataNeutraliserConfig(
            mandateId=currentUser.mandateId,
            userId=currentUser.id,
            enabled=True,
            namesToParse="",
            sharepointSourcePath="",
            sharepointTargetPath=""
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting neutralization config: {str(e)}")
        raise HTTPException(
            detail=f"Error getting neutralization config: {str(e)}",
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
        )
@router.post("/neutralization/config", response_model=DataNeutraliserConfig)
@limiter.limit("10/minute")
async def save_neutralization_config(
    request: Request,
    config_data: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> DataNeutraliserConfig:
    """Save or update the data neutralization configuration.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        config_data: Configuration values, handed unmodified to the service.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        Whatever ``save_config`` returns.

    Raises:
        HTTPException: 500 when saving fails for any reason.
    """
    try:
        return NeutralizationService(currentUser).save_config(config_data)
    except Exception as e:
        logger.error(f"Error saving neutralization config: {str(e)}")
        raise HTTPException(
            detail=f"Error saving neutralization config: {str(e)}",
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
        )
@router.post("/neutralization/neutralize-text", response_model=Dict[str, Any])
@limiter.limit("20/minute")
async def neutralize_text(
    request: Request,
    text_data: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Neutralize text content.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        text_data: Body with the key ``text`` (required, non-empty) and the
            optional key ``fileId``.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        The result of the service's ``neutralize_text`` call.

    Raises:
        HTTPException: 400 when ``text`` is missing or empty, 500 otherwise.
    """
    try:
        rawText = text_data.get("text", "")
        # Validate before the service is even constructed
        if not rawText:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Text content is required"
            )

        relatedFileId = text_data.get("fileId")
        neutralizer = NeutralizationService(currentUser)
        return neutralizer.neutralize_text(rawText, relatedFileId)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error neutralizing text: {str(e)}")
        raise HTTPException(
            detail=f"Error neutralizing text: {str(e)}",
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
        )
@router.post("/neutralization/resolve-text", response_model=Dict[str, str])
@limiter.limit("20/minute")
async def resolve_text(
    request: Request,
    text_data: Dict[str, str] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, str]:
    """Resolve UIDs in neutralized text back to the original text.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        text_data: Body with the key ``text`` (required, non-empty).
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        Dict with the single key ``resolved_text``.

    Raises:
        HTTPException: 400 when ``text`` is missing or empty, 500 otherwise.
    """
    try:
        neutralizedText = text_data.get("text", "")
        if not neutralizedText:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Text content is required"
            )

        resolver = NeutralizationService(currentUser)
        return {"resolved_text": resolver.resolve_text(neutralizedText)}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error resolving text: {str(e)}")
        raise HTTPException(
            detail=f"Error resolving text: {str(e)}",
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
        )
@router.get("/neutralization/attributes", response_model=List[DataNeutralizerAttributes])
@limiter.limit("30/minute")
async def get_neutralization_attributes(
    request: Request,
    fileId: Optional[str] = Query(None, description="Filter by file ID"),
    currentUser: User = Depends(getCurrentUser)
) -> List[DataNeutralizerAttributes]:
    """Return neutralization attributes, optionally filtered by file ID.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        fileId: Optional file ID forwarded to the service as filter.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        The result of the service's ``get_attributes`` call.

    Raises:
        HTTPException: 500 when the attributes cannot be retrieved.
    """
    try:
        return NeutralizationService(currentUser).get_attributes(fileId)
    except Exception as e:
        logger.error(f"Error getting neutralization attributes: {str(e)}")
        raise HTTPException(
            detail=f"Error getting neutralization attributes: {str(e)}",
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
        )
@router.post("/neutralization/process-sharepoint", response_model=Dict[str, Any])
@limiter.limit("5/minute")
async def process_sharepoint_files(
    request: Request,
    paths_data: Dict[str, str] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Process files from a SharePoint source path into a target path.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        paths_data: Body with the keys ``sourcePath`` and ``targetPath``;
            both must be non-empty.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        The awaited result of the service's ``process_sharepoint_files``.

    Raises:
        HTTPException: 400 when either path is missing, 500 otherwise.
    """
    try:
        sourceLocation = paths_data.get("sourcePath", "")
        targetLocation = paths_data.get("targetPath", "")
        if not (sourceLocation and targetLocation):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Both source and target paths are required"
            )

        neutralizer = NeutralizationService(currentUser)
        return await neutralizer.process_sharepoint_files(sourceLocation, targetLocation)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error processing SharePoint files: {str(e)}")
        raise HTTPException(
            detail=f"Error processing SharePoint files: {str(e)}",
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
        )
@router.post("/neutralization/batch-process", response_model=Dict[str, Any])
@limiter.limit("10/minute")
async def batch_process_files(
    request: Request,
    files_data: List[Dict[str, Any]] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Process multiple files for neutralization in one call.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        files_data: Non-empty list of file entries, forwarded to the service.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        The result of the service's ``batch_neutralize_files`` call.

    Raises:
        HTTPException: 400 when the list is empty, 500 otherwise.
    """
    try:
        # An empty list is rejected before the service is created
        if not files_data:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Files data is required"
            )

        return NeutralizationService(currentUser).batch_neutralize_files(files_data)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error batch processing files: {str(e)}")
        raise HTTPException(
            detail=f"Error batch processing files: {str(e)}",
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
        )
@router.get("/neutralization/stats", response_model=Dict[str, Any])
@limiter.limit("30/minute")
async def get_neutralization_stats(
    request: Request,
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Return neutralization processing statistics.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        The result of the service's ``get_processing_stats`` call.

    Raises:
        HTTPException: 500 when the statistics cannot be retrieved.
    """
    try:
        return NeutralizationService(currentUser).get_processing_stats()
    except Exception as e:
        logger.error(f"Error getting neutralization stats: {str(e)}")
        raise HTTPException(
            detail=f"Error getting neutralization stats: {str(e)}",
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
        )
@router.delete("/neutralization/attributes/{fileId}", response_model=Dict[str, str])
@limiter.limit("10/minute")
async def cleanup_file_attributes(
    request: Request,
    fileId: str = Path(..., description="File ID to cleanup attributes for"),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, str]:
    """Clean up the neutralization attributes of a specific file.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        fileId: ID of the file whose attributes are removed.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        Dict with a confirmation ``message``.

    Raises:
        HTTPException: 500 when the service reports failure or raises.
    """
    try:
        cleaned = NeutralizationService(currentUser).cleanup_file_attributes(fileId)
        # A falsy result from the service is treated as failure
        if not cleaned:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to cleanup file attributes"
            )
        return {"message": f"Successfully cleaned up attributes for file {fileId}"}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error cleaning up file attributes: {str(e)}")
        raise HTTPException(
            detail=f"Error cleaning up file attributes: {str(e)}",
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
        )