# File viewer metadata (not part of the module): 617 lines, 22 KiB, Python
from fastapi import APIRouter, HTTPException, Depends, File, UploadFile, Form, Path, Request, status, Query, Response, Body
|
|
from fastapi.responses import JSONResponse, FileResponse
|
|
from typing import List, Dict, Any, Optional, Union
|
|
import logging
|
|
from datetime import datetime, timezone
|
|
from dataclasses import dataclass
|
|
import io
|
|
import inspect
|
|
import importlib
|
|
import os
|
|
from pydantic import BaseModel
|
|
|
|
# Import auth module
|
|
from modules.security.auth import limiter, getCurrentUser
|
|
|
|
# Import interfaces
|
|
import modules.interfaces.interfaceComponentObjects as interfaceComponentObjects
|
|
from modules.interfaces.interfaceComponentModel import FileItem, FilePreview
|
|
from modules.shared.attributeUtils import getModelAttributeDefinitions, AttributeResponse, AttributeDefinition
|
|
from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
|
|
from modules.services.serviceNeutralization import NeutralizationService
|
|
|
|
# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Attribute definitions derived from the FileItem model
fileAttributes = getModelAttributeDefinitions(FileItem)

# Router for all file endpoints. Every route below is mounted under
# /api/files and shares the error responses declared here.
router = APIRouter(
    prefix="/api/files",
    tags=["Manage Files"],
    responses={
        statusCode: {"description": text}
        for statusCode, text in (
            (404, "Not found"),
            (400, "Bad request"),
            (401, "Unauthorized"),
            (403, "Forbidden"),
            (500, "Internal server error"),
        )
    },
)
|
|
|
|
@router.get("/list", response_model=List[FileItem])
@limiter.limit("30/minute")
async def get_files(
    request: Request,
    currentUser: User = Depends(getCurrentUser)
) -> List[FileItem]:
    """Return all files exposed by the management interface of the current user.

    Raises:
        HTTPException: 500 when fetching the files fails for any reason.
    """
    # `request` is not read here; presumably the rate limiter decorator needs it - confirm.
    try:
        # Per the original author's note, getAllFiles() yields FileItem
        # metadata only (no binary data), so the result is returned as-is.
        return interfaceComponentObjects.getInterface(currentUser).getAllFiles()
    except Exception as exc:
        logger.error(f"Error getting files: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get files: {str(exc)}"
        )
|
|
|
|
@router.post("/upload", status_code=status.HTTP_201_CREATED)
@limiter.limit("10/minute")
async def upload_file(
    request: Request,
    file: UploadFile = File(...),
    workflowId: Optional[str] = Form(None),
    currentUser: User = Depends(getCurrentUser)
) -> JSONResponse:
    """Upload a file

    The upload is read fully into memory, checked against the configured
    maximum size, stored through the management interface and optionally
    linked to a workflow.

    Returns:
        A JSON body with the keys ``message``, ``file``, ``duplicateType``,
        ``originalFileName``, ``storedFileName`` and ``isDuplicate``.

    Raises:
        HTTPException: 413 when the upload exceeds the configured size limit,
            500 on storage errors or any other failure.
    """
    # Add fileName property to UploadFile for consistency with backend model
    file.fileName = file.filename

    # NOTE(review): a JSONResponse returned directly is not affected by the
    # decorator's status_code, so this route appears to answer 200 rather than
    # 201. Left unchanged because clients may rely on it - confirm.
    try:
        managementInterface = interfaceComponentObjects.getInterface(currentUser)

        # Read file
        fileContent = await file.read()

        # Check size limits (the configured value is in MB; compare in bytes)
        maxSizeMb = interfaceComponentObjects.APP_CONFIG.get("File_Management_MAX_UPLOAD_SIZE_MB")
        maxSize = int(maxSizeMb) * 1024 * 1024
        if len(fileContent) > maxSize:
            raise HTTPException(
                status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                detail=f"File too large. Maximum size: {maxSizeMb}MB"
            )

        # Save file via LucyDOM interface in the database
        fileItem, duplicateType = managementInterface.saveUploadedFile(fileContent, file.filename)

        # Determine response message based on duplicate type
        if duplicateType == "exact_duplicate":
            message = f"File '{file.filename}' already exists with identical content. Reusing existing file."
        elif duplicateType == "name_conflict":
            message = f"File '{file.filename}' already exists with different content. Uploaded as '{fileItem.fileName}'."
        else:  # new_file
            message = "File uploaded successfully"

        # If workflowId is provided, update the file information
        if workflowId:
            updateData = {"workflowId": workflowId}
            managementInterface.updateFile(fileItem.id, updateData)
            fileItem.workflowId = workflowId

        # Convert FileItem to dictionary for JSON response
        fileMeta = fileItem.to_dict()

        # Response with duplicate information
        return JSONResponse({
            "message": message,
            "file": fileMeta,
            "duplicateType": duplicateType,
            "originalFileName": file.filename,
            "storedFileName": fileItem.fileName,
            "isDuplicate": duplicateType != "new_file"
        })

    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 413 above) through unchanged
        # instead of having the generic handler rewrite them as 500.
        raise
    except interfaceComponentObjects.FileStorageError as e:
        logger.error(f"Error during file upload (storage): {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        ) from e
    except Exception as e:
        logger.error(f"Error during file upload: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error during file upload: {str(e)}"
        ) from e
|
|
|
|
@router.get("/{fileId}", response_model=FileItem)
@limiter.limit("30/minute")
async def get_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file"),
    currentUser: User = Depends(getCurrentUser)
) -> FileItem:
    """Get a file

    Looks the file up through the management interface of the current user.

    Raises:
        HTTPException: 404 when the file does not exist, 403 when the
            interface reports a permission error, 500 for any other failure.
    """
    try:
        managementInterface = interfaceComponentObjects.getInterface(currentUser)

        # Get file via LucyDOM interface from the database
        fileData = managementInterface.getFile(fileId)
        if not fileData:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )

        return fileData

    except HTTPException:
        # Without this passthrough the 404 raised above would be caught by
        # the generic handler below and returned as a 500.
        raise
    except interfaceComponentObjects.FileNotFoundError as e:
        logger.warning(f"File not found: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(e)
        ) from e
    except interfaceComponentObjects.FilePermissionError as e:
        logger.warning(f"No permission for file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=str(e)
        ) from e
    except interfaceComponentObjects.FileError as e:
        logger.error(f"Error retrieving file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        ) from e
    except Exception as e:
        logger.error(f"Unexpected error retrieving file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error retrieving file: {str(e)}"
        ) from e
|
|
|
|
@router.put("/{fileId}", response_model=FileItem)
@limiter.limit("10/minute")
async def update_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to update"),
    file_info: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> FileItem:
    """Apply the fields in ``file_info`` to an existing file and return it.

    Raises:
        HTTPException: 404 when the file is unknown, 403 when the caller may
            not modify it, 500 when the update fails or raises.
    """
    try:
        fileStore = interfaceComponentObjects.getInterface(currentUser)

        # The file has to exist before anything else is checked
        if not fileStore.getFile(fileId):
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )

        # Permission check is delegated to the interface's own permission system
        mayModify = fileStore._canModify("files", fileId)
        if not mayModify:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Not authorized to update this file"
            )

        # A falsy result from updateFile is treated as a failed update
        if not fileStore.updateFile(fileId, file_info):
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to update file"
            )

        # Re-read the file so the response carries the stored state
        return fileStore.getFile(fileId)

    except HTTPException:
        # Deliberate HTTP errors from above pass through untouched
        raise
    except Exception as exc:
        logger.error(f"Error updating file: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(exc)
        )
|
|
|
|
@router.delete("/{fileId}", response_model=Dict[str, Any])
@limiter.limit("10/minute")
async def delete_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to delete"),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Delete the file with the given ID and confirm with a message.

    Raises:
        HTTPException: 404 when the file does not exist, 500 when the
            interface reports that the deletion did not succeed.
    """
    # No try/except here: exceptions from the interface propagate as raised.
    fileStore = interfaceComponentObjects.getInterface(currentUser)

    # Refuse to delete something that is not there
    if not fileStore.getFile(fileId):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"File with ID {fileId} not found"
        )

    deleted = fileStore.deleteFile(fileId)
    if deleted:
        return {"message": f"File with ID {fileId} successfully deleted"}

    raise HTTPException(
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        detail="Error deleting the file"
    )
|
|
|
|
# NOTE(review): GET "/{fileId}" is registered earlier on this router, and
# routes are matched in declaration order, so a request to "/stats" looks
# like it is answered by get_file (fileId="stats") instead of this handler.
# Moving this route above "/{fileId}" would fix that - confirm.
@router.get("/stats", response_model=Dict[str, Any])
@limiter.limit("30/minute")
async def get_file_stats(
    request: Request,
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Return file count, total size in bytes and a per-type file count.

    The type of a file is the part of its MIME type before the first "/".

    Raises:
        HTTPException: 500 when the statistics cannot be computed.
    """
    try:
        # Metadata of all files (per the original author's note)
        storedFiles = interfaceComponentObjects.getInterface(currentUser).getAllFiles()

        fileCount = len(storedFiles)
        byteTotal = sum(entry.fileSize for entry in storedFiles)

        # Tally files by the leading segment of their MIME type
        countsByType = {}
        for entry in storedFiles:
            category = entry.mimeType.split("/")[0]
            countsByType[category] = countsByType.get(category, 0) + 1

        return {
            "totalFiles": fileCount,
            "totalSizeBytes": byteTotal,
            "fileTypes": countsByType
        }

    except Exception as exc:
        logger.error(f"Error retrieving file statistics: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error retrieving file statistics: {str(exc)}"
        )
|
|
|
|
@router.get("/{fileId}/download")
@limiter.limit("30/minute")
async def download_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to download"),
    currentUser: User = Depends(getCurrentUser)
) -> Response:
    """Send the stored content of a file as an attachment.

    Raises:
        HTTPException: 404 when the file or its content is missing, 500 for
            any other failure.
    """
    try:
        fileStore = interfaceComponentObjects.getInterface(currentUser)

        # Metadata first: supplies the file name and MIME type for the response
        meta = fileStore.getFile(fileId)
        if not meta:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )

        # NOTE(review): the falsy check below also rejects empty content, so a
        # zero-byte file would be answered with 404 - confirm this is intended.
        payload = fileStore.getFileData(fileId)
        if not payload:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File content not found for ID {fileId}"
            )

        # Percent-encode the name and send it as filename*=UTF-8''... so that
        # non-ASCII file names are valid in the Content-Disposition header.
        import urllib.parse
        quotedName = urllib.parse.quote(meta.fileName)
        dispositionHeader = {
            "Content-Disposition": f"attachment; filename*=UTF-8''{quotedName}"
        }

        return Response(
            content=payload,
            media_type=meta.mimeType,
            headers=dispositionHeader
        )
    except HTTPException:
        # Keep the 404s raised above as they are
        raise
    except Exception as exc:
        logger.error(f"Error downloading file: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error downloading file: {str(exc)}"
        )
|
|
|
|
@router.get("/{fileId}/preview", response_model=FilePreview)
@limiter.limit("30/minute")
async def preview_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to preview"),
    currentUser: User = Depends(getCurrentUser)
) -> FilePreview:
    """Return the preview the management interface provides for a file.

    Raises:
        HTTPException: 404 when no preview is returned, 500 for any other
            failure.
    """
    try:
        fileStore = interfaceComponentObjects.getInterface(currentUser)

        # getFileContent() is the interface call that yields the preview
        filePreview = fileStore.getFileContent(fileId)
        if filePreview:
            return filePreview

        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"File with ID {fileId} not found or no content available"
        )
    except HTTPException:
        # The 404 above is passed through unchanged
        raise
    except Exception as exc:
        logger.error(f"Error previewing file: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error previewing file: {str(exc)}"
        )
|
|
|
|
# Data Neutralization endpoints
|
|
|
|
@router.get("/neutralization/config", response_model=DataNeutraliserConfig)
@limiter.limit("30/minute")
async def get_neutralization_config(
    request: Request,
    currentUser: User = Depends(getCurrentUser)
) -> DataNeutraliserConfig:
    """Return the data neutralization configuration.

    When the service has no configuration, an enabled default with empty
    names and SharePoint paths is returned for the current user.

    Raises:
        HTTPException: 500 when the configuration cannot be read.
    """
    try:
        storedConfig = NeutralizationService(currentUser).get_config()
        if storedConfig:
            return storedConfig

        # Nothing stored: answer with a default instead of a 404
        return DataNeutraliserConfig(
            mandateId=currentUser.mandateId,
            userId=currentUser.id,
            enabled=True,
            namesToParse="",
            sharepointSourcePath="",
            sharepointTargetPath=""
        )

    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"Error getting neutralization config: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error getting neutralization config: {str(exc)}"
        )
|
|
|
|
@router.post("/neutralization/config", response_model=DataNeutraliserConfig)
@limiter.limit("10/minute")
async def save_neutralization_config(
    request: Request,
    config_data: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> DataNeutraliserConfig:
    """Pass ``config_data`` to the neutralization service and return its result.

    Raises:
        HTTPException: 500 when saving fails for any reason.
    """
    try:
        # The service decides whether this creates or updates the configuration
        return NeutralizationService(currentUser).save_config(config_data)

    except Exception as exc:
        logger.error(f"Error saving neutralization config: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error saving neutralization config: {str(exc)}"
        )
|
|
|
|
@router.post("/neutralization/neutralize-text", response_model=Dict[str, Any])
@limiter.limit("20/minute")
async def neutralize_text(
    request: Request,
    text_data: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Neutralize the ``text`` entry of the request body.

    An optional ``fileId`` entry is forwarded to the service together with
    the text.

    Raises:
        HTTPException: 400 when ``text`` is missing or empty, 500 for any
            other failure.
    """
    try:
        rawText = text_data.get("text", "")
        relatedFileId = text_data.get("fileId")

        # Reject the request before the service is created
        if not rawText:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Text content is required"
            )

        return NeutralizationService(currentUser).neutralize_text(rawText, relatedFileId)

    except HTTPException:
        # The 400 above must not be turned into a 500
        raise
    except Exception as exc:
        logger.error(f"Error neutralizing text: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error neutralizing text: {str(exc)}"
        )
|
|
|
|
@router.post("/neutralization/resolve-text", response_model=Dict[str, str])
@limiter.limit("20/minute")
async def resolve_text(
    request: Request,
    text_data: Dict[str, str] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, str]:
    """Resolve the ``text`` entry of the request body via the service.

    Returns:
        A dict with the single key ``resolved_text``.

    Raises:
        HTTPException: 400 when ``text`` is missing or empty, 500 for any
            other failure.
    """
    try:
        neutralizedText = text_data.get("text", "")
        if not neutralizedText:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Text content is required"
            )

        # The service maps the UIDs in the text back to the original values
        restored = NeutralizationService(currentUser).resolve_text(neutralizedText)
        return {"resolved_text": restored}

    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"Error resolving text: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error resolving text: {str(exc)}"
        )
|
|
|
|
@router.get("/neutralization/attributes", response_model=List[DataNeutralizerAttributes])
@limiter.limit("30/minute")
async def get_neutralization_attributes(
    request: Request,
    fileId: Optional[str] = Query(None, description="Filter by file ID"),
    currentUser: User = Depends(getCurrentUser)
) -> List[DataNeutralizerAttributes]:
    """Return the neutralization attributes the service reports.

    ``fileId`` is handed to the service unchanged; it is ``None`` when the
    query parameter is omitted.

    Raises:
        HTTPException: 500 when the attributes cannot be fetched.
    """
    try:
        return NeutralizationService(currentUser).get_attributes(fileId)

    except Exception as exc:
        logger.error(f"Error getting neutralization attributes: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error getting neutralization attributes: {str(exc)}"
        )
|
|
|
|
@router.post("/neutralization/process-sharepoint", response_model=Dict[str, Any])
@limiter.limit("5/minute")
async def process_sharepoint_files(
    request: Request,
    paths_data: Dict[str, str] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Run the service's SharePoint processing for a source and target path.

    The paths are read from the ``sourcePath`` and ``targetPath`` entries of
    the request body.

    Raises:
        HTTPException: 400 when either path is missing or empty, 500 for any
            other failure.
    """
    try:
        sourcePath = paths_data.get("sourcePath", "")
        targetPath = paths_data.get("targetPath", "")

        # Both paths are mandatory
        if not (sourcePath and targetPath):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Both source and target paths are required"
            )

        neutralizer = NeutralizationService(currentUser)
        # Awaited: this is the only service call in this module that is a coroutine
        return await neutralizer.process_sharepoint_files(sourcePath, targetPath)

    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"Error processing SharePoint files: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error processing SharePoint files: {str(exc)}"
        )
|
|
|
|
@router.post("/neutralization/batch-process", response_model=Dict[str, Any])
@limiter.limit("10/minute")
async def batch_process_files(
    request: Request,
    files_data: List[Dict[str, Any]] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Hand a list of file descriptions to the service for batch neutralization.

    Raises:
        HTTPException: 400 when the list is empty, 500 for any other failure.
    """
    try:
        # An empty list is rejected before the service is created
        if not files_data:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Files data is required"
            )

        return NeutralizationService(currentUser).batch_neutralize_files(files_data)

    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"Error batch processing files: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error batch processing files: {str(exc)}"
        )
|
|
|
|
@router.get("/neutralization/stats", response_model=Dict[str, Any])
@limiter.limit("30/minute")
async def get_neutralization_stats(
    request: Request,
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Return the processing statistics reported by the neutralization service.

    Raises:
        HTTPException: 500 when the statistics cannot be fetched.
    """
    try:
        return NeutralizationService(currentUser).get_processing_stats()

    except Exception as exc:
        logger.error(f"Error getting neutralization stats: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error getting neutralization stats: {str(exc)}"
        )
|
|
|
|
@router.delete("/neutralization/attributes/{fileId}", response_model=Dict[str, str])
@limiter.limit("10/minute")
async def cleanup_file_attributes(
    request: Request,
    fileId: str = Path(..., description="File ID to cleanup attributes for"),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, str]:
    """Ask the service to clean up the neutralization attributes of one file.

    Raises:
        HTTPException: 500 when the service reports failure or raises.
    """
    try:
        cleaned = NeutralizationService(currentUser).cleanup_file_attributes(fileId)

        # A falsy result from the service counts as a failed cleanup
        if not cleaned:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to cleanup file attributes"
            )

        return {"message": f"Successfully cleaned up attributes for file {fileId}"}

    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"Error cleaning up file attributes: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error cleaning up file attributes: {str(exc)}"
        )
|
|
|