Re-indexing the same file always triggered a full embedding run; `ingestion.skipped.duplicate` never fired. Two independent causes:

1. `_computeIngestionHash` included `contentObjectId` in its payload, but extractors generate a fresh `uuid4()` per run, which turned the hash into a per-run nonce. It is now computed over `(contentType, data)` pairs in extractor order: stable across re-extractions, yet sensitive to content, ordering, and type changes.
2. `_autoIndexFile` upserted the fresh pre-scan `FileContentIndex` before `requestIngestion`'s duplicate check ran, wiping `structure._ingestion` and `status="indexed"` from the prior run. The pre-upsert now merges the existing `_ingestion` metadata and preserves the indexed status.

Verified end-to-end: a second PATCH /scope on an already-indexed file logs the duplicate skip and returns in ~2 s with zero embedding API calls. Adds test_ingestion_hash_stability.py (5 cases).
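
A minimal sketch of the stable hash described above, assuming SHA-256 and dict-shaped content objects (the real `_computeIngestionHash` signature and field access may differ; the name below is illustrative):

    import hashlib
    import json

    def computeIngestionHash(contentObjects: list) -> str:
        # Per-run contentObjectId is deliberately excluded: it is a fresh
        # uuid4() on every extraction and would turn the hash into a nonce.
        # Hashing the ordered (contentType, data) pairs keeps the digest
        # stable across re-extractions while remaining sensitive to content,
        # ordering, and type changes.
        payload = [(obj["contentType"], obj["data"]) for obj in contentObjects]
        return hashlib.sha256(
            json.dumps(payload, ensure_ascii=False).encode("utf-8")
        ).hexdigest()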
1292 lines · 54 KiB · Python
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
from fastapi import APIRouter, HTTPException, Depends, File, UploadFile, Form, Path, Request, status, Query, Response, Body, BackgroundTasks
from fastapi.responses import JSONResponse
from typing import List, Dict, Any, Optional
import logging
import json

# Import auth module
from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext

# Import interfaces
import modules.interfaces.interfaceDbManagement as interfaceDbManagement
from modules.datamodels.datamodelFiles import FileItem, FilePreview
from modules.datamodels.datamodelFileFolder import FileFolder
from modules.shared.attributeUtils import getModelAttributeDefinitions
from modules.datamodels.datamodelUam import User
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata, normalize_pagination_dict
from modules.shared.i18nRegistry import apiRouteContext
from modules.routes.routeHelpers import enrichRowsWithFkLabels
routeApiMsg = apiRouteContext("routeDataFiles")

# Configure logger
logger = logging.getLogger(__name__)


def _resolveFileWithScope(currentUser: User, context: RequestContext, fileId: str):
    """Returns (managementInterface, fileItem) with RBAC scoped to the file's own mandate/instance.

    Files generated by workflows (e.g. AI report outputs) carry their own
    mandateId/featureInstanceId. Direct download links via <a href> cannot send
    custom scope headers, so we resolve the scope from the FileItem itself and
    re-check RBAC in that scope.

    Returns (None, None) if the file does not exist or the user lacks access
    in the file's actual scope.
    """
    requestMandateId = str(context.mandateId) if context.mandateId else None
    requestInstanceId = str(context.featureInstanceId) if context.featureInstanceId else None

    mgmt = interfaceDbManagement.getInterface(
        currentUser,
        mandateId=requestMandateId,
        featureInstanceId=requestInstanceId,
    )
    fileItem = mgmt.getFile(fileId)
    if fileItem:
        return mgmt, fileItem

    metas = mgmt.db.getRecordset(FileItem, recordFilter={"id": fileId})
    if not metas:
        return None, None

    meta = metas[0]
    fileMandateId = meta.get("mandateId") or None
    fileInstanceId = meta.get("featureInstanceId") or None

    if not fileMandateId and not fileInstanceId:
        return None, None

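    # Same scope as the request: getFile above already failed in this scope,
    # so retrying with an identically scoped interface cannot succeed.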
    if fileMandateId == requestMandateId and fileInstanceId == requestInstanceId:
        return None, None

    scopedMgmt = interfaceDbManagement.getInterface(
        currentUser,
        mandateId=fileMandateId,
        featureInstanceId=fileInstanceId,
    )
    fileItem = scopedMgmt.getFile(fileId)
    if not fileItem:
        return None, None

    return scopedMgmt, fileItem


async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
    """Background task: pre-scan + extraction + knowledge indexing.
    Step 1: Structure Pre-Scan (AI-free) -> FileContentIndex (persisted)
    Step 2: Content extraction via runExtraction -> ContentParts
    Step 3: KnowledgeService.requestIngestion -> idempotent chunking + embedding -> Knowledge Store"""
    userId = user.id if hasattr(user, "id") else str(user)
    try:
        mgmtInterface = interfaceDbManagement.getInterface(user)
        mgmtInterface.updateFile(fileId, {"status": "processing"})

        rawBytes = mgmtInterface.getFileData(fileId)
        if not rawBytes:
            logger.warning(f"Auto-index: no file data for {fileId}, skipping")
            mgmtInterface.updateFile(fileId, {"status": "active"})
            return

        file_meta = mgmtInterface.getFile(fileId)
        feature_instance_id = ""
        mandate_id = ""
        file_scope = "personal"
        if file_meta:
            if isinstance(file_meta, dict):
                feature_instance_id = file_meta.get("featureInstanceId") or ""
                mandate_id = file_meta.get("mandateId") or ""
                file_scope = file_meta.get("scope") or "personal"
            else:
                feature_instance_id = getattr(file_meta, "featureInstanceId", None) or ""
                mandate_id = getattr(file_meta, "mandateId", None) or ""
                file_scope = getattr(file_meta, "scope", None) or "personal"

        logger.info(f"Auto-index starting for {fileName} ({len(rawBytes)} bytes, {mimeType})")

        # Step 1: Structure Pre-Scan (AI-free)
        from modules.serviceCenter.services.serviceKnowledge.subPreScan import preScanDocument
        contentIndex = await preScanDocument(
            fileData=rawBytes,
            mimeType=mimeType,
            fileId=fileId,
            fileName=fileName,
            userId=userId,
            featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
            mandateId=str(mandate_id) if mandate_id else "",
            scope=file_scope,
        )
        logger.info(
            f"Pre-scan complete for {fileName}: "
            f"{contentIndex.totalObjects} objects"
        )

        # Persist FileContentIndex immediately.
        # IMPORTANT: preserve `_ingestion` metadata and `status="indexed"` from any
        # prior successful run — otherwise this upsert wipes the idempotency cache
        # and requestIngestion cannot detect duplicates (AC4 breaks).
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        knowledgeDb = getKnowledgeInterface()
        try:
            _existing = knowledgeDb.getFileContentIndex(fileId)
        except Exception:
            _existing = None
        if _existing:
            _existingStruct = (
                _existing.get("structure") if isinstance(_existing, dict)
                else getattr(_existing, "structure", {})
            ) or {}
            _existingStatus = (
                _existing.get("status") if isinstance(_existing, dict)
                else getattr(_existing, "status", "")
            ) or ""
            if "_ingestion" in _existingStruct:
                contentIndex.structure = dict(contentIndex.structure or {})
                contentIndex.structure["_ingestion"] = _existingStruct["_ingestion"]
            if _existingStatus == "indexed":
                contentIndex.status = "indexed"
        knowledgeDb.upsertFileContentIndex(contentIndex)

        # Step 2: Content extraction (AI-free, produces ContentParts)
        from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry, ChunkerRegistry
        from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
        from modules.datamodels.datamodelExtraction import ExtractionOptions

        extractorRegistry = ExtractorRegistry()
        chunkerRegistry = ChunkerRegistry()
        # mergeStrategy=None: keep per-page / per-section granularity for RAG ingestion.
        # The default MergeStrategy concatenates all text parts into a single blob, which
        # collapses a 500-page PDF into one ContentChunk and destroys semantic retrieval.
        options = ExtractionOptions(mergeStrategy=None)

        extracted = runExtraction(
            extractorRegistry, chunkerRegistry,
            rawBytes, fileName, mimeType, options,
        )

        contentObjects = []
        for part in extracted.parts:
            contentType = "text"
            if part.typeGroup == "image":
                contentType = "image"
            elif part.typeGroup in ("binary", "container"):
                contentType = "other"

            if not part.data or not part.data.strip():
                continue

            contentObjects.append({
                "contentObjectId": part.id,
                "contentType": contentType,
                "data": part.data,
                "contextRef": {
                    "containerPath": fileName,
                    "location": part.label or "file",
                    **(part.metadata or {}),
                },
            })

        logger.info(f"Extracted {len(contentObjects)} content objects from {fileName}")

        if not contentObjects:
            knowledgeDb.updateFileStatus(fileId, "indexed")
            mgmtInterface.updateFile(fileId, {"status": "active"})
            return

        # Step 3: Knowledge indexing (chunking + embedding)
        from modules.serviceCenter import getService
        from modules.serviceCenter.context import ServiceCenterContext

        ctx = ServiceCenterContext(
            user=user,
            mandate_id=str(mandate_id) if mandate_id else "",
            feature_instance_id=str(feature_instance_id) if feature_instance_id else "",
        )
        knowledgeService = getService("knowledge", ctx)

        from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

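        # requestIngestion is idempotent: it hashes the extracted content and,
        # when the hash matches the `_ingestion` metadata preserved above,
        # skips chunking and embedding (logging ingestion.skipped.duplicate).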
        await knowledgeService.requestIngestion(
            IngestionJob(
                sourceKind="file",
                sourceId=fileId,
                fileName=fileName,
                mimeType=mimeType,
                userId=userId,
                featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
                mandateId=str(mandate_id) if mandate_id else "",
                contentObjects=contentObjects,
                structure=contentIndex.structure,
                provenance={"lane": "upload", "route": "routeDataFiles._autoIndexFile"},
            )
        )

        # Re-acquire interface after await to avoid stale user context from the singleton
        mgmtInterface = interfaceDbManagement.getInterface(user)
        mgmtInterface.updateFile(fileId, {"status": "active"})
        logger.info(f"Auto-index complete for file {fileId} ({fileName})")

    except Exception as e:
        logger.error(f"Auto-index failed for file {fileId}: {e}", exc_info=True)
        try:
            errMgmt = interfaceDbManagement.getInterface(user)
            errMgmt.updateFile(fileId, {"status": "active"})
        except Exception:
            pass

# Model attributes for FileItem
fileAttributes = getModelAttributeDefinitions(FileItem)

# Create router for file endpoints
router = APIRouter(
    prefix="/api/files",
    tags=["Manage Files"],
    responses={
        404: {"description": "Not found"},
        400: {"description": "Bad request"},
        401: {"description": "Unauthorized"},
        403: {"description": "Forbidden"},
        500: {"description": "Internal server error"}
    }
)

@router.get("/list")
|
||
@limiter.limit("120/minute")
|
||
def get_files(
|
||
request: Request,
|
||
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
|
||
mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
|
||
column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
|
||
currentUser: User = Depends(getCurrentUser),
|
||
context: RequestContext = Depends(getRequestContext)
|
||
):
|
||
"""
|
||
Get files with optional pagination, sorting, and filtering.
|
||
|
||
Query Parameters:
|
||
- pagination: JSON-encoded PaginationParams object, or None for no pagination
|
||
|
||
Examples:
|
||
- GET /api/files/list (no pagination - returns all items)
|
||
- GET /api/files/list?pagination={"page":1,"pageSize":10,"sort":[]}
|
||
- GET /api/files/list?pagination={"page":2,"pageSize":20,"sort":[{"field":"fileName","direction":"asc"}]}
|
||
"""
|
||
try:
|
||
# Parse pagination parameter
|
||
paginationParams = None
|
||
if pagination:
|
||
try:
|
||
paginationDict = json.loads(pagination)
|
||
if paginationDict:
|
||
# Normalize pagination dict (handles top-level "search" field)
|
||
paginationDict = normalize_pagination_dict(paginationDict)
|
||
paginationParams = PaginationParams(**paginationDict)
|
||
except (json.JSONDecodeError, ValueError) as e:
|
||
raise HTTPException(
|
||
status_code=400,
|
||
detail=f"Invalid pagination parameter: {str(e)}"
|
||
)
|
||
|
||
from modules.routes.routeHelpers import (
|
||
handleIdsMode,
|
||
handleFilterValuesInMemory,
|
||
)
|
||
|
||
managementInterface = interfaceDbManagement.getInterface(
|
||
currentUser,
|
||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
|
||
)
|
||
|
||
if mode == "filterValues":
|
||
if not column:
|
||
raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")
|
||
allFiles = managementInterface.getAllFiles()
|
||
items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
|
||
itemDicts = [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in items]
|
||
enrichRowsWithFkLabels(itemDicts, FileItem)
|
||
return handleFilterValuesInMemory(itemDicts, column, pagination)
|
||
|
||
if mode == "ids":
|
||
recordFilter = {"sysCreatedBy": managementInterface.userId}
|
||
return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
|
||
|
||
recordFilter = None
|
||
if paginationParams and paginationParams.filters and "folderId" in paginationParams.filters:
|
||
fVal = paginationParams.filters.get("folderId")
|
||
# For a concrete folderId we use recordFilter (exact equality).
|
||
# For null / empty (= "root") we keep it in pagination.filters so the
|
||
# connector applies `IS NULL OR = ''` – files predating the folderId
|
||
# fix were stored with an empty string instead of NULL.
|
||
if fVal is None or (isinstance(fVal, str) and fVal.strip() == ""):
|
||
paginationParams.filters["folderId"] = None
|
||
else:
|
||
paginationParams.filters.pop("folderId")
|
||
recordFilter = {"folderId": fVal}
|
||
|
||
result = managementInterface.getAllFiles(pagination=paginationParams, recordFilter=recordFilter)
|
||
|
||
def _filesToDicts(items):
|
||
return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in items]
|
||
|
||
if paginationParams:
|
||
enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem)
|
||
return {
|
||
"items": enriched,
|
||
"pagination": PaginationMetadata(
|
||
currentPage=paginationParams.page,
|
||
pageSize=paginationParams.pageSize,
|
||
totalItems=result.totalItems,
|
||
totalPages=result.totalPages,
|
||
sort=paginationParams.sort,
|
||
filters=paginationParams.filters
|
||
).model_dump(),
|
||
}
|
||
else:
|
||
items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
|
||
enriched = enrichRowsWithFkLabels(_filesToDicts(items), FileItem)
|
||
return {"items": enriched, "pagination": None}
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"Error getting files: {str(e)}")
|
||
raise HTTPException(
|
||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||
detail=f"Failed to get files: {str(e)}"
|
||
)
|
||
|
||
|
||
@router.post("/upload", status_code=status.HTTP_201_CREATED)
|
||
@limiter.limit("10/minute")
|
||
async def upload_file(
|
||
request: Request,
|
||
file: UploadFile = File(...),
|
||
workflowId: Optional[str] = Form(None),
|
||
featureInstanceId: Optional[str] = Form(None),
|
||
folderId: Optional[str] = Form(None),
|
||
currentUser: User = Depends(getCurrentUser),
|
||
context: RequestContext = Depends(getRequestContext),
|
||
) -> JSONResponse:
|
||
# Add fileName property to UploadFile for consistency with backend model
|
||
file.fileName = file.filename
|
||
"""Upload a file"""
|
||
try:
|
||
managementInterface = interfaceDbManagement.getInterface(
|
||
currentUser,
|
||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||
)
|
||
|
||
# Read file
|
||
fileContent = await file.read()
|
||
|
||
# Check size limits
|
||
maxSize = int(interfaceDbManagement.APP_CONFIG.get("File_Management_MAX_UPLOAD_SIZE_MB")) * 1024 * 1024 # in bytes
|
||
if len(fileContent) > maxSize:
|
||
raise HTTPException(
|
||
status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
|
||
detail=f"File too large. Maximum size: {interfaceDbManagement.APP_CONFIG.get('File_Management_MAX_UPLOAD_SIZE_MB')}MB"
|
||
)
|
||
|
||
# Normalize folderId: empty string / "null" / "root" → None (root folder)
|
||
normalizedFolderId: Optional[str] = folderId
|
||
if isinstance(normalizedFolderId, str):
|
||
trimmed = normalizedFolderId.strip()
|
||
if not trimmed or trimmed.lower() in {"null", "none", "root"}:
|
||
normalizedFolderId = None
|
||
else:
|
||
normalizedFolderId = trimmed
|
||
|
||
# Save file via LucyDOM interface in the database
|
||
fileItem, duplicateType = managementInterface.saveUploadedFile(
|
||
fileContent, file.filename, folderId=normalizedFolderId
|
||
)
|
||
|
||
if featureInstanceId and not fileItem.featureInstanceId:
|
||
managementInterface.updateFile(fileItem.id, {"featureInstanceId": featureInstanceId})
|
||
fileItem.featureInstanceId = featureInstanceId
|
||
|
||
# For exact duplicates we keep the existing record, but move it into the
|
||
# target folder so the user actually sees their upload land where they expect.
|
||
if duplicateType == "exact_duplicate" and normalizedFolderId != getattr(fileItem, "folderId", None):
|
||
managementInterface.updateFile(fileItem.id, {"folderId": normalizedFolderId})
|
||
fileItem.folderId = normalizedFolderId
|
||
|
||
# Determine response message based on duplicate type
|
||
if duplicateType == "exact_duplicate":
|
||
message = f"File '{file.filename}' already exists with identical content. Reusing existing file."
|
||
elif duplicateType == "name_conflict":
|
||
message = f"File '{file.filename}' already exists with different content. Uploaded as '{fileItem.fileName}'."
|
||
else: # new_file
|
||
message = "File uploaded successfully"
|
||
|
||
# Convert FileItem to dictionary for JSON response
|
||
fileMeta = fileItem.model_dump()
|
||
|
||
# If workflowId is provided, include it in the response (not stored in FileItem model)
|
||
if workflowId:
|
||
fileMeta["workflowId"] = workflowId
|
||
|
||
# Trigger background auto-index pipeline (non-blocking)
|
||
# Also runs for duplicates in case the original was never successfully indexed
|
||
shouldIndex = duplicateType == "new_file"
|
||
if not shouldIndex:
|
||
try:
|
||
from modules.interfaces.interfaceDbKnowledge import getInterface as _getKnowledgeInterface
|
||
_kDb = _getKnowledgeInterface()
|
||
_existingIndex = _kDb.getFileContentIndex(fileItem.id)
|
||
if not _existingIndex:
|
||
shouldIndex = True
|
||
logger.info(f"Re-triggering auto-index for duplicate {fileItem.id} (not yet indexed)")
|
||
except Exception:
|
||
shouldIndex = True
|
||
|
||
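        # Fire-and-forget: asyncio.ensure_future schedules _autoIndexFile on the
        # running event loop without awaiting it, so the upload response is not
        # blocked; _autoIndexFile catches and logs its own failures.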
        if shouldIndex:
            try:
                import asyncio
                asyncio.ensure_future(_autoIndexFile(
                    fileId=fileItem.id,
                    fileName=fileItem.fileName,
                    mimeType=fileItem.mimeType,
                    user=currentUser,
                ))
            except Exception as indexErr:
                logger.warning(f"Auto-index trigger failed (non-blocking): {indexErr}")

        # Response with duplicate information
        return JSONResponse({
            "message": message,
            "file": fileMeta,
            "duplicateType": duplicateType,
            "originalFileName": file.filename,
            "storedFileName": fileItem.fileName,
            "isDuplicate": duplicateType != "new_file"
        })

    except interfaceDbManagement.FileStorageError as e:
        logger.error(f"Error during file upload (storage): {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
    except Exception as e:
        logger.error(f"Error during file upload: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error during file upload: {str(e)}"
        )

# ── Folder endpoints (MUST be before /{fileId} catch-all) ─────────────────────

@router.get("/folders", response_model=List[Dict[str, Any]])
@limiter.limit("30/minute")
def list_folders(
    request: Request,
    parentId: Optional[str] = Query(None, description="Parent folder ID (omit for all folders)"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> List[Dict[str, Any]]:
    """List folders for the current user."""
    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        if parentId is not None:
            return mgmt.listFolders(parentId=parentId)
        return mgmt.listFolders()
    except Exception as e:
        logger.error(f"Error listing folders: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/folders", status_code=status.HTTP_201_CREATED)
@limiter.limit("10/minute")
def create_folder(
    request: Request,
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Create a new folder."""
    name = body.get("name", "")
    parentId = body.get("parentId")
    if not name:
        raise HTTPException(status_code=400, detail=routeApiMsg("name is required"))
    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        return mgmt.createFolder(name=name, parentId=parentId)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error creating folder: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.put("/folders/{folderId}")
@limiter.limit("10/minute")
def rename_folder(
    request: Request,
    folderId: str = Path(...),
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Rename a folder."""
    newName = body.get("name", "")
    if not newName:
        raise HTTPException(status_code=400, detail=routeApiMsg("name is required"))
    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        mgmt.renameFolder(folderId, newName)
        return {"success": True, "folderId": folderId, "name": newName}
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error renaming folder: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.delete("/folders/{folderId}")
@limiter.limit("10/minute")
def delete_folder(
    request: Request,
    folderId: str = Path(...),
    recursive: bool = Query(False, description="Delete folder contents recursively"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Delete a folder. Use recursive=true to delete non-empty folders."""
    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        return mgmt.deleteFolder(folderId, recursive=recursive)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error deleting folder: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/folders/{folderId}/move")
@limiter.limit("10/minute")
def move_folder(
    request: Request,
    folderId: str = Path(...),
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Move a folder to a new parent."""
    targetParentId = body.get("targetParentId")
    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        mgmt.moveFolder(folderId, targetParentId)
        return {"success": True, "folderId": folderId, "parentId": targetParentId}
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error moving folder: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.patch("/folders/{folderId}/scope")
@limiter.limit("10/minute")
def _updateFolderScope(
    request: Request,
    folderId: str = Path(..., description="ID of the folder"),
    scope: str = Body(..., embed=True),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Update the scope of a folder. Propagates to all files inside (recursively). Global scope requires sysAdmin."""
    validScopes = {"personal", "featureInstance", "mandate", "global"}
    if scope not in validScopes:
        raise HTTPException(status_code=400, detail=f"Invalid scope: {scope}. Must be one of {validScopes}")
    if scope == "global" and not context.isSysAdmin:
        raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))
    try:
        mgmt = interfaceDbManagement.getInterface(
            context.user,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        folder = mgmt.getFolder(folderId)
        if not folder:
            raise HTTPException(status_code=404, detail=routeApiMsg("Folder not found"))
        mgmt.updateFolder(folderId, {"scope": scope})
        fileIds = _collectFolderFileIds(mgmt, folderId)
        for fid in fileIds:
            try:
                mgmt.updateFile(fid, {"scope": scope})
            except Exception as e:
                logger.error("Folder scope propagation: failed to update file %s: %s", fid, e)
        logger.info("Updated scope=%s for folder %s: %d files affected", scope, folderId, len(fileIds))
        return {"folderId": folderId, "scope": scope, "filesUpdated": len(fileIds)}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating folder scope: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.patch("/folders/{folderId}/neutralize")
|
||
@limiter.limit("10/minute")
|
||
def updateFolderNeutralize(
|
||
request: Request,
|
||
background_tasks: BackgroundTasks,
|
||
folderId: str = Path(..., description="ID of the folder"),
|
||
neutralize: bool = Body(..., embed=True),
|
||
context: RequestContext = Depends(getRequestContext),
|
||
) -> Dict[str, Any]:
|
||
"""Toggle neutralization on a folder. Propagates to all files inside (recursively).
|
||
|
||
When turning ON: all files in the folder get ``neutralize=True``, their
|
||
knowledge indexes are purged synchronously, and background re-indexing
|
||
is triggered.
|
||
When turning OFF: files revert to ``neutralize=False`` unless they were
|
||
individually marked (not implemented yet -- all are reverted).
|
||
"""
|
||
try:
|
||
mgmt = interfaceDbManagement.getInterface(
|
||
context.user,
|
||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||
)
|
||
|
||
folder = mgmt.getFolder(folderId)
|
||
if not folder:
|
||
raise HTTPException(status_code=404, detail=routeApiMsg("Folder not found"))
|
||
|
||
mgmt.updateFolder(folderId, {"neutralize": neutralize})
|
||
|
||
fileIds = _collectFolderFileIds(mgmt, folderId)
|
||
logger.info("Folder neutralize toggle %s for folder %s: %d files affected", neutralize, folderId, len(fileIds))
|
||
|
||
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
||
knowledgeDb = getKnowledgeInterface()
|
||
|
||
for fid in fileIds:
|
||
try:
|
||
mgmt.updateFile(fid, {"neutralize": neutralize})
|
||
if neutralize:
|
||
try:
|
||
knowledgeDb.deleteFileContentIndex(fid)
|
||
except Exception as e:
|
||
logger.warning("Folder neutralize: failed to purge index for file %s: %s", fid, e)
|
||
else:
|
||
try:
|
||
from modules.datamodels.datamodelKnowledge import FileContentIndex
|
||
indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fid})
|
||
for idx in indices:
|
||
idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
|
||
if idxId:
|
||
knowledgeDb.db.recordModify(FileContentIndex, idxId, {
|
||
"neutralizationStatus": "original",
|
||
"isNeutralized": False,
|
||
})
|
||
except Exception as e:
|
||
logger.warning("Folder neutralize OFF: metadata update failed for %s: %s", fid, e)
|
||
except Exception as e:
|
||
logger.error("Folder neutralize: failed to update file %s: %s", fid, e)
|
||
|
||
for fid in fileIds:
|
||
fileMeta = mgmt.getFile(fid)
|
||
if fileMeta:
|
||
fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "")
|
||
mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "")
|
||
|
||
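                # NOTE: default arguments freeze the current fid/fn/mt at
                # definition time; a plain closure would late-bind and re-index
                # only the last file for every task scheduled in this loop.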
                async def _reindex(fileId=fid, fileName=fn, mimeType=mt):
                    try:
                        await _autoIndexFile(fileId=fileId, fileName=fileName, mimeType=mimeType, user=context.user)
                    except Exception as ex:
                        logger.error("Folder neutralize re-index failed for %s: %s", fileId, ex)

                background_tasks.add_task(_reindex)

        return {"folderId": folderId, "neutralize": neutralize, "filesUpdated": len(fileIds)}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating folder neutralize flag: {e}")
        raise HTTPException(status_code=500, detail=str(e))


def _collectFolderFileIds(mgmt, folderId: str) -> List[str]:
    """Recursively collect all file IDs in a folder and its sub-folders."""
    fileIds = []
    try:
        files = mgmt.listFiles(folderId=folderId)
        if isinstance(files, dict):
            files = files.get("files", [])
        for f in (files or []):
            fid = f.get("id") if isinstance(f, dict) else getattr(f, "id", None)
            if fid:
                fileIds.append(fid)
    except Exception as e:
        logger.warning("_collectFolderFileIds: listFiles failed for folder %s: %s", folderId, e)

    try:
        subFolders = mgmt.listFolders(parentId=folderId)
        for sf in (subFolders or []):
            sfId = sf.get("id") if isinstance(sf, dict) else getattr(sf, "id", None)
            if sfId:
                fileIds.extend(_collectFolderFileIds(mgmt, sfId))
    except Exception as e:
        logger.warning("_collectFolderFileIds: listFolders failed for folder %s: %s", folderId, e)

    return fileIds


@router.get("/folders/{folderId}/download")
|
||
@limiter.limit("10/minute")
|
||
def download_folder(
|
||
request: Request,
|
||
folderId: str = Path(..., description="ID of the folder to download as ZIP"),
|
||
currentUser: User = Depends(getCurrentUser),
|
||
context: RequestContext = Depends(getRequestContext)
|
||
) -> Response:
|
||
"""Download a folder (including subfolders) as a ZIP archive."""
|
||
import io
|
||
import zipfile
|
||
import urllib.parse
|
||
|
||
try:
|
||
mgmt = interfaceDbManagement.getInterface(
|
||
currentUser,
|
||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||
)
|
||
|
||
folder = mgmt.getFolder(folderId)
|
||
if not folder:
|
||
raise HTTPException(status_code=404, detail=f"Folder {folderId} not found")
|
||
|
||
folderName = folder.get("name", "download")
|
||
|
||
def _collectFiles(parentId: str, pathPrefix: str):
|
||
"""Recursively collect (zipPath, fileId) tuples."""
|
||
entries = []
|
||
for f in mgmt._getFilesByCurrentUser(recordFilter={"folderId": parentId}):
|
||
fname = f.get("fileName") or f.get("name") or f.get("id", "file")
|
||
entries.append((f"{pathPrefix}{fname}", f["id"]))
|
||
for sub in mgmt.listFolders(parentId=parentId):
|
||
subName = sub.get("name", sub["id"])
|
||
entries.extend(_collectFiles(sub["id"], f"{pathPrefix}{subName}/"))
|
||
return entries
|
||
|
||
fileEntries = _collectFiles(folderId, "")
|
||
if not fileEntries:
|
||
raise HTTPException(status_code=404, detail=routeApiMsg("Folder is empty"))
|
||
|
||
buf = io.BytesIO()
|
||
with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
|
||
for zipPath, fileId in fileEntries:
|
||
data = mgmt.getFileData(fileId)
|
||
if data:
|
||
zf.writestr(zipPath, data)
|
||
|
||
buf.seek(0)
|
||
zipBytes = buf.getvalue()
|
||
encodedName = urllib.parse.quote(f"{folderName}.zip")
|
||
|
||
return Response(
|
||
content=zipBytes,
|
||
media_type="application/zip",
|
||
headers={
|
||
"Content-Disposition": f"attachment; filename*=UTF-8''{encodedName}"
|
||
}
|
||
)
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"Error downloading folder as ZIP: {e}")
|
||
raise HTTPException(status_code=500, detail=f"Error downloading folder: {str(e)}")
|
||
|
||
|
||
@router.post("/batch-delete")
|
||
@limiter.limit("10/minute")
|
||
def batch_delete_items(
|
||
request: Request,
|
||
body: Dict[str, Any] = Body(...),
|
||
currentUser: User = Depends(getCurrentUser),
|
||
context: RequestContext = Depends(getRequestContext)
|
||
) -> Dict[str, Any]:
|
||
"""Batch delete files/folders with a single SQL-backed operation per type."""
|
||
fileIds = body.get("fileIds") or []
|
||
folderIds = body.get("folderIds") or []
|
||
recursiveFolders = bool(body.get("recursiveFolders", True))
|
||
|
||
if not isinstance(fileIds, list) or not isinstance(folderIds, list):
|
||
raise HTTPException(status_code=400, detail=routeApiMsg("fileIds and folderIds must be arrays"))
|
||
|
||
try:
|
||
mgmt = interfaceDbManagement.getInterface(
|
||
currentUser,
|
||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||
)
|
||
|
||
result = {"deletedFiles": 0, "deletedFolders": 0}
|
||
|
||
if fileIds:
|
||
fileResult = mgmt.deleteFilesBatch(fileIds)
|
||
result["deletedFiles"] += fileResult.get("deletedFiles", 0)
|
||
|
||
if folderIds:
|
||
folderResult = mgmt.deleteFoldersBatch(folderIds, recursive=recursiveFolders)
|
||
result["deletedFiles"] += folderResult.get("deletedFiles", 0)
|
||
result["deletedFolders"] += folderResult.get("deletedFolders", 0)
|
||
|
||
return result
|
||
except ValueError as e:
|
||
raise HTTPException(status_code=400, detail=str(e))
|
||
except Exception as e:
|
||
logger.error(f"Error in batch delete: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.post("/batch-move")
|
||
@limiter.limit("10/minute")
|
||
def batch_move_items(
|
||
request: Request,
|
||
body: Dict[str, Any] = Body(...),
|
||
currentUser: User = Depends(getCurrentUser),
|
||
context: RequestContext = Depends(getRequestContext)
|
||
) -> Dict[str, Any]:
|
||
"""Batch move files/folders with a single SQL-backed operation per type."""
|
||
fileIds = body.get("fileIds") or []
|
||
folderIds = body.get("folderIds") or []
|
||
targetFolderId = body.get("targetFolderId")
|
||
targetParentId = body.get("targetParentId")
|
||
|
||
if not isinstance(fileIds, list) or not isinstance(folderIds, list):
|
||
raise HTTPException(status_code=400, detail=routeApiMsg("fileIds and folderIds must be arrays"))
|
||
|
||
try:
|
||
mgmt = interfaceDbManagement.getInterface(
|
||
currentUser,
|
||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||
)
|
||
|
||
result = {"movedFiles": 0, "movedFolders": 0}
|
||
|
||
if fileIds:
|
||
fileResult = mgmt.moveFilesBatch(fileIds, targetFolderId=targetFolderId)
|
||
result["movedFiles"] += fileResult.get("movedFiles", 0)
|
||
|
||
if folderIds:
|
||
folderResult = mgmt.moveFoldersBatch(folderIds, targetParentId=targetParentId)
|
||
result["movedFolders"] += folderResult.get("movedFolders", 0)
|
||
|
||
return result
|
||
except ValueError as e:
|
||
raise HTTPException(status_code=400, detail=str(e))
|
||
except Exception as e:
|
||
logger.error(f"Error in batch move: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
# ── Scope & neutralize tagging endpoints (before /{fileId} catch-all) ─────────
|
||
|
||
@router.patch("/{fileId}/scope")
|
||
@limiter.limit("30/minute")
|
||
def updateFileScope(
|
||
request: Request,
|
||
background_tasks: BackgroundTasks,
|
||
fileId: str = Path(..., description="ID of the file"),
|
||
scope: str = Body(..., embed=True),
|
||
context: RequestContext = Depends(getRequestContext),
|
||
) -> Dict[str, Any]:
|
||
"""Update the scope of a file. Global scope requires sysAdmin."""
|
||
try:
|
||
validScopes = {"personal", "featureInstance", "mandate", "global"}
|
||
if scope not in validScopes:
|
||
raise HTTPException(status_code=400, detail=f"Invalid scope: {scope}. Must be one of {validScopes}")
|
||
|
||
if scope == "global" and not context.isSysAdmin:
|
||
raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))
|
||
|
||
managementInterface = interfaceDbManagement.getInterface(
|
||
context.user,
|
||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||
)
|
||
|
||
managementInterface.updateFile(fileId, {"scope": scope})
|
||
|
||
try:
|
||
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
||
from modules.datamodels.datamodelKnowledge import FileContentIndex
|
||
knowledgeDb = getKnowledgeInterface()
|
||
indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fileId})
|
||
for idx in indices:
|
||
idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
|
||
if idxId:
|
||
knowledgeDb.db.recordModify(FileContentIndex, idxId, {"scope": scope})
|
||
except Exception as e:
|
||
logger.warning(f"Failed to update FileContentIndex scope for file {fileId}: {e}")
|
||
|
||
fileMeta = managementInterface.getFile(fileId)
|
||
if fileMeta:
|
||
fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "")
|
||
mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "")
|
||
|
||
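            # On an already-indexed, unchanged file this re-index is cheap:
            # requestIngestion recomputes the stable content hash, matches the
            # preserved `_ingestion` metadata, and skips embedding entirely
            # (see ingestion.skipped.duplicate in the commit message above).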
            async def _runReindexAfterScopeChange():
                try:
                    await _autoIndexFile(fileId=fileId, fileName=fn, mimeType=mt, user=context.user)
                except Exception as ex:
                    logger.warning("Re-index after scope change failed for %s: %s", fileId, ex)

            background_tasks.add_task(_runReindexAfterScopeChange)

        return {"fileId": fileId, "scope": scope, "updated": True}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating file scope: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.patch("/{fileId}/neutralize")
|
||
@limiter.limit("30/minute")
|
||
def updateFileNeutralize(
|
||
request: Request,
|
||
background_tasks: BackgroundTasks,
|
||
fileId: str = Path(..., description="ID of the file"),
|
||
neutralize: bool = Body(..., embed=True),
|
||
context: RequestContext = Depends(getRequestContext),
|
||
) -> Dict[str, Any]:
|
||
"""Toggle neutralization flag on a file.
|
||
|
||
FAILSAFE: When turning neutralize ON, the existing Knowledge Store index
|
||
and all content chunks are deleted SYNCHRONOUSLY before the response is
|
||
returned. The re-index happens in a background task. If re-indexing
|
||
fails the file simply has no index — no un-neutralized data can leak.
|
||
"""
|
||
try:
|
||
managementInterface = interfaceDbManagement.getInterface(
|
||
context.user,
|
||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||
)
|
||
|
||
managementInterface.updateFile(fileId, {"neutralize": neutralize})
|
||
|
||
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
||
knowledgeDb = getKnowledgeInterface()
|
||
|
||
if neutralize:
|
||
# ── CRITICAL: purge existing (potentially un-neutralized) index
|
||
# This MUST succeed before the response is sent so that no stale
|
||
# raw-text chunks remain searchable while re-indexing runs.
|
||
try:
|
||
knowledgeDb.deleteFileContentIndex(fileId)
|
||
logger.info("Neutralize toggle ON: deleted index + chunks for file %s", fileId)
|
||
except Exception as e:
|
||
logger.error("Neutralize toggle ON: FAILED to delete index for file %s: %s", fileId, e)
|
||
raise HTTPException(
|
||
status_code=500,
|
||
detail=f"Could not purge existing index for neutralization — aborting toggle. Error: {e}",
|
||
)
|
||
else:
|
||
# Turning neutralize OFF: update metadata only; re-index will overwrite
|
||
try:
|
||
from modules.datamodels.datamodelKnowledge import FileContentIndex
|
||
indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fileId})
|
||
for idx in indices:
|
||
idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
|
||
if idxId:
|
||
knowledgeDb.db.recordModify(FileContentIndex, idxId, {
|
||
"neutralizationStatus": "original",
|
||
"isNeutralized": False,
|
||
})
|
||
except Exception as e:
|
||
logger.warning("Failed to update FileContentIndex after neutralize-OFF for %s: %s", fileId, e)
|
||
|
||
# Background re-index (safe: if it fails, there is simply no index)
|
||
fileMeta = managementInterface.getFile(fileId)
|
||
if fileMeta:
|
||
fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "")
|
||
mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "")
|
||
|
||
async def _runReindexAfterNeutralizeToggle():
|
||
try:
|
||
await _autoIndexFile(fileId=fileId, fileName=fn, mimeType=mt, user=context.user)
|
||
except Exception as ex:
|
||
logger.error("Re-index after neutralize toggle failed for %s: %s (file has NO index until next re-index)", fileId, ex)
|
||
|
||
background_tasks.add_task(_runReindexAfterNeutralizeToggle)
|
||
|
||
return {"fileId": fileId, "neutralize": neutralize, "updated": True}
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"Error updating file neutralize flag: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
# ── File endpoints with path parameters (catch-all /{fileId}) ─────────────────

@router.get("/{fileId}")
@limiter.limit("30/minute")
def get_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
):
    """Get a file. Resolves the file's mandate/instance scope automatically."""
    try:
        _mgmt, fileData = _resolveFileWithScope(currentUser, context, fileId)
        if not fileData:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )

        fileDict = fileData.model_dump() if hasattr(fileData, "model_dump") else dict(fileData)
        enriched = enrichRowsWithFkLabels([fileDict], FileItem)
        return enriched[0]

    except interfaceDbManagement.FileNotFoundError as e:
        logger.warning(f"File not found: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(e)
        )
    except interfaceDbManagement.FilePermissionError as e:
        logger.warning(f"No permission for file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=str(e)
        )
    except interfaceDbManagement.FileError as e:
        logger.error(f"Error retrieving file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
    except Exception as e:
        logger.error(f"Unexpected error retrieving file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error retrieving file: {str(e)}"
        )

@router.put("/{fileId}", response_model=FileItem)
|
||
@limiter.limit("10/minute")
|
||
def update_file(
|
||
request: Request,
|
||
fileId: str = Path(..., description="ID of the file to update"),
|
||
file_info: Dict[str, Any] = Body(...),
|
||
currentUser: User = Depends(getCurrentUser)
|
||
) -> FileItem:
|
||
"""Update file info"""
|
||
try:
|
||
_EDITABLE_FIELDS = {"fileName", "scope", "tags", "description", "folderId", "neutralize"}
|
||
safeData = {k: v for k, v in file_info.items() if k in _EDITABLE_FIELDS}
|
||
if not safeData:
|
||
raise HTTPException(status_code=400, detail=routeApiMsg("No editable fields provided"))
|
||
|
||
managementInterface = interfaceDbManagement.getInterface(currentUser)
|
||
|
||
file = managementInterface.getFile(fileId)
|
||
if not file:
|
||
raise HTTPException(
|
||
status_code=status.HTTP_404_NOT_FOUND,
|
||
detail=f"File with ID {fileId} not found"
|
||
)
|
||
|
||
if safeData.get("scope") == "global" and not getattr(currentUser, "isSysAdmin", False):
|
||
raise HTTPException(
|
||
status_code=status.HTTP_403_FORBIDDEN,
|
||
detail=routeApiMsg("Only sysadmins can set global scope"),
|
||
)
|
||
|
||
if not managementInterface.checkRbacPermission(FileItem, "update", fileId):
|
||
raise HTTPException(
|
||
status_code=status.HTTP_403_FORBIDDEN,
|
||
detail=routeApiMsg("Not authorized to update this file")
|
||
)
|
||
|
||
result = managementInterface.updateFile(fileId, safeData)
|
||
if not result:
|
||
raise HTTPException(
|
||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||
detail=routeApiMsg("Failed to update file")
|
||
)
|
||
|
||
# Get updated file
|
||
updatedFile = managementInterface.getFile(fileId)
|
||
return updatedFile
|
||
|
||
except HTTPException as he:
|
||
raise he
|
||
except Exception as e:
|
||
logger.error(f"Error updating file: {str(e)}")
|
||
raise HTTPException(
|
||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||
detail=str(e)
|
||
)
|
||
|
||
@router.delete("/{fileId}", response_model=Dict[str, Any])
|
||
@limiter.limit("10/minute")
|
||
def delete_file(
|
||
request: Request,
|
||
fileId: str = Path(..., description="ID of the file to delete"),
|
||
currentUser: User = Depends(getCurrentUser)
|
||
) -> Dict[str, Any]:
|
||
"""Delete a file"""
|
||
managementInterface = interfaceDbManagement.getInterface(currentUser)
|
||
|
||
# Check if the file exists
|
||
existingFile = managementInterface.getFile(fileId)
|
||
if not existingFile:
|
||
raise HTTPException(
|
||
status_code=status.HTTP_404_NOT_FOUND,
|
||
detail=f"File with ID {fileId} not found"
|
||
)
|
||
|
||
success = managementInterface.deleteFile(fileId)
|
||
if not success:
|
||
raise HTTPException(
|
||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||
detail=routeApiMsg("Error deleting the file")
|
||
)
|
||
|
||
return {"message": f"File with ID {fileId} successfully deleted"}
|
||
|
||
@router.get("/{fileId}/download")
|
||
@limiter.limit("30/minute")
|
||
def download_file(
|
||
request: Request,
|
||
fileId: str = Path(..., description="ID of the file to download"),
|
||
currentUser: User = Depends(getCurrentUser),
|
||
context: RequestContext = Depends(getRequestContext)
|
||
) -> Response:
|
||
"""Download a file. Resolves the file's mandate/instance scope automatically,
|
||
so direct <a href> links work even when X-Mandate-Id / X-Instance-Id headers
|
||
are not sent by the browser."""
|
||
try:
|
||
managementInterface, fileData = _resolveFileWithScope(currentUser, context, fileId)
|
||
if not fileData:
|
||
raise HTTPException(
|
||
status_code=status.HTTP_404_NOT_FOUND,
|
||
detail=f"File with ID {fileId} not found"
|
||
)
|
||
|
||
fileContent = managementInterface.getFileData(fileId)
|
||
if not fileContent:
|
||
raise HTTPException(
|
||
status_code=status.HTTP_404_NOT_FOUND,
|
||
detail=f"File content not found for ID {fileId}"
|
||
)
|
||
|
||
# Return file as response
|
||
# Properly encode filename for Content-Disposition header to handle Unicode characters
|
||
import urllib.parse
|
||
encoded_filename = urllib.parse.quote(fileData.fileName)
|
||
|
||
return Response(
|
||
content=fileContent,
|
||
media_type=fileData.mimeType,
|
||
headers={
|
||
"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"
|
||
}
|
||
)
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"Error downloading file: {str(e)}")
|
||
raise HTTPException(
|
||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||
detail=f"Error downloading file: {str(e)}"
|
||
)
|
||
|
||
@router.get("/{fileId}/preview", response_model=FilePreview)
|
||
@limiter.limit("30/minute")
|
||
def preview_file(
|
||
request: Request,
|
||
fileId: str = Path(..., description="ID of the file to preview"),
|
||
currentUser: User = Depends(getCurrentUser),
|
||
context: RequestContext = Depends(getRequestContext)
|
||
) -> FilePreview:
|
||
"""Preview a file's content. Resolves the file's mandate/instance scope automatically."""
|
||
try:
|
||
managementInterface, fileMeta = _resolveFileWithScope(currentUser, context, fileId)
|
||
if not fileMeta:
|
||
raise HTTPException(
|
||
status_code=status.HTTP_404_NOT_FOUND,
|
||
detail=f"File with ID {fileId} not found"
|
||
)
|
||
|
||
preview = managementInterface.getFileContent(fileId)
|
||
if not preview:
|
||
raise HTTPException(
|
||
status_code=status.HTTP_404_NOT_FOUND,
|
||
detail=f"File with ID {fileId} not found or no content available"
|
||
)
|
||
|
||
return preview
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"Error previewing file: {str(e)}")
|
||
raise HTTPException(
|
||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||
detail=f"Error previewing file: {str(e)}"
|
||
)
|
||
|
||
|
||
@router.post("/{fileId}/move")
|
||
@limiter.limit("10/minute")
|
||
def move_file(
|
||
request: Request,
|
||
fileId: str = Path(...),
|
||
body: Dict[str, Any] = Body(...),
|
||
currentUser: User = Depends(getCurrentUser),
|
||
context: RequestContext = Depends(getRequestContext)
|
||
) -> Dict[str, Any]:
|
||
"""Move a file to a different folder."""
|
||
targetFolderId = body.get("targetFolderId")
|
||
try:
|
||
mgmt = interfaceDbManagement.getInterface(
|
||
currentUser,
|
||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||
)
|
||
mgmt.updateFile(fileId, {"folderId": targetFolderId})
|
||
|
||
if targetFolderId:
|
||
try:
|
||
targetFolder = mgmt.getFolder(targetFolderId)
|
||
folderNeut = (targetFolder.get("neutralize") if isinstance(targetFolder, dict)
|
||
else getattr(targetFolder, "neutralize", False)) if targetFolder else False
|
||
if folderNeut:
|
||
mgmt.updateFile(fileId, {"neutralize": True})
|
||
logger.info("File %s moved to neutralized folder %s — inherited neutralize=True", fileId, targetFolderId)
|
||
except Exception as e:
|
||
logger.warning("File move: folder neutralize inheritance check failed for %s: %s", fileId, e)
|
||
|
||
return {"success": True, "fileId": fileId, "folderId": targetFolderId}
|
||
except Exception as e:
|
||
logger.error(f"Error moving file: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|