# Copyright (c) 2026 PowerOn AG # All rights reserved. from fastapi import APIRouter, HTTPException, Depends, File, UploadFile, Form, Path, Request, status, Query, Response, Body, BackgroundTasks from fastapi.responses import JSONResponse from typing import List, Dict, Any, Optional import asyncio import io import logging import json import math import urllib.parse import zipfile # Import auth module from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext # Import interfaces from modules.interfaces import interfaceDbManagement from modules.datamodels.datamodelFiles import FileItem, FilePreview, FileFolder from modules.shared.attributeUtils import getModelAttributeDefinitions from modules.datamodels.datamodelUam import User from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata, normalize_pagination_dict from modules.shared.i18nRegistry import apiRouteContext from modules.dbHelpers.fkLabelResolver import enrichRowsWithFkLabels routeApiMsg = apiRouteContext("routeDataFiles") # Configure logger logger = logging.getLogger(__name__) def _resolveFileWithScope(currentUser: User, context: RequestContext, fileId: str): """Returns (managementInterface, fileItem) with RBAC scoped to the file's own mandate/instance. Files generated by workflows (e.g. AI report outputs) carry their own mandateId/featureInstanceId. Direct download links via cannot send custom scope headers, so we resolve the scope from the FileItem itself and re-check RBAC in that scope. Returns (None, None) if the file does not exist or the user lacks access in the file's actual scope. """ requestMandateId = str(context.mandateId) if context.mandateId else None requestInstanceId = str(context.featureInstanceId) if context.featureInstanceId else None mgmt = interfaceDbManagement.getInterface( currentUser, mandateId=requestMandateId, featureInstanceId=requestInstanceId, ) fileItem = mgmt.getFile(fileId) if fileItem: return mgmt, fileItem metas = mgmt.db.getRecordset(FileItem, recordFilter={"id": fileId}) if not metas: return None, None meta = metas[0] fileMandateId = meta.get("mandateId") or None fileInstanceId = meta.get("featureInstanceId") or None if not fileMandateId and not fileInstanceId: return None, None if fileMandateId == requestMandateId and fileInstanceId == requestInstanceId: return None, None scopedMgmt = interfaceDbManagement.getInterface( currentUser, mandateId=fileMandateId, featureInstanceId=fileInstanceId, ) fileItem = scopedMgmt.getFile(fileId) if not fileItem: return None, None return scopedMgmt, fileItem async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user, *, mandateId: str = None, featureInstanceId: str = None): """Background task: pre-scan + extraction + knowledge indexing. Step 1: Structure Pre-Scan (AI-free) -> FileContentIndex (persisted) Step 2: Content extraction via runExtraction -> ContentParts Step 3: KnowledgeService.requestIngestion -> idempotent chunking + embedding -> Knowledge Store""" userId = user.id if hasattr(user, "id") else str(user) try: mgmtInterface = interfaceDbManagement.getInterface( user, mandateId=mandateId or None, featureInstanceId=featureInstanceId or None, ) mgmtInterface.updateFile(fileId, {"status": "processing"}) rawBytes = mgmtInterface.getFileData(fileId) if not rawBytes: logger.warning(f"Auto-index: no file data for {fileId}, skipping") mgmtInterface.updateFile(fileId, {"status": "active"}) return file_meta = mgmtInterface.getFile(fileId) featureInstanceId = "" mandateId = "" file_scope = "personal" if file_meta: if isinstance(file_meta, dict): featureInstanceId = file_meta.get("featureInstanceId") or "" mandateId = file_meta.get("mandateId") or "" file_scope = file_meta.get("scope") or "personal" else: featureInstanceId = getattr(file_meta, "featureInstanceId", None) or "" mandateId = getattr(file_meta, "mandateId", None) or "" file_scope = getattr(file_meta, "scope", None) or "personal" logger.info(f"Auto-index starting for {fileName} ({len(rawBytes)} bytes, {mimeType})") # Step 1: Structure Pre-Scan (AI-free) from modules.serviceCenter.services.serviceKnowledge.subPreScan import preScanDocument contentIndex = await preScanDocument( fileData=rawBytes, mimeType=mimeType, fileId=fileId, fileName=fileName, userId=userId, featureInstanceId=str(featureInstanceId) if featureInstanceId else "", mandateId=str(mandateId) if mandateId else "", scope=file_scope, ) logger.info( f"Pre-scan complete for {fileName}: " f"{contentIndex.totalObjects} objects" ) # Persist FileContentIndex immediately. # IMPORTANT: preserve `_ingestion` metadata and `status="indexed"` from any # prior successful run — otherwise this upsert wipes the idempotency cache # and requestIngestion cannot detect duplicates (AC4 breaks). from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface knowledgeDb = getKnowledgeInterface() try: _existing = knowledgeDb.getFileContentIndex(fileId) except Exception: _existing = None if _existing: _existingStruct = ( _existing.get("structure") if isinstance(_existing, dict) else getattr(_existing, "structure", {}) ) or {} _existingStatus = ( _existing.get("status") if isinstance(_existing, dict) else getattr(_existing, "status", "") ) or "" if "_ingestion" in _existingStruct: contentIndex.structure = dict(contentIndex.structure or {}) contentIndex.structure["_ingestion"] = _existingStruct["_ingestion"] if _existingStatus == "indexed": contentIndex.status = "indexed" knowledgeDb.upsertFileContentIndex(contentIndex) # Step 2: Content extraction (AI-free, produces ContentParts) from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry, ChunkerRegistry from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction from modules.datamodels.datamodelExtraction import ExtractionOptions extractorRegistry = ExtractorRegistry() chunkerRegistry = ChunkerRegistry() # mergeStrategy=None: keep per-page / per-section granularity for RAG ingestion. # The default MergeStrategy concatenates all text parts into a single blob, which # collapses a 500-page PDF into one ContentChunk and destroys semantic retrieval. options = ExtractionOptions(mergeStrategy=None) extracted = runExtraction( extractorRegistry, chunkerRegistry, rawBytes, fileName, mimeType, options, ) contentObjects = [] for part in extracted.parts: contentType = "text" if part.typeGroup == "image": contentType = "image" elif part.typeGroup in ("binary", "container"): contentType = "other" if not part.data or not part.data.strip(): continue contentObjects.append({ "contentObjectId": part.id, "contentType": contentType, "data": part.data, "contextRef": { "containerPath": fileName, "location": part.label or "file", **(part.metadata or {}), }, }) logger.info(f"Extracted {len(contentObjects)} content objects from {fileName}") if not contentObjects: knowledgeDb.updateFileStatus(fileId, "indexed") mgmtInterface.updateFile(fileId, {"status": "active"}) return # Step 3: Knowledge indexing (chunking + embedding) from modules.serviceCenter import getService from modules.serviceCenter.context import ServiceCenterContext ctx = ServiceCenterContext( user=user, mandateId=str(mandateId) if mandateId else "", featureInstanceId=str(featureInstanceId) if featureInstanceId else "", ) knowledgeService = getService("knowledge", ctx) from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob handle = await knowledgeService.requestIngestion( IngestionJob( sourceKind="file", sourceId=fileId, fileName=fileName, mimeType=mimeType, userId=userId, featureInstanceId=str(featureInstanceId) if featureInstanceId else "", mandateId=str(mandateId) if mandateId else "", contentObjects=contentObjects, structure=contentIndex.structure, provenance={"lane": "upload", "route": "routeDataFiles._autoIndexFile"}, ) ) # Re-acquire interface after await to avoid stale user context from the singleton mgmtInterface = interfaceDbManagement.getInterface(user) mgmtInterface.updateFile(fileId, {"status": "active"}) if handle.status == "failed": logger.warning(f"Auto-index ingestion failed for file {fileId} ({fileName}): {handle.error}") else: logger.info(f"Auto-index complete for file {fileId} ({fileName})") except Exception as e: logger.error(f"Auto-index failed for file {fileId}: {e}", exc_info=True) try: errMgmt = interfaceDbManagement.getInterface(user) errMgmt.updateFile(fileId, {"status": "active"}) except Exception: pass # Model attributes for FileItem fileAttributes = getModelAttributeDefinitions(FileItem) # Create router for file endpoints router = APIRouter( prefix="/api/files", tags=["Manage Files"], responses={ 404: {"description": "Not found"}, 400: {"description": "Bad request"}, 401: {"description": "Unauthorized"}, 403: {"description": "Forbidden"}, 500: {"description": "Internal server error"} } ) def _getInterfaceForOwnedItem(currentUser: User, context, itemId: str, modelClass) -> Any: """Create a management interface scoped to the item's own context. Looks up the item by ID (unscoped) to resolve its mandateId/featureInstanceId, then creates the interface with THAT context. This ensures toggle operations work regardless of which page the user is on.""" unscoped = interfaceDbManagement.getInterface(currentUser) record = unscoped.db.getRecord(modelClass, itemId) if not record: raise interfaceDbManagement.FileNotFoundError(f"Item {itemId} not found") itemMandateId = record.get("mandateId") if isinstance(record, dict) else getattr(record, "mandateId", None) itemInstanceId = record.get("featureInstanceId") if isinstance(record, dict) else getattr(record, "featureInstanceId", None) return interfaceDbManagement.getInterface( currentUser, mandateId=str(itemMandateId) if itemMandateId else None, featureInstanceId=str(itemInstanceId) if itemInstanceId else None, ) @router.get("/folders/tree") @limiter.limit("120/minute") def get_folder_tree( request: Request, owner: str = Query("me", description="'me' | 'shared'"), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ): try: managementInterface = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) o = (owner or "me").strip().lower() if o == "me": folders = managementInterface.getOwnFolderTree() elif o == "shared": folders = managementInterface.getSharedFolderTree() else: raise HTTPException(status_code=400, detail="owner must be 'me' or 'shared'") _enrichFoldersWithMixed(managementInterface.db, str(currentUser.id), folders, o) return folders except HTTPException: raise except Exception as e: logger.error(f"get_folder_tree error: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/attributes") @limiter.limit("120/minute") def getAttributesForIds( request: Request, body: Dict[str, Any] = Body(...), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ): """Return current attribute values (neutralize, scope, ragIndexEnabled) for a list of node IDs. For folder IDs, computes 'mixed' by checking direct children. The frontend sends this after every toggle to refresh visible nodes without reloading the tree structure.""" ids = body.get("ids", []) if not isinstance(ids, list) or len(ids) == 0: return {} if len(ids) > 500: raise HTTPException(status_code=400, detail="Max 500 IDs per request") try: managementInterface = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) db = managementInterface.db userId = str(currentUser.id) allFolders = db.getRecordset(FileFolder, recordFilter={"sysCreatedBy": userId}) or [] allFiles = db.getRecordset(FileItem, recordFilter={"sysCreatedBy": userId}) or [] folderById = {f["id"]: f for f in allFolders} fileById = {f["id"]: f for f in allFiles} logger.info( "getAttributesForIds: %d ids requested, %d folders found, %d files found", len(ids), len(allFolders), len(allFiles), ) result: Dict[str, Dict[str, Any]] = {} for nodeId in ids: if nodeId.startswith("__filesRoot:"): attrs = _computeSyntheticRootAttrs(allFolders, allFiles) result[nodeId] = attrs elif nodeId in folderById: folder = folderById[nodeId] attrs = _computeFolderAttrs(folder, allFolders, allFiles) result[nodeId] = attrs elif nodeId in fileById: f = fileById[nodeId] result[nodeId] = { "neutralize": bool(f.get("neutralize", False)), "scope": f.get("scope", "personal"), } else: logger.debug("getAttributesForIds: unknown id=%s", nodeId) logger.info("getAttributesForIds: returning %d entries", len(result)) return result except HTTPException: raise except Exception as e: logger.error(f"getAttributesForIds error: {e}") raise HTTPException(status_code=500, detail=str(e)) def _enrichFoldersWithMixed( db, userId: str, folders: List[Dict[str, Any]], ownerMode: str, ) -> None: """Enrich folder dicts in-place: replace raw neutralize/scope with computed values that include ``'mixed'`` when children diverge. For ``ownerMode='me'``, files owned by the user are loaded. For ``'shared'``, files inside the visible shared folders are loaded.""" if not folders: return if ownerMode == "me": allFiles = db.getRecordset(FileItem, recordFilter={"sysCreatedBy": userId}) or [] else: folderIds = {f["id"] for f in folders} allFiles = [] for fid in folderIds: allFiles.extend(db.getRecordset(FileItem, recordFilter={"folderId": fid}) or []) computed: Dict[str, Dict[str, Any]] = {} for folder in folders: computed[folder["id"]] = _computeFolderAttrs(folder, folders, allFiles) for folder in folders: attrs = computed[folder["id"]] folder["neutralize"] = attrs["neutralize"] folder["scope"] = attrs["scope"] def _computeFolderAttrs( folder: Dict[str, Any], allFolders: List[Dict[str, Any]], allFiles: List[Dict[str, Any]], ) -> Dict[str, Any]: """Compute attributes for a folder. Recursively checks the entire subtree: if ANY descendant at any depth has a different value, the folder shows 'mixed'. This propagates up through all ancestor levels.""" fid = folder["id"] neutralizeResult = _effectiveNeutralize(fid, allFolders, allFiles) scopeResult = _effectiveScope(fid, allFolders, allFiles) return {"neutralize": neutralizeResult, "scope": scopeResult} def _effectiveNeutralize( folderId: str, allFolders: List[Dict[str, Any]], allFiles: List[Dict[str, Any]], ) -> Any: """Recursively compute effective neutralize for a folder. Returns 'mixed' if any descendants diverge, otherwise the folder's own value.""" childFolders = [f for f in allFolders if f.get("parentId") == folderId] childFiles = [f for f in allFiles if f.get("folderId") == folderId] if not childFolders and not childFiles: folder = next((f for f in allFolders if f["id"] == folderId), None) return bool(folder.get("neutralize", False)) if folder else False childVals = set() for cf in childFolders: effective = _effectiveNeutralize(cf["id"], allFolders, allFiles) if effective == "mixed": return "mixed" childVals.add(effective) for cf in childFiles: childVals.add(bool(cf.get("neutralize", False))) if len(childVals) > 1: return "mixed" if not childVals: folder = next((f for f in allFolders if f["id"] == folderId), None) return bool(folder.get("neutralize", False)) if folder else False return childVals.pop() def _effectiveScope( folderId: str, allFolders: List[Dict[str, Any]], allFiles: List[Dict[str, Any]], ) -> Any: """Recursively compute effective scope for a folder. Returns 'mixed' if any descendants diverge, otherwise the folder's own value.""" childFolders = [f for f in allFolders if f.get("parentId") == folderId] childFiles = [f for f in allFiles if f.get("folderId") == folderId] if not childFolders and not childFiles: folder = next((f for f in allFolders if f["id"] == folderId), None) return folder.get("scope", "personal") if folder else "personal" childVals = set() for cf in childFolders: effective = _effectiveScope(cf["id"], allFolders, allFiles) if effective == "mixed": return "mixed" childVals.add(effective) for cf in childFiles: childVals.add(cf.get("scope", "personal")) if len(childVals) > 1: return "mixed" if not childVals: folder = next((f for f in allFolders if f["id"] == folderId), None) return folder.get("scope", "personal") if folder else "personal" return childVals.pop() def _computeSyntheticRootAttrs( allFolders: List[Dict[str, Any]], allFiles: List[Dict[str, Any]], ) -> Dict[str, Any]: """Compute attributes for the synthetic root by recursively checking the entire tree. If ANY item at any depth diverges, root shows 'mixed'.""" topFolders = [f for f in allFolders if not f.get("parentId")] topFiles = [f for f in allFiles if not f.get("folderId")] neutralizeVals = set() scopeVals = set() for cf in topFolders: nEff = _effectiveNeutralize(cf["id"], allFolders, allFiles) if nEff == "mixed": neutralizeVals.add(True) neutralizeVals.add(False) else: neutralizeVals.add(nEff) sEff = _effectiveScope(cf["id"], allFolders, allFiles) if sEff == "mixed": scopeVals.add("__mixed_a__") scopeVals.add("__mixed_b__") else: scopeVals.add(sEff) for cf in topFiles: neutralizeVals.add(bool(cf.get("neutralize", False))) scopeVals.add(cf.get("scope", "personal")) if not neutralizeVals and not scopeVals: return {"neutralize": False, "scope": "personal"} return { "neutralize": "mixed" if len(neutralizeVals) > 1 else (neutralizeVals.pop() if neutralizeVals else False), "scope": "mixed" if len(scopeVals) > 1 else (scopeVals.pop() if scopeVals else "personal"), } @router.post("/folders", status_code=status.HTTP_201_CREATED) @limiter.limit("30/minute") def create_folder( request: Request, body: Dict[str, Any] = Body(...), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ): try: name = body.get("name") if not name or not str(name).strip(): raise HTTPException(status_code=400, detail="name is required") parentId = body.get("parentId") or None managementInterface = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) return managementInterface.createFolder(str(name).strip(), parentId) except PermissionError as e: raise HTTPException(status_code=403, detail=str(e)) except interfaceDbManagement.FileNotFoundError as e: raise HTTPException(status_code=404, detail=str(e)) except HTTPException: raise except Exception as e: logger.error(f"create_folder error: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.patch("/folders/{folderId}") @limiter.limit("30/minute") def rename_folder( request: Request, folderId: str = Path(...), body: Dict[str, Any] = Body(...), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ): try: name = body.get("name") if not name or not str(name).strip(): raise HTTPException(status_code=400, detail="name is required") managementInterface = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) return managementInterface.renameFolder(folderId, str(name).strip()) except PermissionError as e: raise HTTPException(status_code=403, detail=str(e)) except interfaceDbManagement.FileNotFoundError as e: raise HTTPException(status_code=404, detail=str(e)) except HTTPException: raise except Exception as e: logger.error(f"rename_folder error: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/folders/{folderId}/move") @limiter.limit("30/minute") def move_folder( request: Request, folderId: str = Path(...), body: Dict[str, Any] = Body(...), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ): try: # FE may send `parentId` or `targetParentId`. Accept both so the # FormGeneratorTree generic `provider.moveNodes(targetParentId)` API # remains consistent with the file-move (PUT /api/files/{id}) shape. newParentId = body.get("parentId") if newParentId is None: newParentId = body.get("targetParentId") managementInterface = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) return managementInterface.moveFolder(folderId, newParentId or None) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) except PermissionError as e: raise HTTPException(status_code=403, detail=str(e)) except interfaceDbManagement.FileNotFoundError as e: raise HTTPException(status_code=404, detail=str(e)) except HTTPException: raise except Exception as e: logger.error(f"move_folder error: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.delete("/folders/{folderId}") @limiter.limit("30/minute") def delete_folder( request: Request, folderId: str = Path(...), cascade: bool = Query(True, description="Cascade delete sub-folders and files"), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ): try: managementInterface = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) return managementInterface.deleteFolderCascade(folderId) except PermissionError as e: raise HTTPException(status_code=403, detail=str(e)) except interfaceDbManagement.FileNotFoundError as e: raise HTTPException(status_code=404, detail=str(e)) except HTTPException: raise except Exception as e: logger.error(f"delete_folder error: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.patch("/folders/{folderId}/scope") @limiter.limit("30/minute") def patch_folder_scope( request: Request, folderId: str = Path(...), body: Dict[str, Any] = Body(...), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ): try: scope = body.get("scope") if not scope: raise HTTPException(status_code=400, detail="scope is required") cascadeToFiles = body.get("cascadeChildren", body.get("cascadeToFiles", False)) managementInterface = _getInterfaceForOwnedItem(currentUser, context, folderId, FileFolder) return managementInterface.patchFolderScope(folderId, scope, cascadeToFiles) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) except PermissionError as e: raise HTTPException(status_code=403, detail=str(e)) except interfaceDbManagement.FileNotFoundError as e: raise HTTPException(status_code=404, detail=str(e)) except HTTPException: raise except Exception as e: logger.error(f"patch_folder_scope error: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.patch("/folders/{folderId}/neutralize") @limiter.limit("30/minute") def patch_folder_neutralize( request: Request, folderId: str = Path(...), body: Dict[str, Any] = Body(...), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ): try: neutralize = body.get("neutralize") if neutralize is None: raise HTTPException(status_code=400, detail="neutralize is required") managementInterface = _getInterfaceForOwnedItem(currentUser, context, folderId, FileFolder) return managementInterface.patchFolderNeutralize(folderId, bool(neutralize)) except PermissionError as e: raise HTTPException(status_code=403, detail=str(e)) except interfaceDbManagement.FileNotFoundError as e: raise HTTPException(status_code=404, detail=str(e)) except HTTPException: raise except Exception as e: logger.error(f"patch_folder_neutralize error: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.get("/list") @limiter.limit("120/minute") def get_files( request: Request, pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"), mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"), column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext) ): """ Get files with optional pagination, sorting, and filtering. Query Parameters: - pagination: JSON-encoded PaginationParams object, or None for no pagination Examples: - GET /api/files/list (no pagination - returns all items) - GET /api/files/list?pagination={"page":1,"pageSize":10,"sort":[]} - GET /api/files/list?pagination={"page":2,"pageSize":20,"sort":[{"field":"fileName","direction":"asc"}]} """ try: # Parse pagination parameter paginationParams = None if pagination: try: paginationDict = json.loads(pagination) if paginationDict: paginationDict = normalize_pagination_dict(paginationDict) paginationParams = PaginationParams(**paginationDict) except (json.JSONDecodeError, ValueError) as e: raise HTTPException( status_code=400, detail=f"Invalid pagination parameter: {str(e)}" ) from modules.dbHelpers.paginationHelpers import ( handleIdsMode, handleFilterValuesInMemory, ) from modules.interfaces.interfaceTableHelpers import ( resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels, ) import modules.interfaces.interfaceDbApp as _appIface from modules.datamodels.datamodelPagination import AppliedViewMeta managementInterface = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None ) appInterface = _appIface.getInterface(currentUser) # Resolve view and merge config into params viewKey = paginationParams.viewKey if paginationParams else None viewConfig, viewDisplayName = resolveView(appInterface, "files/list", viewKey) viewMeta = AppliedViewMeta(viewKey=viewKey, displayName=viewDisplayName) if viewKey else None paginationParams = applyViewToParams(paginationParams, viewConfig) groupByLevels = effective_group_by_levels(paginationParams, viewConfig) def _filesToDicts(fileItems): return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems] if mode == "groupSummary": if not pagination: raise HTTPException(status_code=400, detail="pagination required for groupSummary") from modules.interfaces.interfaceTableHelpers import ( apply_strategy_b_filters_and_sort, build_group_summary_groups, ) if not groupByLevels or not groupByLevels[0].get("field"): raise HTTPException( status_code=400, detail="groupByLevels[0].field required for groupSummary", ) field = groupByLevels[0]["field"] null_label = str(groupByLevels[0].get("nullLabel") or "—") allFiles = managementInterface.getAllFiles() allItems = enrichRowsWithFkLabels( _filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])), FileItem, db=appInterface.db, ) filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser) groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels) return JSONResponse(content={"groups": groups_out}) if mode == "filterValues": if not column: raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues") allFiles = managementInterface.getAllFiles() items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else []) itemDicts = _filesToDicts(items) enrichRowsWithFkLabels(itemDicts, FileItem, db=appInterface.db) return handleFilterValuesInMemory(itemDicts, column, pagination) if mode == "ids": recordFilter = {"sysCreatedBy": managementInterface.userId} return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter) if not groupByLevels: # No grouping: let DB handle pagination directly (fastest path) result = managementInterface.getAllFiles(pagination=paginationParams) if paginationParams and hasattr(result, 'items'): enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem, db=appInterface.db) resp: dict = { "items": enriched, "pagination": PaginationMetadata( currentPage=paginationParams.page, pageSize=paginationParams.pageSize, totalItems=result.totalItems, totalPages=result.totalPages, sort=paginationParams.sort, filters=paginationParams.filters ).model_dump(), } else: items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result]) resp = {"items": enrichRowsWithFkLabels(_filesToDicts(items), FileItem, db=appInterface.db), "pagination": None} if viewMeta: resp["appliedView"] = viewMeta.model_dump() return resp # Strategy B grouping: load full list, group, then slice allFiles = managementInterface.getAllFiles() allItems = enrichRowsWithFkLabels( _filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])), FileItem, db=appInterface.db, ) from modules.interfaces.interfaceTableHelpers import apply_strategy_b_filters_and_sort if paginationParams.filters or paginationParams.sort: allItems = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser) if not paginationParams: resp = {"items": allItems, "pagination": None} if viewMeta: resp["appliedView"] = viewMeta.model_dump() return resp totalItems = len(allItems) totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0 page_items, groupLayout = buildGroupLayout(allItems, groupByLevels, paginationParams.page, paginationParams.pageSize) resp = { "items": page_items, "pagination": PaginationMetadata( currentPage=paginationParams.page, pageSize=paginationParams.pageSize, totalItems=totalItems, totalPages=totalPages, sort=paginationParams.sort, filters=paginationParams.filters ).model_dump(), } if groupLayout: resp["groupLayout"] = groupLayout.model_dump() if viewMeta: resp["appliedView"] = viewMeta.model_dump() return resp except HTTPException: raise except Exception as e: logger.error(f"Error getting files: {str(e)}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to get files: {str(e)}" ) def _LEGACY_addFileToGroup_REMOVED(): """Removed — file-group tree no longer exists. Use multi-select bulk operations.""" pass @router.post("/upload", status_code=status.HTTP_201_CREATED) @limiter.limit("10/minute") async def upload_file( request: Request, file: UploadFile = File(...), workflowId: Optional[str] = Form(None), featureInstanceId: Optional[str] = Form(None), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ) -> JSONResponse: # Add fileName property to UploadFile for consistency with backend model file.fileName = file.filename """Upload a file""" try: managementInterface = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) # Read file fileContent = await file.read() # Check size limits maxSize = int(interfaceDbManagement.APP_CONFIG.get("File_Management_MAX_UPLOAD_SIZE_MB")) * 1024 * 1024 # in bytes if len(fileContent) > maxSize: raise HTTPException( status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, detail=f"File too large. Maximum size: {interfaceDbManagement.APP_CONFIG.get('File_Management_MAX_UPLOAD_SIZE_MB')}MB" ) # Save file via LucyDOM interface in the database fileItem, duplicateType = managementInterface.saveUploadedFile( fileContent, file.filename ) if featureInstanceId and not fileItem.featureInstanceId: managementInterface.updateFile(fileItem.id, {"featureInstanceId": featureInstanceId}) fileItem.featureInstanceId = featureInstanceId # Determine response message based on duplicate type if duplicateType == "exact_duplicate": message = f"File '{file.filename}' already exists with identical content. Reusing existing file." elif duplicateType == "name_conflict": message = f"File '{file.filename}' already exists with different content. Uploaded as '{fileItem.fileName}'." else: # new_file message = "File uploaded successfully" # Convert FileItem to dictionary for JSON response fileMeta = fileItem.model_dump() # If workflowId is provided, include it in the response (not stored in FileItem model) if workflowId: fileMeta["workflowId"] = workflowId # Trigger background auto-index pipeline (non-blocking) # Also runs for duplicates in case the original was never successfully indexed shouldIndex = duplicateType == "new_file" if not shouldIndex: try: from modules.interfaces.interfaceDbKnowledge import getInterface as _getKnowledgeInterface _kDb = _getKnowledgeInterface() _existingIndex = _kDb.getFileContentIndex(fileItem.id) if not _existingIndex: shouldIndex = True logger.info(f"Re-triggering auto-index for duplicate {fileItem.id} (not yet indexed)") except Exception: shouldIndex = True if shouldIndex: try: asyncio.ensure_future(_autoIndexFile( fileId=fileItem.id, fileName=fileItem.fileName, mimeType=fileItem.mimeType, user=currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, )) except Exception as indexErr: logger.warning(f"Auto-index trigger failed (non-blocking): {indexErr}") # Response with duplicate information return JSONResponse({ "message": message, "file": fileMeta, "duplicateType": duplicateType, "originalFileName": file.filename, "storedFileName": fileItem.fileName, "isDuplicate": duplicateType != "new_file" }) except interfaceDbManagement.FileStorageError as e: logger.error(f"Error during file upload (storage): {str(e)}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e) ) except Exception as e: logger.error(f"Error during file upload: {str(e)}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Error during file upload: {str(e)}" ) @router.post("/batch-delete") @limiter.limit("10/minute") def batch_delete_items( request: Request, body: Dict[str, Any] = Body(...), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext) ) -> Dict[str, Any]: """Batch delete files.""" fileIds = body.get("fileIds") or [] if not isinstance(fileIds, list): raise HTTPException(status_code=400, detail=routeApiMsg("fileIds must be an array")) try: mgmt = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) result = {"deletedFiles": 0} if fileIds: fileResult = mgmt.deleteFilesBatch(fileIds) result["deletedFiles"] += fileResult.get("deletedFiles", 0) return result except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) except Exception as e: logger.error(f"Error in batch delete: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/batch-download") @limiter.limit("10/minute") def batchDownload( request: Request, body: Dict[str, Any] = Body(...), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ): """Download multiple files and/or folders as a single ZIP archive, preserving the folder hierarchy as ZIP paths.""" fileIds = body.get("fileIds") or [] folderIds = body.get("folderIds") or [] if not fileIds and not folderIds: raise HTTPException(status_code=400, detail="fileIds or folderIds required") try: mgmt = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) folderCache: dict[str, dict] = {} def _getFolder(fid: str): if fid not in folderCache: f = mgmt.getFolder(fid) folderCache[fid] = f if f else {} return folderCache[fid] def _folderPath(fid: str) -> str: """Build the full path for a folder by walking up parentId.""" parts: list[str] = [] current = fid visited: set[str] = set() while current and current not in visited: visited.add(current) folder = _getFolder(current) if not folder: break parts.append(folder.get("name", current)) current = folder.get("parentId") parts.reverse() return "/".join(parts) # Collect files from requested folders (recursive) fileEntries: list[tuple[str, str]] = [] seenFileIds: set[str] = set() for fid in folderIds: childFolderIds = mgmt._collectChildFolderIds(fid) for cfid in childFolderIds: prefix = _folderPath(cfid) items = mgmt.db.getRecordset(FileItem, recordFilter={"folderId": cfid}) for item in items: itemId = item.get("id") if isinstance(item, dict) else getattr(item, "id", None) if itemId and itemId not in seenFileIds: seenFileIds.add(itemId) fileEntries.append((itemId, prefix)) # Loose files (not via folder selection) for fid in fileIds: if fid in seenFileIds: continue seenFileIds.add(fid) fileMeta = mgmt.getFile(fid) if not fileMeta: continue fileFolderId = fileMeta.get("folderId") if isinstance(fileMeta, dict) else getattr(fileMeta, "folderId", None) prefix = _folderPath(fileFolderId) if fileFolderId else "" fileEntries.append((fid, prefix)) if not fileEntries: raise HTTPException(status_code=404, detail="No downloadable files found") buf = io.BytesIO() with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf: for fid, prefix in fileEntries: try: fileMeta = mgmt.getFile(fid) fileData = mgmt.getFileData(fid) if fileMeta and fileData: name = (fileMeta.get("fileName") if isinstance(fileMeta, dict) else getattr(fileMeta, "fileName", fid)) or fid zipPath = f"{prefix}/{name}" if prefix else name zf.writestr(zipPath, fileData) except Exception as fe: logger.warning(f"batch_download: skipping file {fid}: {fe}") buf.seek(0) from fastapi.responses import StreamingResponse return StreamingResponse( buf, media_type="application/zip", headers={"Content-Disposition": 'attachment; filename="download.zip"'}, ) except HTTPException: raise except Exception as e: logger.error(f"batch_download error: {e}") raise HTTPException(status_code=500, detail=str(e)) # ── Bulk file operations (replace former group-based bulk routes) ───────────── @router.post("/bulk/scope") @limiter.limit("30/minute") def bulk_set_scope( request: Request, body: dict = Body(...), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ): """Set scope for a list of files by their IDs.""" fileIds: list = body.get("fileIds") or [] scope: str = body.get("scope") or "" if not fileIds: raise HTTPException(status_code=400, detail="fileIds is required") validScopes = {"personal", "featureInstance", "mandate", "global"} if scope not in validScopes: raise HTTPException(status_code=400, detail=f"Invalid scope. Must be one of {validScopes}") if scope == "global" and not context.isSysAdmin: raise HTTPException(status_code=403, detail="Only sysadmins can set global scope") try: managementInterface = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) updated = 0 for fid in fileIds: try: managementInterface.updateFile(fid, {"scope": scope}) updated += 1 except Exception as e: logger.error(f"bulk_set_scope: failed for file {fid}: {e}") return {"scope": scope, "filesUpdated": updated} except HTTPException: raise except Exception as e: logger.error(f"bulk_set_scope error: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/bulk/neutralize") @limiter.limit("30/minute") def bulk_set_neutralize( request: Request, body: dict = Body(...), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ): """Set neutralize flag for a list of files by their IDs (incl. knowledge purge/reindex).""" fileIds: list = body.get("fileIds") or [] neutralize = body.get("neutralize") if not fileIds: raise HTTPException(status_code=400, detail="fileIds is required") if neutralize is None: raise HTTPException(status_code=400, detail="neutralize is required") try: managementInterface = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) updated = 0 for fid in fileIds: try: managementInterface.updateFile(fid, {"neutralize": neutralize}) if not neutralize: try: from modules.interfaces import interfaceDbKnowledge kIface = interfaceDbKnowledge.getInterface(currentUser) kIface.purgeFileKnowledge(fid) except Exception as ke: logger.warning(f"bulk_set_neutralize: knowledge purge failed for {fid}: {ke}") updated += 1 except Exception as e: logger.error(f"bulk_set_neutralize: failed for file {fid}: {e}") return {"neutralize": neutralize, "filesUpdated": updated} except HTTPException: raise except Exception as e: logger.error(f"bulk_set_neutralize error: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/bulk/download-zip") @limiter.limit("10/minute") async def bulk_download_zip( request: Request, body: dict = Body(...), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ): """Download a list of files as a ZIP archive.""" fileIds: list = body.get("fileIds") or [] if not fileIds: raise HTTPException(status_code=400, detail="fileIds is required") try: managementInterface = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) buf = io.BytesIO() with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf: for fid in fileIds: try: fileMeta = managementInterface.getFile(fid) fileData = managementInterface.getFileData(fid) if fileMeta and fileData: name = (getattr(fileMeta, "fileName", None) or fid) zf.writestr(name, fileData) except Exception as fe: logger.warning(f"bulk_download_zip: skipping file {fid}: {fe}") buf.seek(0) from fastapi.responses import StreamingResponse return StreamingResponse( buf, media_type="application/zip", headers={"Content-Disposition": 'attachment; filename="files.zip"'}, ) except HTTPException: raise except Exception as e: logger.error(f"bulk_download_zip error: {e}") raise HTTPException(status_code=500, detail=str(e)) # ── Scope & neutralize tagging endpoints (before /{fileId} catch-all) ───────── @router.patch("/{fileId}/scope") @limiter.limit("30/minute") def updateFileScope( request: Request, background_tasks: BackgroundTasks, fileId: str = Path(..., description="ID of the file"), scope: str = Body(..., embed=True), context: RequestContext = Depends(getRequestContext), ) -> Dict[str, Any]: """Update the scope of a file. Global scope requires sysAdmin.""" try: validScopes = {"personal", "featureInstance", "mandate", "global"} if scope not in validScopes: raise HTTPException(status_code=400, detail=f"Invalid scope: {scope}. Must be one of {validScopes}") if scope == "global" and not context.isSysAdmin: raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope")) managementInterface = _getInterfaceForOwnedItem(context.user, context, fileId, FileItem) managementInterface.updateFile(fileId, {"scope": scope}) try: from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface from modules.datamodels.datamodelKnowledge import FileContentIndex knowledgeDb = getKnowledgeInterface() indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fileId}) for idx in indices: idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None) if idxId: knowledgeDb.db.recordModify(FileContentIndex, idxId, {"scope": scope}) except Exception as e: logger.warning(f"Failed to update FileContentIndex scope for file {fileId}: {e}") fileMeta = managementInterface.getFile(fileId) if fileMeta: fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "") mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "") async def _runReindexAfterScopeChange(): try: await _autoIndexFile( fileId=fileId, fileName=fn, mimeType=mt, user=context.user, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) except Exception as ex: logger.warning("Re-index after scope change failed for %s: %s", fileId, ex) background_tasks.add_task(_runReindexAfterScopeChange) return {"fileId": fileId, "scope": scope, "updated": True} except HTTPException: raise except Exception as e: logger.error(f"Error updating file scope: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.patch("/{fileId}/neutralize") @limiter.limit("30/minute") def updateFileNeutralize( request: Request, background_tasks: BackgroundTasks, fileId: str = Path(..., description="ID of the file"), neutralize: bool = Body(..., embed=True), context: RequestContext = Depends(getRequestContext), ) -> Dict[str, Any]: """Toggle neutralization flag on a file. FAILSAFE: When turning neutralize ON, the existing Knowledge Store index and all content chunks are deleted SYNCHRONOUSLY before the response is returned. The re-index happens in a background task. If re-indexing fails the file simply has no index — no un-neutralized data can leak. """ try: managementInterface = _getInterfaceForOwnedItem(context.user, context, fileId, FileItem) managementInterface.updateFile(fileId, {"neutralize": neutralize}) from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface knowledgeDb = getKnowledgeInterface() if neutralize: # ── CRITICAL: purge existing (potentially un-neutralized) index # This MUST succeed before the response is sent so that no stale # raw-text chunks remain searchable while re-indexing runs. try: knowledgeDb.deleteFileContentIndex(fileId) logger.info("Neutralize toggle ON: deleted index + chunks for file %s", fileId) except Exception as e: logger.error("Neutralize toggle ON: FAILED to delete index for file %s: %s", fileId, e) raise HTTPException( status_code=500, detail=f"Could not purge existing index for neutralization — aborting toggle. Error: {e}", ) else: # Turning neutralize OFF: update metadata only; re-index will overwrite try: from modules.datamodels.datamodelKnowledge import FileContentIndex indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fileId}) for idx in indices: idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None) if idxId: knowledgeDb.db.recordModify(FileContentIndex, idxId, { "neutralizationStatus": "original", "isNeutralized": False, }) except Exception as e: logger.warning("Failed to update FileContentIndex after neutralize-OFF for %s: %s", fileId, e) # Background re-index (safe: if it fails, there is simply no index) fileMeta = managementInterface.getFile(fileId) if fileMeta: fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "") mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "") async def _runReindexAfterNeutralizeToggle(): try: await _autoIndexFile( fileId=fileId, fileName=fn, mimeType=mt, user=context.user, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) except Exception as ex: logger.error("Re-index after neutralize toggle failed for %s: %s (file has NO index until next re-index)", fileId, ex) background_tasks.add_task(_runReindexAfterNeutralizeToggle) return {"fileId": fileId, "neutralize": neutralize, "updated": True} except HTTPException: raise except Exception as e: logger.error(f"Error updating file neutralize flag: {e}") raise HTTPException(status_code=500, detail=str(e)) # ── File endpoints with path parameters (catch-all /{fileId}) ───────────────── @router.get("/{fileId}") @limiter.limit("30/minute") def get_file( request: Request, fileId: str = Path(..., description="ID of the file"), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext) ): """Get a file. Resolves the file's mandate/instance scope automatically.""" try: _mgmt, fileData = _resolveFileWithScope(currentUser, context, fileId) if not fileData: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File with ID {fileId} not found" ) fileDict = fileData.model_dump() if hasattr(fileData, "model_dump") else dict(fileData) import modules.interfaces.interfaceDbApp as _appIface enriched = enrichRowsWithFkLabels([fileDict], FileItem, db=_appIface.getInterface(currentUser).db) return enriched[0] except interfaceDbManagement.FileNotFoundError as e: logger.warning(f"File not found: {str(e)}") raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=str(e) ) except interfaceDbManagement.FilePermissionError as e: logger.warning(f"No permission for file: {str(e)}") raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, detail=str(e) ) except interfaceDbManagement.FileError as e: logger.error(f"Error retrieving file: {str(e)}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e) ) except Exception as e: logger.error(f"Unexpected error retrieving file: {str(e)}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Error retrieving file: {str(e)}" ) @router.put("/{fileId}", response_model=FileItem) @limiter.limit("10/minute") def update_file( request: Request, fileId: str = Path(..., description="ID of the file to update"), file_info: Dict[str, Any] = Body(...), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ) -> FileItem: """Update file info""" try: _EDITABLE_FIELDS = {"fileName", "folderId", "scope", "tags", "description", "neutralize"} safeData = {k: v for k, v in file_info.items() if k in _EDITABLE_FIELDS} if not safeData: raise HTTPException(status_code=400, detail=routeApiMsg("No editable fields provided")) managementInterface = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) file = managementInterface.getFile(fileId) if not file: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File with ID {fileId} not found" ) if safeData.get("scope") == "global" and not getattr(currentUser, "isSysAdmin", False): raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, detail=routeApiMsg("Only sysadmins can set global scope"), ) if not managementInterface.checkRbacPermission(FileItem, "update", fileId): raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, detail=routeApiMsg("Not authorized to update this file") ) result = managementInterface.updateFile(fileId, safeData) if not result: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=routeApiMsg("Failed to update file") ) # Get updated file updatedFile = managementInterface.getFile(fileId) return updatedFile except HTTPException as he: raise he except Exception as e: logger.error(f"Error updating file: {str(e)}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e) ) @router.delete("/{fileId}", response_model=Dict[str, Any]) @limiter.limit("10/minute") def delete_file( request: Request, fileId: str = Path(..., description="ID of the file to delete"), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext), ) -> Dict[str, Any]: """Delete a file""" managementInterface = interfaceDbManagement.getInterface( currentUser, mandateId=str(context.mandateId) if context.mandateId else None, featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None, ) # Check if the file exists existingFile = managementInterface.getFile(fileId) if not existingFile: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File with ID {fileId} not found" ) success = managementInterface.deleteFile(fileId) if not success: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=routeApiMsg("Error deleting the file") ) return {"message": f"File with ID {fileId} successfully deleted"} @router.get("/{fileId}/download") @limiter.limit("30/minute") def download_file( request: Request, fileId: str = Path(..., description="ID of the file to download"), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext) ) -> Response: """Download a file. Resolves the file's mandate/instance scope automatically, so direct links work even when X-Mandate-Id / X-Instance-Id headers are not sent by the browser.""" try: managementInterface, fileData = _resolveFileWithScope(currentUser, context, fileId) if not fileData: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File with ID {fileId} not found" ) fileContent = managementInterface.getFileData(fileId) if not fileContent: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File content not found for ID {fileId}" ) # Return file as response # Properly encode filename for Content-Disposition header to handle Unicode characters encoded_filename = urllib.parse.quote(fileData.fileName) return Response( content=fileContent, media_type=fileData.mimeType, headers={ "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}" } ) except HTTPException: raise except Exception as e: logger.error(f"Error downloading file: {str(e)}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Error downloading file: {str(e)}" ) @router.get("/{fileId}/preview", response_model=FilePreview) @limiter.limit("30/minute") def preview_file( request: Request, fileId: str = Path(..., description="ID of the file to preview"), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext) ) -> FilePreview: """Preview a file's content. Resolves the file's mandate/instance scope automatically.""" try: managementInterface, fileMeta = _resolveFileWithScope(currentUser, context, fileId) if not fileMeta: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File with ID {fileId} not found" ) preview = managementInterface.getFileContent(fileId) if not preview: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File with ID {fileId} not found or no content available" ) return preview except HTTPException: raise except Exception as e: logger.error(f"Error previewing file: {str(e)}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Error previewing file: {str(e)}" )