# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
from fastapi import APIRouter, HTTPException, Depends, File, UploadFile, Form, Path, Request, status, Query, Response, Body, BackgroundTasks
from fastapi.responses import JSONResponse
from typing import List, Dict, Any, Optional
import logging
import json
# Import auth module
from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext
# Import interfaces
import modules.interfaces.interfaceDbManagement as interfaceDbManagement
from modules.datamodels.datamodelFiles import FileItem, FilePreview
from modules.datamodels.datamodelFileFolder import FileFolder
from modules.shared.attributeUtils import getModelAttributeDefinitions
from modules.datamodels.datamodelUam import User
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata, normalize_pagination_dict
from modules.shared.i18nRegistry import apiRouteContext
from modules.routes.routeHelpers import enrichRowsWithFkLabels
routeApiMsg = apiRouteContext("routeDataFiles")
# Configure logger
logger = logging.getLogger(__name__)
def _resolveFileWithScope(currentUser: User, context: RequestContext, fileId: str):
    """Resolve a file together with a management interface scoped to the
    file's own mandate/featureInstance.

    Workflow-generated files (e.g. AI report outputs) carry their own
    mandateId/featureInstanceId, and plain download links cannot supply
    custom scope headers. We therefore try the caller's current scope
    first and, failing that, read the scope off the stored FileItem record
    and repeat the RBAC-checked lookup in that scope.

    Returns a ``(managementInterface, fileItem)`` pair, or ``(None, None)``
    when the file does not exist or the user lacks access in the file's
    actual scope.
    """
    ctxMandate = str(context.mandateId) if context.mandateId else None
    ctxInstance = str(context.featureInstanceId) if context.featureInstanceId else None
    primary = interfaceDbManagement.getInterface(
        currentUser,
        mandateId=ctxMandate,
        featureInstanceId=ctxInstance,
    )
    # Fast path: the file is visible in the caller's current scope.
    found = primary.getFile(fileId)
    if found:
        return primary, found
    # Slow path: read the raw record to learn the file's own scope.
    records = primary.db.getRecordset(FileItem, recordFilter={"id": fileId})
    if not records:
        return None, None
    record = records[0]
    ownMandate = record.get("mandateId") or None
    ownInstance = record.get("featureInstanceId") or None
    if not ownMandate and not ownInstance:
        return None, None
    # Same scope as the request, yet getFile() already failed above --
    # retrying in an identical scope cannot succeed.
    if ownMandate == ctxMandate and ownInstance == ctxInstance:
        return None, None
    scoped = interfaceDbManagement.getInterface(
        currentUser,
        mandateId=ownMandate,
        featureInstanceId=ownInstance,
    )
    scopedItem = scoped.getFile(fileId)
    if not scopedItem:
        return None, None
    return scoped, scopedItem
async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
    """Background task: pre-scan + extraction + knowledge indexing.

    Step 1: Structure Pre-Scan (AI-free) -> FileContentIndex (persisted)
    Step 2: Content extraction via runExtraction -> ContentParts
    Step 3: KnowledgeService.indexFile -> chunking + embedding -> Knowledge Store

    On any failure the file status is reset to "active" (best effort) so the
    file never stays stuck in "processing".
    """
    # Accept either a User-like object (with .id) or a plain user-id string.
    userId = user.id if hasattr(user, "id") else str(user)
    try:
        mgmtInterface = interfaceDbManagement.getInterface(user)
        # Mark the file as being processed for the duration of the pipeline.
        mgmtInterface.updateFile(fileId, {"status": "processing"})
        rawBytes = mgmtInterface.getFileData(fileId)
        if not rawBytes:
            # Nothing to index -- restore status and bail out early.
            logger.warning(f"Auto-index: no file data for {fileId}, skipping")
            mgmtInterface.updateFile(fileId, {"status": "active"})
            return
        # Resolve scope info from the stored metadata; tolerate both dict and
        # model-object shapes.
        file_meta = mgmtInterface.getFile(fileId)
        feature_instance_id = ""
        mandate_id = ""
        file_scope = "personal"
        if file_meta:
            if isinstance(file_meta, dict):
                feature_instance_id = file_meta.get("featureInstanceId") or ""
                mandate_id = file_meta.get("mandateId") or ""
                file_scope = file_meta.get("scope") or "personal"
            else:
                feature_instance_id = getattr(file_meta, "featureInstanceId", None) or ""
                mandate_id = getattr(file_meta, "mandateId", None) or ""
                file_scope = getattr(file_meta, "scope", None) or "personal"
        logger.info(f"Auto-index starting for {fileName} ({len(rawBytes)} bytes, {mimeType})")
        # Step 1: Structure Pre-Scan (AI-free)
        from modules.serviceCenter.services.serviceKnowledge.subPreScan import preScanDocument
        contentIndex = await preScanDocument(
            fileData=rawBytes,
            mimeType=mimeType,
            fileId=fileId,
            fileName=fileName,
            userId=userId,
            featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
            mandateId=str(mandate_id) if mandate_id else "",
            scope=file_scope,
        )
        logger.info(
            f"Pre-scan complete for {fileName}: "
            f"{contentIndex.totalObjects} objects"
        )
        # Persist FileContentIndex immediately
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        knowledgeDb = getKnowledgeInterface()
        knowledgeDb.upsertFileContentIndex(contentIndex)
        # Step 2: Content extraction (AI-free, produces ContentParts)
        from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry, ChunkerRegistry
        from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
        from modules.datamodels.datamodelExtraction import ExtractionOptions
        extractorRegistry = ExtractorRegistry()
        chunkerRegistry = ChunkerRegistry()
        options = ExtractionOptions()
        extracted = runExtraction(
            extractorRegistry, chunkerRegistry,
            rawBytes, fileName, mimeType, options,
        )
        # Map extracted parts onto the contentObjects shape expected by the
        # knowledge service; empty / whitespace-only parts are dropped.
        contentObjects = []
        for part in extracted.parts:
            contentType = "text"
            if part.typeGroup == "image":
                contentType = "image"
            elif part.typeGroup in ("binary", "container"):
                contentType = "other"
            if not part.data or not part.data.strip():
                continue
            contentObjects.append({
                "contentObjectId": part.id,
                "contentType": contentType,
                "data": part.data,
                "contextRef": {
                    "containerPath": fileName,
                    "location": part.label or "file",
                    **(part.metadata or {}),
                },
            })
        logger.info(f"Extracted {len(contentObjects)} content objects from {fileName}")
        if not contentObjects:
            # Nothing extractable: mark as indexed and restore status so the
            # file is not left in "processing".
            knowledgeDb.updateFileStatus(fileId, "indexed")
            mgmtInterface.updateFile(fileId, {"status": "active"})
            return
        # Step 3: Knowledge indexing (chunking + embedding)
        from modules.serviceCenter import getService
        from modules.serviceCenter.context import ServiceCenterContext
        ctx = ServiceCenterContext(
            user=user,
            mandate_id=str(mandate_id) if mandate_id else "",
            feature_instance_id=str(feature_instance_id) if feature_instance_id else "",
        )
        knowledgeService = getService("knowledge", ctx)
        await knowledgeService.indexFile(
            fileId=fileId,
            fileName=fileName,
            mimeType=mimeType,
            userId=userId,
            featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
            mandateId=str(mandate_id) if mandate_id else "",
            contentObjects=contentObjects,
            structure=contentIndex.structure,
        )
        # Re-acquire interface after await to avoid stale user context from the singleton
        mgmtInterface = interfaceDbManagement.getInterface(user)
        mgmtInterface.updateFile(fileId, {"status": "active"})
        logger.info(f"Auto-index complete for file {fileId} ({fileName})")
    except Exception as e:
        logger.error(f"Auto-index failed for file {fileId}: {e}", exc_info=True)
        try:
            # Best-effort status reset so the file does not stay "processing".
            errMgmt = interfaceDbManagement.getInterface(user)
            errMgmt.updateFile(fileId, {"status": "active"})
        except Exception:
            pass
# Attribute/column definitions derived from the FileItem datamodel.
fileAttributes = getModelAttributeDefinitions(FileItem)
# Router for all file-management endpoints, mounted under /api/files.
router = APIRouter(
    prefix="/api/files",
    tags=["Manage Files"],
    responses={
        404: {"description": "Not found"},
        400: {"description": "Bad request"},
        401: {"description": "Unauthorized"},
        403: {"description": "Forbidden"},
        500: {"description": "Internal server error"}
    }
)
@router.get("/list")
@limiter.limit("120/minute")
def get_files(
request: Request,
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
):
"""
Get files with optional pagination, sorting, and filtering.
Query Parameters:
- pagination: JSON-encoded PaginationParams object, or None for no pagination
Examples:
- GET /api/files/list (no pagination - returns all items)
- GET /api/files/list?pagination={"page":1,"pageSize":10,"sort":[]}
- GET /api/files/list?pagination={"page":2,"pageSize":20,"sort":[{"field":"fileName","direction":"asc"}]}
"""
try:
# Parse pagination parameter
paginationParams = None
if pagination:
try:
paginationDict = json.loads(pagination)
if paginationDict:
# Normalize pagination dict (handles top-level "search" field)
paginationDict = normalize_pagination_dict(paginationDict)
paginationParams = PaginationParams(**paginationDict)
except (json.JSONDecodeError, ValueError) as e:
raise HTTPException(
status_code=400,
detail=f"Invalid pagination parameter: {str(e)}"
)
from modules.routes.routeHelpers import (
handleIdsMode,
handleFilterValuesInMemory,
)
managementInterface = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
)
if mode == "filterValues":
if not column:
raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")
allFiles = managementInterface.getAllFiles()
items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
itemDicts = [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in items]
enrichRowsWithFkLabels(itemDicts, FileItem)
return handleFilterValuesInMemory(itemDicts, column, pagination)
if mode == "ids":
recordFilter = {"sysCreatedBy": managementInterface.userId}
return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
recordFilter = None
if paginationParams and paginationParams.filters and "folderId" in paginationParams.filters:
fVal = paginationParams.filters.get("folderId")
# For a concrete folderId we use recordFilter (exact equality).
# For null / empty (= "root") we keep it in pagination.filters so the
# connector applies `IS NULL OR = ''` – files predating the folderId
# fix were stored with an empty string instead of NULL.
if fVal is None or (isinstance(fVal, str) and fVal.strip() == ""):
paginationParams.filters["folderId"] = None
else:
paginationParams.filters.pop("folderId")
recordFilter = {"folderId": fVal}
result = managementInterface.getAllFiles(pagination=paginationParams, recordFilter=recordFilter)
def _filesToDicts(items):
return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in items]
if paginationParams:
enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem)
return {
"items": enriched,
"pagination": PaginationMetadata(
currentPage=paginationParams.page,
pageSize=paginationParams.pageSize,
totalItems=result.totalItems,
totalPages=result.totalPages,
sort=paginationParams.sort,
filters=paginationParams.filters
).model_dump(),
}
else:
items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
enriched = enrichRowsWithFkLabels(_filesToDicts(items), FileItem)
return {"items": enriched, "pagination": None}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error getting files: {str(e)}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to get files: {str(e)}"
)
@router.post("/upload", status_code=status.HTTP_201_CREATED)
@limiter.limit("10/minute")
async def upload_file(
request: Request,
file: UploadFile = File(...),
workflowId: Optional[str] = Form(None),
featureInstanceId: Optional[str] = Form(None),
folderId: Optional[str] = Form(None),
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext),
) -> JSONResponse:
# Add fileName property to UploadFile for consistency with backend model
file.fileName = file.filename
"""Upload a file"""
try:
managementInterface = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
)
# Read file
fileContent = await file.read()
# Check size limits
maxSize = int(interfaceDbManagement.APP_CONFIG.get("File_Management_MAX_UPLOAD_SIZE_MB")) * 1024 * 1024 # in bytes
if len(fileContent) > maxSize:
raise HTTPException(
status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
detail=f"File too large. Maximum size: {interfaceDbManagement.APP_CONFIG.get('File_Management_MAX_UPLOAD_SIZE_MB')}MB"
)
# Normalize folderId: empty string / "null" / "root" → None (root folder)
normalizedFolderId: Optional[str] = folderId
if isinstance(normalizedFolderId, str):
trimmed = normalizedFolderId.strip()
if not trimmed or trimmed.lower() in {"null", "none", "root"}:
normalizedFolderId = None
else:
normalizedFolderId = trimmed
# Save file via LucyDOM interface in the database
fileItem, duplicateType = managementInterface.saveUploadedFile(
fileContent, file.filename, folderId=normalizedFolderId
)
if featureInstanceId and not fileItem.featureInstanceId:
managementInterface.updateFile(fileItem.id, {"featureInstanceId": featureInstanceId})
fileItem.featureInstanceId = featureInstanceId
# For exact duplicates we keep the existing record, but move it into the
# target folder so the user actually sees their upload land where they expect.
if duplicateType == "exact_duplicate" and normalizedFolderId != getattr(fileItem, "folderId", None):
managementInterface.updateFile(fileItem.id, {"folderId": normalizedFolderId})
fileItem.folderId = normalizedFolderId
# Determine response message based on duplicate type
if duplicateType == "exact_duplicate":
message = f"File '{file.filename}' already exists with identical content. Reusing existing file."
elif duplicateType == "name_conflict":
message = f"File '{file.filename}' already exists with different content. Uploaded as '{fileItem.fileName}'."
else: # new_file
message = "File uploaded successfully"
# Convert FileItem to dictionary for JSON response
fileMeta = fileItem.model_dump()
# If workflowId is provided, include it in the response (not stored in FileItem model)
if workflowId:
fileMeta["workflowId"] = workflowId
# Trigger background auto-index pipeline (non-blocking)
# Also runs for duplicates in case the original was never successfully indexed
shouldIndex = duplicateType == "new_file"
if not shouldIndex:
try:
from modules.interfaces.interfaceDbKnowledge import getInterface as _getKnowledgeInterface
_kDb = _getKnowledgeInterface()
_existingIndex = _kDb.getFileContentIndex(fileItem.id)
if not _existingIndex:
shouldIndex = True
logger.info(f"Re-triggering auto-index for duplicate {fileItem.id} (not yet indexed)")
except Exception:
shouldIndex = True
if shouldIndex:
try:
import asyncio
asyncio.ensure_future(_autoIndexFile(
fileId=fileItem.id,
fileName=fileItem.fileName,
mimeType=fileItem.mimeType,
user=currentUser,
))
except Exception as indexErr:
logger.warning(f"Auto-index trigger failed (non-blocking): {indexErr}")
# Response with duplicate information
return JSONResponse({
"message": message,
"file": fileMeta,
"duplicateType": duplicateType,
"originalFileName": file.filename,
"storedFileName": fileItem.fileName,
"isDuplicate": duplicateType != "new_file"
})
except interfaceDbManagement.FileStorageError as e:
logger.error(f"Error during file upload (storage): {str(e)}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=str(e)
)
except Exception as e:
logger.error(f"Error during file upload: {str(e)}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error during file upload: {str(e)}"
)
# ── Folder endpoints (MUST be before /{fileId} catch-all) ─────────────────────
@router.get("/folders", response_model=List[Dict[str, Any]])
@limiter.limit("30/minute")
def list_folders(
request: Request,
parentId: Optional[str] = Query(None, description="Parent folder ID (omit for all folders)"),
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
) -> List[Dict[str, Any]]:
"""List folders for the current user."""
try:
mgmt = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
)
if parentId is not None:
return mgmt.listFolders(parentId=parentId)
return mgmt.listFolders()
except Exception as e:
logger.error(f"Error listing folders: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/folders", status_code=status.HTTP_201_CREATED)
@limiter.limit("10/minute")
def create_folder(
request: Request,
body: Dict[str, Any] = Body(...),
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
"""Create a new folder."""
name = body.get("name", "")
parentId = body.get("parentId")
if not name:
raise HTTPException(status_code=400, detail=routeApiMsg("name is required"))
try:
mgmt = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
)
return mgmt.createFolder(name=name, parentId=parentId)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.error(f"Error creating folder: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.put("/folders/{folderId}")
@limiter.limit("10/minute")
def rename_folder(
request: Request,
folderId: str = Path(...),
body: Dict[str, Any] = Body(...),
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
"""Rename a folder."""
newName = body.get("name", "")
if not newName:
raise HTTPException(status_code=400, detail=routeApiMsg("name is required"))
try:
mgmt = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
)
mgmt.renameFolder(folderId, newName)
return {"success": True, "folderId": folderId, "name": newName}
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.error(f"Error renaming folder: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.delete("/folders/{folderId}")
@limiter.limit("10/minute")
def delete_folder(
request: Request,
folderId: str = Path(...),
recursive: bool = Query(False, description="Delete folder contents recursively"),
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
"""Delete a folder. Use recursive=true to delete non-empty folders."""
try:
mgmt = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
)
return mgmt.deleteFolder(folderId, recursive=recursive)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.error(f"Error deleting folder: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/folders/{folderId}/move")
@limiter.limit("10/minute")
def move_folder(
request: Request,
folderId: str = Path(...),
body: Dict[str, Any] = Body(...),
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
"""Move a folder to a new parent."""
targetParentId = body.get("targetParentId")
try:
mgmt = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
)
mgmt.moveFolder(folderId, targetParentId)
return {"success": True, "folderId": folderId, "parentId": targetParentId}
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.error(f"Error moving folder: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.patch("/folders/{folderId}/scope")
@limiter.limit("10/minute")
def _updateFolderScope(
request: Request,
folderId: str = Path(..., description="ID of the folder"),
scope: str = Body(..., embed=True),
context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
"""Update the scope of a folder. Propagates to all files inside (recursively). Global scope requires sysAdmin."""
validScopes = {"personal", "featureInstance", "mandate", "global"}
if scope not in validScopes:
raise HTTPException(status_code=400, detail=f"Invalid scope: {scope}. Must be one of {validScopes}")
if scope == "global" and not context.isSysAdmin:
raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))
try:
mgmt = interfaceDbManagement.getInterface(
context.user,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
)
folder = mgmt.getFolder(folderId)
if not folder:
raise HTTPException(status_code=404, detail=routeApiMsg("Folder not found"))
mgmt.updateFolder(folderId, {"scope": scope})
fileIds = _collectFolderFileIds(mgmt, folderId)
for fid in fileIds:
try:
mgmt.updateFile(fid, {"scope": scope})
except Exception as e:
logger.error("Folder scope propagation: failed to update file %s: %s", fid, e)
logger.info("Updated scope=%s for folder %s: %d files affected", scope, folderId, len(fileIds))
return {"folderId": folderId, "scope": scope, "filesUpdated": len(fileIds)}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error updating folder scope: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.patch("/folders/{folderId}/neutralize")
@limiter.limit("10/minute")
def updateFolderNeutralize(
request: Request,
background_tasks: BackgroundTasks,
folderId: str = Path(..., description="ID of the folder"),
neutralize: bool = Body(..., embed=True),
context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
"""Toggle neutralization on a folder. Propagates to all files inside (recursively).
When turning ON: all files in the folder get ``neutralize=True``, their
knowledge indexes are purged synchronously, and background re-indexing
is triggered.
When turning OFF: files revert to ``neutralize=False`` unless they were
individually marked (not implemented yet -- all are reverted).
"""
try:
mgmt = interfaceDbManagement.getInterface(
context.user,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
)
folder = mgmt.getFolder(folderId)
if not folder:
raise HTTPException(status_code=404, detail=routeApiMsg("Folder not found"))
mgmt.updateFolder(folderId, {"neutralize": neutralize})
fileIds = _collectFolderFileIds(mgmt, folderId)
logger.info("Folder neutralize toggle %s for folder %s: %d files affected", neutralize, folderId, len(fileIds))
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
knowledgeDb = getKnowledgeInterface()
for fid in fileIds:
try:
mgmt.updateFile(fid, {"neutralize": neutralize})
if neutralize:
try:
knowledgeDb.deleteFileContentIndex(fid)
except Exception as e:
logger.warning("Folder neutralize: failed to purge index for file %s: %s", fid, e)
else:
try:
from modules.datamodels.datamodelKnowledge import FileContentIndex
indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fid})
for idx in indices:
idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
if idxId:
knowledgeDb.db.recordModify(FileContentIndex, idxId, {
"neutralizationStatus": "original",
"isNeutralized": False,
})
except Exception as e:
logger.warning("Folder neutralize OFF: metadata update failed for %s: %s", fid, e)
except Exception as e:
logger.error("Folder neutralize: failed to update file %s: %s", fid, e)
for fid in fileIds:
fileMeta = mgmt.getFile(fid)
if fileMeta:
fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "")
mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "")
async def _reindex(fileId=fid, fileName=fn, mimeType=mt):
try:
await _autoIndexFile(fileId=fileId, fileName=fileName, mimeType=mimeType, user=context.user)
except Exception as ex:
logger.error("Folder neutralize re-index failed for %s: %s", fileId, ex)
background_tasks.add_task(_reindex)
return {"folderId": folderId, "neutralize": neutralize, "filesUpdated": len(fileIds)}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error updating folder neutralize flag: {e}")
raise HTTPException(status_code=500, detail=str(e))
def _collectFolderFileIds(mgmt, folderId: str) -> List[str]:
"""Recursively collect all file IDs in a folder and its sub-folders."""
fileIds = []
try:
files = mgmt.listFiles(folderId=folderId)
if isinstance(files, dict):
files = files.get("files", [])
for f in (files or []):
fid = f.get("id") if isinstance(f, dict) else getattr(f, "id", None)
if fid:
fileIds.append(fid)
except Exception as e:
logger.warning("_collectFolderFileIds: listFiles failed for folder %s: %s", folderId, e)
try:
subFolders = mgmt.listFolders(parentId=folderId)
for sf in (subFolders or []):
sfId = sf.get("id") if isinstance(sf, dict) else getattr(sf, "id", None)
if sfId:
fileIds.extend(_collectFolderFileIds(mgmt, sfId))
except Exception as e:
logger.warning("_collectFolderFileIds: listFolders failed for folder %s: %s", folderId, e)
return fileIds
@router.get("/folders/{folderId}/download")
@limiter.limit("10/minute")
def download_folder(
request: Request,
folderId: str = Path(..., description="ID of the folder to download as ZIP"),
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
) -> Response:
"""Download a folder (including subfolders) as a ZIP archive."""
import io
import zipfile
import urllib.parse
try:
mgmt = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
)
folder = mgmt.getFolder(folderId)
if not folder:
raise HTTPException(status_code=404, detail=f"Folder {folderId} not found")
folderName = folder.get("name", "download")
def _collectFiles(parentId: str, pathPrefix: str):
"""Recursively collect (zipPath, fileId) tuples."""
entries = []
for f in mgmt._getFilesByCurrentUser(recordFilter={"folderId": parentId}):
fname = f.get("fileName") or f.get("name") or f.get("id", "file")
entries.append((f"{pathPrefix}{fname}", f["id"]))
for sub in mgmt.listFolders(parentId=parentId):
subName = sub.get("name", sub["id"])
entries.extend(_collectFiles(sub["id"], f"{pathPrefix}{subName}/"))
return entries
fileEntries = _collectFiles(folderId, "")
if not fileEntries:
raise HTTPException(status_code=404, detail=routeApiMsg("Folder is empty"))
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
for zipPath, fileId in fileEntries:
data = mgmt.getFileData(fileId)
if data:
zf.writestr(zipPath, data)
buf.seek(0)
zipBytes = buf.getvalue()
encodedName = urllib.parse.quote(f"{folderName}.zip")
return Response(
content=zipBytes,
media_type="application/zip",
headers={
"Content-Disposition": f"attachment; filename*=UTF-8''{encodedName}"
}
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error downloading folder as ZIP: {e}")
raise HTTPException(status_code=500, detail=f"Error downloading folder: {str(e)}")
@router.post("/batch-delete")
@limiter.limit("10/minute")
def batch_delete_items(
request: Request,
body: Dict[str, Any] = Body(...),
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
"""Batch delete files/folders with a single SQL-backed operation per type."""
fileIds = body.get("fileIds") or []
folderIds = body.get("folderIds") or []
recursiveFolders = bool(body.get("recursiveFolders", True))
if not isinstance(fileIds, list) or not isinstance(folderIds, list):
raise HTTPException(status_code=400, detail=routeApiMsg("fileIds and folderIds must be arrays"))
try:
mgmt = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
)
result = {"deletedFiles": 0, "deletedFolders": 0}
if fileIds:
fileResult = mgmt.deleteFilesBatch(fileIds)
result["deletedFiles"] += fileResult.get("deletedFiles", 0)
if folderIds:
folderResult = mgmt.deleteFoldersBatch(folderIds, recursive=recursiveFolders)
result["deletedFiles"] += folderResult.get("deletedFiles", 0)
result["deletedFolders"] += folderResult.get("deletedFolders", 0)
return result
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.error(f"Error in batch delete: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/batch-move")
@limiter.limit("10/minute")
def batch_move_items(
request: Request,
body: Dict[str, Any] = Body(...),
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
"""Batch move files/folders with a single SQL-backed operation per type."""
fileIds = body.get("fileIds") or []
folderIds = body.get("folderIds") or []
targetFolderId = body.get("targetFolderId")
targetParentId = body.get("targetParentId")
if not isinstance(fileIds, list) or not isinstance(folderIds, list):
raise HTTPException(status_code=400, detail=routeApiMsg("fileIds and folderIds must be arrays"))
try:
mgmt = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
)
result = {"movedFiles": 0, "movedFolders": 0}
if fileIds:
fileResult = mgmt.moveFilesBatch(fileIds, targetFolderId=targetFolderId)
result["movedFiles"] += fileResult.get("movedFiles", 0)
if folderIds:
folderResult = mgmt.moveFoldersBatch(folderIds, targetParentId=targetParentId)
result["movedFolders"] += folderResult.get("movedFolders", 0)
return result
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.error(f"Error in batch move: {e}")
raise HTTPException(status_code=500, detail=str(e))
# ── Scope & neutralize tagging endpoints (before /{fileId} catch-all) ─────────
@router.patch("/{fileId}/scope")
@limiter.limit("30/minute")
def updateFileScope(
    request: Request,
    background_tasks: BackgroundTasks,
    fileId: str = Path(..., description="ID of the file"),
    scope: str = Body(..., embed=True),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Update the scope of a file. Global scope requires sysAdmin.

    Flow:
      1. Validate the requested scope value; "global" is sysadmin-only.
      2. Persist the new scope on the file record.
      3. Best-effort: mirror the new scope onto matching FileContentIndex
         rows in the Knowledge Store (failure is logged, not raised).
      4. Schedule a background re-index of the file content.

    Args:
        request: Raw request object (required by the rate-limiter decorator).
        background_tasks: FastAPI task queue used for the async re-index.
        fileId: ID of the file whose scope is changed.
        scope: One of "personal", "featureInstance", "mandate", "global".
        context: Resolved request context (user, mandate, feature instance).

    Returns:
        Dict with ``fileId``, the new ``scope`` and an ``updated`` flag.

    Raises:
        HTTPException: 400 for an invalid scope value, 403 when a
            non-sysadmin requests "global", 500 for unexpected failures.
    """
    try:
        validScopes = {"personal", "featureInstance", "mandate", "global"}
        if scope not in validScopes:
            raise HTTPException(status_code=400, detail=f"Invalid scope: {scope}. Must be one of {validScopes}")
        # "global" makes the file visible across all mandates — sysadmin-only.
        if scope == "global" and not context.isSysAdmin:
            raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))
        managementInterface = interfaceDbManagement.getInterface(
            context.user,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        managementInterface.updateFile(fileId, {"scope": scope})
        # Best-effort sync of the Knowledge Store index scope. A failure here
        # must not fail the scope change itself, so it is only logged.
        try:
            from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
            from modules.datamodels.datamodelKnowledge import FileContentIndex
            knowledgeDb = getKnowledgeInterface()
            # NOTE(review): filters FileContentIndex on its own "id" being the
            # file id — presumably the index record shares the file's id.
            # Confirm the filter key should not be "fileId".
            indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fileId})
            for idx in indices:
                # Recordset rows may be dicts or model instances.
                idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
                if idxId:
                    knowledgeDb.db.recordModify(FileContentIndex, idxId, {"scope": scope})
        except Exception as e:
            logger.warning(f"Failed to update FileContentIndex scope for file {fileId}: {e}")
        fileMeta = managementInterface.getFile(fileId)
        if fileMeta:
            # Capture name/type now so the task closure does not depend on the
            # (dict-or-model) metadata object staying alive.
            fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "")
            mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "")
            async def _runReindexAfterScopeChange():
                # Runs after the response is sent; failures are logged only.
                try:
                    await _autoIndexFile(fileId=fileId, fileName=fn, mimeType=mt, user=context.user)
                except Exception as ex:
                    logger.warning("Re-index after scope change failed for %s: %s", fileId, ex)
            background_tasks.add_task(_runReindexAfterScopeChange)
        return {"fileId": fileId, "scope": scope, "updated": True}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating file scope: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.patch("/{fileId}/neutralize")
@limiter.limit("30/minute")
def updateFileNeutralize(
    request: Request,
    background_tasks: BackgroundTasks,
    fileId: str = Path(..., description="ID of the file"),
    neutralize: bool = Body(..., embed=True),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Toggle neutralization flag on a file.

    FAILSAFE: When turning neutralize ON, the existing Knowledge Store index
    and all content chunks are deleted SYNCHRONOUSLY before the response is
    returned. The re-index happens in a background task. If re-indexing
    fails the file simply has no index — no un-neutralized data can leak.

    Args:
        request: Raw request object (required by the rate-limiter decorator).
        background_tasks: FastAPI task queue used for the async re-index.
        fileId: ID of the file to toggle.
        neutralize: New value of the neutralize flag.
        context: Resolved request context (user, mandate, feature instance).

    Returns:
        Dict with ``fileId``, the new ``neutralize`` value and ``updated``.

    Raises:
        HTTPException: 500 when the synchronous index purge fails (the toggle
            is aborted) or on any other unexpected failure.
    """
    try:
        managementInterface = interfaceDbManagement.getInterface(
            context.user,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        managementInterface.updateFile(fileId, {"neutralize": neutralize})
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        knowledgeDb = getKnowledgeInterface()
        if neutralize:
            # ── CRITICAL: purge existing (potentially un-neutralized) index
            # This MUST succeed before the response is sent so that no stale
            # raw-text chunks remain searchable while re-indexing runs.
            try:
                knowledgeDb.deleteFileContentIndex(fileId)
                logger.info("Neutralize toggle ON: deleted index + chunks for file %s", fileId)
            except Exception as e:
                logger.error("Neutralize toggle ON: FAILED to delete index for file %s: %s", fileId, e)
                raise HTTPException(
                    status_code=500,
                    detail=f"Could not purge existing index for neutralization — aborting toggle. Error: {e}",
                )
        else:
            # Turning neutralize OFF: update metadata only; re-index will overwrite
            try:
                from modules.datamodels.datamodelKnowledge import FileContentIndex
                # NOTE(review): filter key is "id" (not "fileId") — presumably
                # the index record shares the file's id; confirm against the
                # FileContentIndex model.
                indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fileId})
                for idx in indices:
                    # Recordset rows may be dicts or model instances.
                    idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
                    if idxId:
                        knowledgeDb.db.recordModify(FileContentIndex, idxId, {
                            "neutralizationStatus": "original",
                            "isNeutralized": False,
                        })
            except Exception as e:
                # Metadata sync is best-effort; the re-index below overwrites it.
                logger.warning("Failed to update FileContentIndex after neutralize-OFF for %s: %s", fileId, e)
        # Background re-index (safe: if it fails, there is simply no index)
        fileMeta = managementInterface.getFile(fileId)
        if fileMeta:
            # Capture name/type now so the task closure does not depend on the
            # (dict-or-model) metadata object staying alive.
            fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "")
            mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "")
            async def _runReindexAfterNeutralizeToggle():
                # Runs after the response is sent; failures are logged only.
                try:
                    await _autoIndexFile(fileId=fileId, fileName=fn, mimeType=mt, user=context.user)
                except Exception as ex:
                    logger.error("Re-index after neutralize toggle failed for %s: %s (file has NO index until next re-index)", fileId, ex)
            background_tasks.add_task(_runReindexAfterNeutralizeToggle)
        return {"fileId": fileId, "neutralize": neutralize, "updated": True}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating file neutralize flag: {e}")
        raise HTTPException(status_code=500, detail=str(e))
# ── File endpoints with path parameters (catch-all /{fileId}) ─────────────────
@router.get("/{fileId}")
@limiter.limit("30/minute")
def get_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
):
    """Get a file. Resolves the file's mandate/instance scope automatically.

    The file is looked up in its own mandate/instance scope (not necessarily
    the request's), serialized to a plain dict and enriched with FK labels.
    """
    try:
        _iface, record = _resolveFileWithScope(currentUser, context, fileId)
        if not record:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )
        # Normalize pydantic model vs. plain mapping into a dict.
        if hasattr(record, "model_dump"):
            payload = record.model_dump()
        else:
            payload = dict(record)
        return enrichRowsWithFkLabels([payload], FileItem)[0]
    except interfaceDbManagement.FileNotFoundError as e:
        logger.warning(f"File not found: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(e)
        )
    except interfaceDbManagement.FilePermissionError as e:
        logger.warning(f"No permission for file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=str(e)
        )
    except interfaceDbManagement.FileError as e:
        logger.error(f"Error retrieving file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
    except Exception as e:
        logger.error(f"Unexpected error retrieving file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error retrieving file: {str(e)}"
        )
@router.put("/{fileId}", response_model=FileItem)
@limiter.limit("10/minute")
def update_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to update"),
    file_info: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> FileItem:
    """Update file info.

    Only whitelisted fields may be changed; "global" scope is sysadmin-only
    and the caller must pass the RBAC update check.

    FAILSAFE (mirrors PATCH /{fileId}/neutralize): when this endpoint turns
    ``neutralize`` ON, the existing Knowledge Store index and chunks are
    deleted SYNCHRONOUSLY. Previously this generic PUT bypassed the purge,
    leaving un-neutralized raw-text chunks searchable.

    Args:
        request: Raw request object (required by the rate-limiter decorator).
        fileId: ID of the file to update.
        file_info: Candidate field updates; non-whitelisted keys are dropped.
        currentUser: Authenticated user.

    Returns:
        The updated FileItem.

    Raises:
        HTTPException: 400 when no editable fields were provided, 403 on
            scope/RBAC violations, 404 when the file is missing, 500 when
            the update or the neutralize index purge fails.
    """
    try:
        _EDITABLE_FIELDS = {"fileName", "scope", "tags", "description", "folderId", "neutralize"}
        safeData = {k: v for k, v in file_info.items() if k in _EDITABLE_FIELDS}
        if not safeData:
            raise HTTPException(status_code=400, detail=routeApiMsg("No editable fields provided"))
        managementInterface = interfaceDbManagement.getInterface(currentUser)
        file = managementInterface.getFile(fileId)
        if not file:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )
        # "global" scope would expose the file everywhere — sysadmin-only.
        if safeData.get("scope") == "global" and not getattr(currentUser, "isSysAdmin", False):
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=routeApiMsg("Only sysadmins can set global scope"),
            )
        if not managementInterface.checkRbacPermission(FileItem, "update", fileId):
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=routeApiMsg("Not authorized to update this file")
            )
        result = managementInterface.updateFile(fileId, safeData)
        if not result:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=routeApiMsg("Failed to update file")
            )
        # ── CRITICAL failsafe: purge the existing (potentially un-neutralized)
        # index when neutralize is switched ON, exactly like the dedicated
        # PATCH /{fileId}/neutralize endpoint does. Must succeed before the
        # response so no stale raw-text chunks remain searchable.
        if safeData.get("neutralize") is True:
            from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
            try:
                getKnowledgeInterface().deleteFileContentIndex(fileId)
                logger.info("PUT update: neutralize ON — deleted index + chunks for file %s", fileId)
            except Exception as purgeErr:
                logger.error("PUT update: FAILED to delete index for file %s: %s", fileId, purgeErr)
                raise HTTPException(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail=f"Could not purge existing index for neutralization — aborting. Error: {purgeErr}",
                )
        # Get updated file
        updatedFile = managementInterface.getFile(fileId)
        return updatedFile
    except HTTPException as he:
        raise he
    except Exception as e:
        logger.error(f"Error updating file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
@router.delete("/{fileId}", response_model=Dict[str, Any])
@limiter.limit("10/minute")
def delete_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to delete"),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Delete a file.

    Args:
        request: Raw request object (required by the rate-limiter decorator).
        fileId: ID of the file to delete.
        currentUser: Authenticated user.

    Returns:
        Dict with a success message.

    Raises:
        HTTPException: 404 when the file does not exist, 500 when deletion
            fails or an unexpected error occurs.
    """
    # Fix: this was the only endpoint in the module without a try/except
    # boundary — interface errors bubbled up unlogged. Now errors are logged
    # and returned as a sanitized 500, consistent with the sibling handlers.
    try:
        managementInterface = interfaceDbManagement.getInterface(currentUser)
        # Check if the file exists
        existingFile = managementInterface.getFile(fileId)
        if not existingFile:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )
        success = managementInterface.deleteFile(fileId)
        if not success:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=routeApiMsg("Error deleting the file")
            )
        return {"message": f"File with ID {fileId} successfully deleted"}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error deleting file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
@router.get("/{fileId}/download")
@limiter.limit("30/minute")
def download_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to download"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Response:
    """Download a file. Resolves the file's mandate/instance scope automatically,
    so direct links work even when X-Mandate-Id / X-Instance-Id headers
    are not sent by the browser."""
    try:
        mgmt, meta = _resolveFileWithScope(currentUser, context, fileId)
        if not meta:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )
        payload = mgmt.getFileData(fileId)
        if not payload:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File content not found for ID {fileId}"
            )
        # Percent-encode the filename (RFC 5987 filename*) so Unicode names
        # survive the Content-Disposition header.
        import urllib.parse
        quotedName = urllib.parse.quote(meta.fileName)
        disposition = f"attachment; filename*=UTF-8''{quotedName}"
        return Response(
            content=payload,
            media_type=meta.mimeType,
            headers={"Content-Disposition": disposition},
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error downloading file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error downloading file: {str(e)}"
        )
@router.get("/{fileId}/preview", response_model=FilePreview)
@limiter.limit("30/minute")
def preview_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to preview"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> FilePreview:
    """Preview a file's content. Resolves the file's mandate/instance scope automatically."""
    try:
        # Resolve the file in its own scope; None metadata means not found
        # or no access in that scope.
        mgmt, meta = _resolveFileWithScope(currentUser, context, fileId)
        if not meta:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )
        content = mgmt.getFileContent(fileId)
        if content:
            return content
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"File with ID {fileId} not found or no content available"
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error previewing file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error previewing file: {str(e)}"
        )
@router.post("/{fileId}/move")
@limiter.limit("10/minute")
def move_file(
    request: Request,
    fileId: str = Path(...),
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Move a file to a different folder.

    If the target folder is neutralized, the file inherits neutralize=True.
    Fix: the inheritance is now resolved BEFORE the move and both fields are
    written in a single update — the previous two-write sequence left a
    window (and a partial-failure state) where an un-neutralized file sat
    inside a neutralized folder.

    Args:
        request: Raw request object (required by the rate-limiter decorator).
        fileId: ID of the file to move.
        body: Request body; ``targetFolderId`` (None/absent = root).
        currentUser: Authenticated user.
        context: Resolved request context (mandate, feature instance).

    Returns:
        Dict with ``success``, ``fileId`` and the new ``folderId``.

    Raises:
        HTTPException: 400 for domain validation errors (consistent with the
            batch-move endpoint), 500 for unexpected failures.
    """
    targetFolderId = body.get("targetFolderId")
    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        updateData: Dict[str, Any] = {"folderId": targetFolderId}
        if targetFolderId:
            # Best-effort inheritance check; a folder lookup failure must not
            # block the move itself (matches the original best-effort intent).
            try:
                targetFolder = mgmt.getFolder(targetFolderId)
                folderNeut = (targetFolder.get("neutralize") if isinstance(targetFolder, dict)
                              else getattr(targetFolder, "neutralize", False)) if targetFolder else False
                if folderNeut:
                    updateData["neutralize"] = True
                    logger.info("File %s moved to neutralized folder %s — inherited neutralize=True", fileId, targetFolderId)
            except Exception as e:
                logger.warning("File move: folder neutralize inheritance check failed for %s: %s", fileId, e)
        # Single atomic-ish write: folderId and (possibly) neutralize together.
        mgmt.updateFile(fileId, updateData)
        return {"success": True, "fileId": fileId, "folderId": targetFolderId}
    except HTTPException:
        raise
    except ValueError as e:
        # Domain validation error → 400, consistent with the batch-move route.
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error moving file: {e}")
        raise HTTPException(status_code=500, detail=str(e))