1373 lines
56 KiB
Python
1373 lines
56 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
from fastapi import APIRouter, HTTPException, Depends, File, UploadFile, Form, Path, Request, status, Query, Response, Body, BackgroundTasks
|
|
from fastapi.responses import JSONResponse
|
|
from typing import List, Dict, Any, Optional
|
|
import logging
|
|
import json
|
|
import math
|
|
|
|
# Import auth module
|
|
from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext
|
|
|
|
# Import interfaces
|
|
import modules.interfaces.interfaceDbManagement as interfaceDbManagement
|
|
from modules.datamodels.datamodelFiles import FileItem, FilePreview, FileFolder
|
|
from modules.shared.attributeUtils import getModelAttributeDefinitions
|
|
from modules.datamodels.datamodelUam import User
|
|
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata, normalize_pagination_dict
|
|
from modules.shared.i18nRegistry import apiRouteContext
|
|
from modules.routes.routeHelpers import enrichRowsWithFkLabels
|
|
routeApiMsg = apiRouteContext("routeDataFiles")
|
|
|
|
# Configure logger
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _resolveFileWithScope(currentUser: User, context: RequestContext, fileId: str):
    """Returns (managementInterface, fileItem) with RBAC scoped to the file's own mandate/instance.

    Files generated by workflows (e.g. AI report outputs) carry their own
    mandateId/featureInstanceId. Direct download links via <a href> cannot send
    custom scope headers, so we resolve the scope from the FileItem itself and
    re-check RBAC in that scope.

    Returns (None, None) if the file does not exist or the user lacks access
    in the file's actual scope.
    """
    # Scope the incoming request was made under (may differ from the file's own scope).
    requestMandateId = str(context.mandateId) if context.mandateId else None
    requestInstanceId = str(context.featureInstanceId) if context.featureInstanceId else None

    mgmt = interfaceDbManagement.getInterface(
        currentUser,
        mandateId=requestMandateId,
        featureInstanceId=requestInstanceId,
    )
    # Fast path: the file is already visible in the request's own scope.
    fileItem = mgmt.getFile(fileId)
    if fileItem:
        return mgmt, fileItem

    # Not visible in the request scope — read the raw metadata row directly to
    # learn which scope the file was actually stored under.
    metas = mgmt.db.getRecordset(FileItem, recordFilter={"id": fileId})
    if not metas:
        return None, None

    meta = metas[0]
    fileMandateId = meta.get("mandateId") or None
    fileInstanceId = meta.get("featureInstanceId") or None

    # File carries no scope of its own: there is no alternative scope to retry
    # in, so the earlier getFile() denial is final.
    if not fileMandateId and not fileInstanceId:
        return None, None

    # File's scope equals the request scope, yet getFile() denied it above —
    # a genuine access denial, not a scope mismatch. Do not retry.
    if fileMandateId == requestMandateId and fileInstanceId == requestInstanceId:
        return None, None

    # Re-run the RBAC check with an interface scoped to the file's own
    # mandate/feature instance.
    scopedMgmt = interfaceDbManagement.getInterface(
        currentUser,
        mandateId=fileMandateId,
        featureInstanceId=fileInstanceId,
    )
    fileItem = scopedMgmt.getFile(fileId)
    if not fileItem:
        return None, None

    return scopedMgmt, fileItem
|
|
|
|
|
|
async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user, *, mandateId: str = None, featureInstanceId: str = None):
    """Background task: pre-scan + extraction + knowledge indexing.

    Step 1: Structure Pre-Scan (AI-free) -> FileContentIndex (persisted)

    Step 2: Content extraction via runExtraction -> ContentParts

    Step 3: KnowledgeService.requestIngestion -> idempotent chunking + embedding -> Knowledge Store

    Best-effort: any failure is logged and the file is returned to status
    "active" so it never stays stuck in "processing".
    """
    # `user` may be a full User object or a plain user-id string.
    userId = user.id if hasattr(user, "id") else str(user)
    try:
        mgmtInterface = interfaceDbManagement.getInterface(
            user,
            mandateId=mandateId or None,
            featureInstanceId=featureInstanceId or None,
        )
        # Mark as in-flight for the duration of the pipeline.
        mgmtInterface.updateFile(fileId, {"status": "processing"})

        rawBytes = mgmtInterface.getFileData(fileId)
        if not rawBytes:
            # Nothing to index — release the file immediately.
            logger.warning(f"Auto-index: no file data for {fileId}, skipping")
            mgmtInterface.updateFile(fileId, {"status": "active"})
            return

        # Resolve the file's own scope; metadata may come back as a dict or a model object.
        file_meta = mgmtInterface.getFile(fileId)
        feature_instance_id = ""
        mandate_id = ""
        file_scope = "personal"
        if file_meta:
            if isinstance(file_meta, dict):
                feature_instance_id = file_meta.get("featureInstanceId") or ""
                mandate_id = file_meta.get("mandateId") or ""
                file_scope = file_meta.get("scope") or "personal"
            else:
                feature_instance_id = getattr(file_meta, "featureInstanceId", None) or ""
                mandate_id = getattr(file_meta, "mandateId", None) or ""
                file_scope = getattr(file_meta, "scope", None) or "personal"

        logger.info(f"Auto-index starting for {fileName} ({len(rawBytes)} bytes, {mimeType})")

        # Step 1: Structure Pre-Scan (AI-free)
        from modules.serviceCenter.services.serviceKnowledge.subPreScan import preScanDocument
        contentIndex = await preScanDocument(
            fileData=rawBytes,
            mimeType=mimeType,
            fileId=fileId,
            fileName=fileName,
            userId=userId,
            featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
            mandateId=str(mandate_id) if mandate_id else "",
            scope=file_scope,
        )
        logger.info(
            f"Pre-scan complete for {fileName}: "
            f"{contentIndex.totalObjects} objects"
        )

        # Persist FileContentIndex immediately.
        # IMPORTANT: preserve `_ingestion` metadata and `status="indexed"` from any
        # prior successful run — otherwise this upsert wipes the idempotency cache
        # and requestIngestion cannot detect duplicates (AC4 breaks).
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        knowledgeDb = getKnowledgeInterface()
        try:
            _existing = knowledgeDb.getFileContentIndex(fileId)
        except Exception:
            # Treat lookup failures as "no prior index" — the upsert below still runs.
            _existing = None
        if _existing:
            # Prior index may be a dict or a model; read both shapes defensively.
            _existingStruct = (
                _existing.get("structure") if isinstance(_existing, dict)
                else getattr(_existing, "structure", {})
            ) or {}
            _existingStatus = (
                _existing.get("status") if isinstance(_existing, dict)
                else getattr(_existing, "status", "")
            ) or ""
            if "_ingestion" in _existingStruct:
                # Copy before mutating so we never alias a shared structure dict.
                contentIndex.structure = dict(contentIndex.structure or {})
                contentIndex.structure["_ingestion"] = _existingStruct["_ingestion"]
            if _existingStatus == "indexed":
                contentIndex.status = "indexed"
        knowledgeDb.upsertFileContentIndex(contentIndex)

        # Step 2: Content extraction (AI-free, produces ContentParts)
        from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry, ChunkerRegistry
        from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
        from modules.datamodels.datamodelExtraction import ExtractionOptions

        extractorRegistry = ExtractorRegistry()
        chunkerRegistry = ChunkerRegistry()
        # mergeStrategy=None: keep per-page / per-section granularity for RAG ingestion.
        # The default MergeStrategy concatenates all text parts into a single blob, which
        # collapses a 500-page PDF into one ContentChunk and destroys semantic retrieval.
        options = ExtractionOptions(mergeStrategy=None)

        extracted = runExtraction(
            extractorRegistry, chunkerRegistry,
            rawBytes, fileName, mimeType, options,
        )

        contentObjects = []
        for part in extracted.parts:
            # Map extractor type groups onto the knowledge store's content types.
            contentType = "text"
            if part.typeGroup == "image":
                contentType = "image"
            elif part.typeGroup in ("binary", "container"):
                contentType = "other"

            # Skip empty / whitespace-only parts — nothing to index.
            if not part.data or not part.data.strip():
                continue

            contentObjects.append({
                "contentObjectId": part.id,
                "contentType": contentType,
                "data": part.data,
                "contextRef": {
                    "containerPath": fileName,
                    "location": part.label or "file",
                    **(part.metadata or {}),
                },
            })

        logger.info(f"Extracted {len(contentObjects)} content objects from {fileName}")

        if not contentObjects:
            # Nothing extractable: mark the index complete and release the file.
            knowledgeDb.updateFileStatus(fileId, "indexed")
            mgmtInterface.updateFile(fileId, {"status": "active"})
            return

        # Step 3: Knowledge indexing (chunking + embedding)
        from modules.serviceCenter import getService
        from modules.serviceCenter.context import ServiceCenterContext

        ctx = ServiceCenterContext(
            user=user,
            mandate_id=str(mandate_id) if mandate_id else "",
            feature_instance_id=str(feature_instance_id) if feature_instance_id else "",
        )
        knowledgeService = getService("knowledge", ctx)

        from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

        await knowledgeService.requestIngestion(
            IngestionJob(
                sourceKind="file",
                sourceId=fileId,
                fileName=fileName,
                mimeType=mimeType,
                userId=userId,
                featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
                mandateId=str(mandate_id) if mandate_id else "",
                contentObjects=contentObjects,
                structure=contentIndex.structure,
                provenance={"lane": "upload", "route": "routeDataFiles._autoIndexFile"},
            )
        )

        # Re-acquire interface after await to avoid stale user context from the singleton
        # NOTE(review): this re-acquire drops the mandateId/featureInstanceId scope used
        # at the top of the function — confirm updateFile works unscoped here.
        mgmtInterface = interfaceDbManagement.getInterface(user)
        mgmtInterface.updateFile(fileId, {"status": "active"})
        logger.info(f"Auto-index complete for file {fileId} ({fileName})")

    except Exception as e:
        logger.error(f"Auto-index failed for file {fileId}: {e}", exc_info=True)
        try:
            # Best-effort recovery: never leave the file stuck in "processing".
            errMgmt = interfaceDbManagement.getInterface(user)
            errMgmt.updateFile(fileId, {"status": "active"})
        except Exception:
            # Deliberate swallow — this is a background task; the primary error
            # was already logged above.
            pass
|
|
|
|
# Model attributes for FileItem
# Column/attribute definitions derived from the FileItem datamodel, computed
# once at import time (used by the frontend for table rendering).
fileAttributes = getModelAttributeDefinitions(FileItem)

# Create router for file endpoints
# All routes below are mounted under /api/files; the `responses` map documents
# the shared error statuses in the OpenAPI schema.
router = APIRouter(
    prefix="/api/files",
    tags=["Manage Files"],
    responses={
        404: {"description": "Not found"},
        400: {"description": "Bad request"},
        401: {"description": "Unauthorized"},
        403: {"description": "Forbidden"},
        500: {"description": "Internal server error"}
    }
)
|
|
|
|
|
|
@router.get("/folders/tree")
@limiter.limit("120/minute")
def get_folder_tree(
    request: Request,
    owner: str = Query("me", description="'me' | 'shared'"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Return the caller's own or shared folder tree within the request scope."""
    try:
        mandateScope = str(context.mandateId) if context.mandateId else None
        instanceScope = str(context.featureInstanceId) if context.featureInstanceId else None
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=mandateScope,
            featureInstanceId=instanceScope,
        )
        ownerKey = (owner or "me").strip().lower()
        if ownerKey == "me":
            return mgmt.getOwnFolderTree()
        if ownerKey == "shared":
            return mgmt.getSharedFolderTree()
        # Anything other than the two recognized owners is a client error.
        raise HTTPException(status_code=400, detail="owner must be 'me' or 'shared'")
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"get_folder_tree error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.post("/folders", status_code=status.HTTP_201_CREATED)
@limiter.limit("30/minute")
def create_folder(
    request: Request,
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Create a folder (optionally under `parentId`) in the request scope."""
    try:
        name = body.get("name")
        # Reject missing or whitespace-only names up front.
        if not name or not str(name).strip():
            raise HTTPException(status_code=400, detail="name is required")
        parentId = body.get("parentId") or None
        mandateScope = str(context.mandateId) if context.mandateId else None
        instanceScope = str(context.featureInstanceId) if context.featureInstanceId else None
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=mandateScope,
            featureInstanceId=instanceScope,
        )
        return mgmt.createFolder(str(name).strip(), parentId)
    except PermissionError as e:
        raise HTTPException(status_code=403, detail=str(e))
    except interfaceDbManagement.FileNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"create_folder error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.patch("/folders/{folderId}")
@limiter.limit("30/minute")
def rename_folder(
    request: Request,
    folderId: str = Path(...),
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Rename the folder identified by `folderId`."""
    try:
        name = body.get("name")
        # A blank or missing name is a client error.
        if not name or not str(name).strip():
            raise HTTPException(status_code=400, detail="name is required")
        mandateScope = str(context.mandateId) if context.mandateId else None
        instanceScope = str(context.featureInstanceId) if context.featureInstanceId else None
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=mandateScope,
            featureInstanceId=instanceScope,
        )
        return mgmt.renameFolder(folderId, str(name).strip())
    except PermissionError as e:
        raise HTTPException(status_code=403, detail=str(e))
    except interfaceDbManagement.FileNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"rename_folder error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.post("/folders/{folderId}/move")
@limiter.limit("30/minute")
def move_folder(
    request: Request,
    folderId: str = Path(...),
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Move a folder under a new parent (root when `parentId` is absent/falsy)."""
    try:
        newParentId = body.get("parentId")
        mandateScope = str(context.mandateId) if context.mandateId else None
        instanceScope = str(context.featureInstanceId) if context.featureInstanceId else None
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=mandateScope,
            featureInstanceId=instanceScope,
        )
        # Normalize empty string / None to None (move to root).
        return mgmt.moveFolder(folderId, newParentId or None)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except PermissionError as e:
        raise HTTPException(status_code=403, detail=str(e))
    except interfaceDbManagement.FileNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"move_folder error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.delete("/folders/{folderId}")
@limiter.limit("30/minute")
def delete_folder(
    request: Request,
    folderId: str = Path(...),
    cascade: bool = Query(True, description="Cascade delete sub-folders and files"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Delete a folder including its sub-folders and files.

    NOTE(review): the `cascade` query parameter is accepted but never read —
    deleteFolderCascade is always called, so deletion is always recursive.
    Confirm whether a non-cascading delete should be supported or the
    parameter removed.
    """
    try:
        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        return managementInterface.deleteFolderCascade(folderId)
    except PermissionError as e:
        raise HTTPException(status_code=403, detail=str(e))
    except interfaceDbManagement.FileNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"delete_folder error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.patch("/folders/{folderId}/scope")
@limiter.limit("30/minute")
def patch_folder_scope(
    request: Request,
    folderId: str = Path(...),
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Change a folder's sharing scope, optionally cascading to contained files."""
    try:
        scope = body.get("scope")
        if not scope:
            raise HTTPException(status_code=400, detail="scope is required")
        cascadeToFiles = body.get("cascadeToFiles", False)
        mandateScope = str(context.mandateId) if context.mandateId else None
        instanceScope = str(context.featureInstanceId) if context.featureInstanceId else None
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=mandateScope,
            featureInstanceId=instanceScope,
        )
        return mgmt.patchFolderScope(folderId, scope, cascadeToFiles)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except PermissionError as e:
        raise HTTPException(status_code=403, detail=str(e))
    except interfaceDbManagement.FileNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"patch_folder_scope error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.patch("/folders/{folderId}/neutralize")
@limiter.limit("30/minute")
def patch_folder_neutralize(
    request: Request,
    folderId: str = Path(...),
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Toggle a folder's 'neutralize' flag."""
    try:
        neutralize = body.get("neutralize")
        # Explicit None check: False is a legitimate value here.
        if neutralize is None:
            raise HTTPException(status_code=400, detail="neutralize is required")
        mandateScope = str(context.mandateId) if context.mandateId else None
        instanceScope = str(context.featureInstanceId) if context.featureInstanceId else None
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=mandateScope,
            featureInstanceId=instanceScope,
        )
        return mgmt.patchFolderNeutralize(folderId, bool(neutralize))
    except PermissionError as e:
        raise HTTPException(status_code=403, detail=str(e))
    except interfaceDbManagement.FileNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"patch_folder_neutralize error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
@router.get("/list")
@limiter.limit("120/minute")
def get_files(
    request: Request,
    pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
    mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
    column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
):
    """
    Get files with optional pagination, sorting, and filtering.

    Query Parameters:
    - pagination: JSON-encoded PaginationParams object, or None for no pagination
    - mode: optional special mode ('groupSummary', 'filterValues', 'ids')
    - column: column key (required when mode=filterValues)

    Examples:
    - GET /api/files/list (no pagination - returns all items)
    - GET /api/files/list?pagination={"page":1,"pageSize":10,"sort":[]}
    - GET /api/files/list?pagination={"page":2,"pageSize":20,"sort":[{"field":"fileName","direction":"asc"}]}
    """
    try:
        # Parse pagination parameter (400 on malformed JSON or invalid params).
        paginationParams = None
        if pagination:
            try:
                paginationDict = json.loads(pagination)
                if paginationDict:
                    paginationDict = normalize_pagination_dict(paginationDict)
                    paginationParams = PaginationParams(**paginationDict)
            except (json.JSONDecodeError, ValueError) as e:
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid pagination parameter: {str(e)}"
                )

        from modules.routes.routeHelpers import (
            handleIdsMode,
            handleFilterValuesInMemory,
            resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels,
        )
        import modules.interfaces.interfaceDbApp as _appIface
        from modules.datamodels.datamodelPagination import AppliedViewMeta

        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
        )
        appInterface = _appIface.getInterface(currentUser)

        # Resolve view and merge its config into the effective params.
        viewKey = paginationParams.viewKey if paginationParams else None
        viewConfig, viewDisplayName = resolveView(appInterface, "files/list", viewKey)
        viewMeta = AppliedViewMeta(viewKey=viewKey, displayName=viewDisplayName) if viewKey else None
        paginationParams = applyViewToParams(paginationParams, viewConfig)
        groupByLevels = effective_group_by_levels(paginationParams, viewConfig)

        def _filesToDicts(fileItems):
            # Normalize pydantic models / mappings to plain dicts for FK enrichment.
            return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems]

        if mode == "groupSummary":
            if not pagination:
                raise HTTPException(status_code=400, detail="pagination required for groupSummary")
            from modules.routes.routeHelpers import (
                apply_strategy_b_filters_and_sort,
                build_group_summary_groups,
            )
            if not groupByLevels or not groupByLevels[0].get("field"):
                raise HTTPException(
                    status_code=400,
                    detail="groupByLevels[0].field required for groupSummary",
                )
            field = groupByLevels[0]["field"]
            null_label = str(groupByLevels[0].get("nullLabel") or "—")
            allFiles = managementInterface.getAllFiles()
            allItems = enrichRowsWithFkLabels(
                _filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])),
                FileItem,
            )
            filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
            groups_out = build_group_summary_groups(filtered, field, null_label)
            return JSONResponse(content={"groups": groups_out})

        if mode == "filterValues":
            if not column:
                raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")
            allFiles = managementInterface.getAllFiles()
            items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
            itemDicts = _filesToDicts(items)
            enrichRowsWithFkLabels(itemDicts, FileItem)
            return handleFilterValuesInMemory(itemDicts, column, pagination)

        if mode == "ids":
            recordFilter = {"sysCreatedBy": managementInterface.userId}
            return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)

        if not groupByLevels:
            # No grouping: let DB handle pagination directly (fastest path)
            result = managementInterface.getAllFiles(pagination=paginationParams)
            if paginationParams and hasattr(result, 'items'):
                enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem)
                resp: dict = {
                    "items": enriched,
                    "pagination": PaginationMetadata(
                        currentPage=paginationParams.page,
                        pageSize=paginationParams.pageSize,
                        totalItems=result.totalItems,
                        totalPages=result.totalPages,
                        sort=paginationParams.sort,
                        filters=paginationParams.filters
                    ).model_dump(),
                }
            else:
                items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
                resp = {"items": enrichRowsWithFkLabels(_filesToDicts(items), FileItem), "pagination": None}
            if viewMeta:
                resp["appliedView"] = viewMeta.model_dump()
            return resp

        # Strategy B grouping: load full list, group, then slice
        allFiles = managementInterface.getAllFiles()
        allItems = enrichRowsWithFkLabels(
            _filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])),
            FileItem,
        )

        # BUGFIX: the no-pagination early-return must run BEFORE dereferencing
        # paginationParams.filters/.sort — grouping can be active via the view
        # config alone, in which case paginationParams is still None here and
        # the old order raised AttributeError.
        if not paginationParams:
            resp = {"items": allItems, "pagination": None}
            if viewMeta:
                resp["appliedView"] = viewMeta.model_dump()
            return resp

        from modules.routes.routeHelpers import apply_strategy_b_filters_and_sort
        if paginationParams.filters or paginationParams.sort:
            allItems = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)

        totalItems = len(allItems)
        totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
        page_items, groupLayout = buildGroupLayout(allItems, groupByLevels, paginationParams.page, paginationParams.pageSize)

        resp = {
            "items": page_items,
            "pagination": PaginationMetadata(
                currentPage=paginationParams.page,
                pageSize=paginationParams.pageSize,
                totalItems=totalItems,
                totalPages=totalPages,
                sort=paginationParams.sort,
                filters=paginationParams.filters
            ).model_dump(),
        }
        if groupLayout:
            resp["groupLayout"] = groupLayout.model_dump()
        if viewMeta:
            resp["appliedView"] = viewMeta.model_dump()
        return resp
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting files: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get files: {str(e)}"
        )
|
|
|
|
|
|
def _LEGACY_addFileToGroup_REMOVED():
|
|
"""Removed — file-group tree no longer exists. Use multi-select bulk operations."""
|
|
pass
|
|
|
|
|
|
|
|
|
|
@router.post("/upload", status_code=status.HTTP_201_CREATED)
@limiter.limit("10/minute")
async def upload_file(
    request: Request,
    file: UploadFile = File(...),
    workflowId: Optional[str] = Form(None),
    featureInstanceId: Optional[str] = Form(None),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
) -> JSONResponse:
    """Upload a file.

    Stores the uploaded content via the management interface (with duplicate
    detection), optionally tags it with a feature instance, and kicks off the
    non-blocking auto-index pipeline. Returns the stored file metadata plus
    duplicate information.

    BUGFIX: this docstring previously appeared after a statement, making it a
    dead string expression instead of the function docstring (invisible to
    __doc__ and to the OpenAPI description). It is now the first statement.
    """
    # Add fileName property to UploadFile for consistency with backend model
    file.fileName = file.filename
    try:
        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )

        # Read file
        fileContent = await file.read()

        # Check size limits (config value is in MB).
        maxSize = int(interfaceDbManagement.APP_CONFIG.get("File_Management_MAX_UPLOAD_SIZE_MB")) * 1024 * 1024  # in bytes
        if len(fileContent) > maxSize:
            raise HTTPException(
                status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                detail=f"File too large. Maximum size: {interfaceDbManagement.APP_CONFIG.get('File_Management_MAX_UPLOAD_SIZE_MB')}MB"
            )

        # Save file via LucyDOM interface in the database
        fileItem, duplicateType = managementInterface.saveUploadedFile(
            fileContent, file.filename
        )

        # Tag with the caller-supplied feature instance only if the file has none yet.
        if featureInstanceId and not fileItem.featureInstanceId:
            managementInterface.updateFile(fileItem.id, {"featureInstanceId": featureInstanceId})
            fileItem.featureInstanceId = featureInstanceId

        # Determine response message based on duplicate type
        if duplicateType == "exact_duplicate":
            message = f"File '{file.filename}' already exists with identical content. Reusing existing file."
        elif duplicateType == "name_conflict":
            message = f"File '{file.filename}' already exists with different content. Uploaded as '{fileItem.fileName}'."
        else:  # new_file
            message = "File uploaded successfully"

        # Convert FileItem to dictionary for JSON response
        fileMeta = fileItem.model_dump()

        # If workflowId is provided, include it in the response (not stored in FileItem model)
        if workflowId:
            fileMeta["workflowId"] = workflowId

        # Trigger background auto-index pipeline (non-blocking)
        # Also runs for duplicates in case the original was never successfully indexed
        shouldIndex = duplicateType == "new_file"
        if not shouldIndex:
            try:
                from modules.interfaces.interfaceDbKnowledge import getInterface as _getKnowledgeInterface
                _kDb = _getKnowledgeInterface()
                _existingIndex = _kDb.getFileContentIndex(fileItem.id)
                if not _existingIndex:
                    shouldIndex = True
                    logger.info(f"Re-triggering auto-index for duplicate {fileItem.id} (not yet indexed)")
            except Exception:
                # Index lookup failed: err on the side of (idempotent) re-indexing.
                shouldIndex = True

        if shouldIndex:
            try:
                import asyncio
                # Fire-and-forget; _autoIndexFile handles its own errors.
                asyncio.ensure_future(_autoIndexFile(
                    fileId=fileItem.id,
                    fileName=fileItem.fileName,
                    mimeType=fileItem.mimeType,
                    user=currentUser,
                    mandateId=str(context.mandateId) if context.mandateId else None,
                    featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
                ))
            except Exception as indexErr:
                logger.warning(f"Auto-index trigger failed (non-blocking): {indexErr}")

        # Response with duplicate information
        return JSONResponse({
            "message": message,
            "file": fileMeta,
            "duplicateType": duplicateType,
            "originalFileName": file.filename,
            "storedFileName": fileItem.fileName,
            "isDuplicate": duplicateType != "new_file"
        })

    except interfaceDbManagement.FileStorageError as e:
        logger.error(f"Error during file upload (storage): {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
    except Exception as e:
        logger.error(f"Error during file upload: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error during file upload: {str(e)}"
        )
|
|
|
|
|
|
|
|
@router.post("/batch-delete")
@limiter.limit("10/minute")
def batch_delete_items(
    request: Request,
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Batch delete files."""
    fileIds = body.get("fileIds") or []

    # Validate shape before touching any interface.
    if not isinstance(fileIds, list):
        raise HTTPException(status_code=400, detail=routeApiMsg("fileIds must be an array"))

    try:
        mandateScope = str(context.mandateId) if context.mandateId else None
        instanceScope = str(context.featureInstanceId) if context.featureInstanceId else None
        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=mandateScope,
            featureInstanceId=instanceScope,
        )

        deletedCount = 0
        if fileIds:
            batchOutcome = managementInterface.deleteFilesBatch(fileIds)
            deletedCount = batchOutcome.get("deletedFiles", 0)

        return {"deletedFiles": deletedCount}
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error in batch delete: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.post("/batch-download")
@limiter.limit("10/minute")
def batchDownload(
    request: Request,
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Download multiple files and/or folders as a single ZIP archive,
    preserving the folder hierarchy as ZIP paths.

    Request body:
        fileIds:   list of individual file IDs to include ("loose" files).
        folderIds: list of folder IDs; each folder is expanded recursively
                   and its files are placed under their full folder path.

    Raises:
        HTTPException 400 when neither fileIds nor folderIds is given.
        HTTPException 404 when nothing downloadable is found.
        HTTPException 500 on unexpected errors.

    Files that fail to load individually are skipped with a warning so one
    broken file does not abort the whole archive.
    """
    import io, zipfile

    fileIds = body.get("fileIds") or []
    folderIds = body.get("folderIds") or []

    if not fileIds and not folderIds:
        raise HTTPException(status_code=400, detail="fileIds or folderIds required")

    try:
        # Interface scoped to the caller's current mandate/instance context.
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )

        # Memoizes folder lookups so _folderPath does not re-fetch ancestors.
        folderCache: dict[str, dict] = {}

        def _getFolder(fid: str):
            # Cache misses (folder not found) are stored as {} so we never
            # re-query a known-missing folder.
            if fid not in folderCache:
                f = mgmt.getFolder(fid)
                folderCache[fid] = f if f else {}
            return folderCache[fid]

        def _folderPath(fid: str) -> str:
            """Build the full path for a folder by walking up parentId."""
            parts: list[str] = []
            current = fid
            # `visited` guards against a parentId cycle in corrupt data.
            visited: set[str] = set()
            while current and current not in visited:
                visited.add(current)
                folder = _getFolder(current)
                if not folder:
                    break
                parts.append(folder.get("name", current))
                current = folder.get("parentId")
            parts.reverse()
            return "/".join(parts)

        # Collect files from requested folders (recursive)
        # Each entry is (fileId, zipPathPrefix).
        fileEntries: list[tuple[str, str]] = []
        seenFileIds: set[str] = set()

        for fid in folderIds:
            # NOTE(review): relies on a private helper of the management
            # interface to expand the folder subtree (incl. the folder itself,
            # presumably — confirm against interfaceDbManagement).
            childFolderIds = mgmt._collectChildFolderIds(fid)
            for cfid in childFolderIds:
                # Prefix is the folder's FULL path from the root, not relative
                # to the selected folder.
                prefix = _folderPath(cfid)
                items = mgmt.db.getRecordset(FileItem, recordFilter={"folderId": cfid})
                for item in items:
                    # Recordset rows may be dicts or model instances.
                    itemId = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
                    if itemId and itemId not in seenFileIds:
                        seenFileIds.add(itemId)
                        fileEntries.append((itemId, prefix))

        # Loose files (not via folder selection)
        for fid in fileIds:
            if fid in seenFileIds:
                continue
            seenFileIds.add(fid)
            fileMeta = mgmt.getFile(fid)
            if not fileMeta:
                continue
            # Loose files still get their folder path so the ZIP layout
            # matches the on-screen hierarchy.
            fileFolderId = fileMeta.get("folderId") if isinstance(fileMeta, dict) else getattr(fileMeta, "folderId", None)
            prefix = _folderPath(fileFolderId) if fileFolderId else ""
            fileEntries.append((fid, prefix))

        if not fileEntries:
            raise HTTPException(status_code=404, detail="No downloadable files found")

        # Build the archive fully in memory, then stream it back.
        buf = io.BytesIO()
        with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
            for fid, prefix in fileEntries:
                try:
                    fileMeta = mgmt.getFile(fid)
                    fileData = mgmt.getFileData(fid)
                    if fileMeta and fileData:
                        # Fall back to the file ID if no fileName is stored.
                        name = (fileMeta.get("fileName") if isinstance(fileMeta, dict) else getattr(fileMeta, "fileName", fid)) or fid
                        zipPath = f"{prefix}/{name}" if prefix else name
                        zf.writestr(zipPath, fileData)
                except Exception as fe:
                    # Best-effort: a single unreadable file must not abort the ZIP.
                    logger.warning(f"batch_download: skipping file {fid}: {fe}")
        buf.seek(0)
        from fastapi.responses import StreamingResponse
        return StreamingResponse(
            buf,
            media_type="application/zip",
            headers={"Content-Disposition": 'attachment; filename="download.zip"'},
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"batch_download error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
# ── Bulk file operations (replace former group-based bulk routes) ─────────────
|
|
|
|
@router.post("/bulk/scope")
@limiter.limit("30/minute")
def bulk_set_scope(
    request: Request,
    body: dict = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Set scope for a list of files by their IDs.

    Per-file failures are logged and skipped; the response reports how many
    files were actually updated. Global scope is restricted to sysadmins.
    """
    targetIds: list = body.get("fileIds") or []
    requestedScope: str = body.get("scope") or ""

    # Validate the request before touching the database.
    if not targetIds:
        raise HTTPException(status_code=400, detail="fileIds is required")
    validScopes = {"personal", "featureInstance", "mandate", "global"}
    if requestedScope not in validScopes:
        raise HTTPException(status_code=400, detail=f"Invalid scope. Must be one of {validScopes}")
    if requestedScope == "global" and not context.isSysAdmin:
        raise HTTPException(status_code=403, detail="Only sysadmins can set global scope")

    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )

        successCount = 0
        for fid in targetIds:
            try:
                mgmt.updateFile(fid, {"scope": requestedScope})
                successCount += 1
            except Exception as e:
                # Best-effort bulk semantics: keep going on per-file failure.
                logger.error(f"bulk_set_scope: failed for file {fid}: {e}")

        return {"scope": requestedScope, "filesUpdated": successCount}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"bulk_set_scope error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.post("/bulk/neutralize")
@limiter.limit("30/minute")
def bulk_set_neutralize(
    request: Request,
    body: dict = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Set neutralize flag for a list of files by their IDs (incl. knowledge purge/reindex).

    FAILSAFE (consistent with PATCH /{fileId}/neutralize): when neutralize is
    turned ON, the file's existing knowledge index is purged so that no
    un-neutralized content remains searchable. Per-file failures are logged
    and skipped; the response reports how many files were updated.

    Raises:
        HTTPException 400 when fileIds or neutralize is missing.
        HTTPException 500 on unexpected errors.
    """
    fileIds: list = body.get("fileIds") or []
    neutralize = body.get("neutralize")
    if not fileIds:
        raise HTTPException(status_code=400, detail="fileIds is required")
    if neutralize is None:
        raise HTTPException(status_code=400, detail="neutralize is required")
    try:
        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        updated = 0
        for fid in fileIds:
            try:
                managementInterface.updateFile(fid, {"neutralize": neutralize})
                # BUGFIX: the purge must run when neutralize is turned ON
                # (the condition was previously `if not neutralize`, which
                # left un-neutralized chunks searchable — the opposite of
                # the failsafe documented in updateFileNeutralize).
                if neutralize:
                    try:
                        from modules.interfaces import interfaceDbKnowledge
                        kIface = interfaceDbKnowledge.getInterface(currentUser)
                        kIface.purgeFileKnowledge(fid)
                    except Exception as ke:
                        # Purge is best-effort per file in the bulk route.
                        logger.warning(f"bulk_set_neutralize: knowledge purge failed for {fid}: {ke}")
                updated += 1
            except Exception as e:
                logger.error(f"bulk_set_neutralize: failed for file {fid}: {e}")
        return {"neutralize": neutralize, "filesUpdated": updated}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"bulk_set_neutralize error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.post("/bulk/download-zip")
@limiter.limit("10/minute")
async def bulk_download_zip(
    request: Request,
    body: dict = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Download a list of files as a ZIP archive.

    Files that cannot be read are skipped (with a warning) rather than
    failing the whole archive. Each archive entry is named by the stored
    fileName, falling back to the file ID when no name is available.

    Raises:
        HTTPException 400 when fileIds is missing or empty.
        HTTPException 500 on unexpected errors.
    """
    import io, zipfile
    fileIds: list = body.get("fileIds") or []
    if not fileIds:
        raise HTTPException(status_code=400, detail="fileIds is required")
    try:
        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        # Build the archive in memory, then stream it back.
        buf = io.BytesIO()
        with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
            for fid in fileIds:
                try:
                    fileMeta = managementInterface.getFile(fid)
                    fileData = managementInterface.getFileData(fid)
                    if fileMeta and fileData:
                        # BUGFIX: getFile may return a dict (see batchDownload,
                        # which handles both shapes). getattr() never finds
                        # "fileName" on a dict, so entries were silently named
                        # by their opaque ID.
                        name = (fileMeta.get("fileName") if isinstance(fileMeta, dict) else getattr(fileMeta, "fileName", None)) or fid
                        zf.writestr(name, fileData)
                except Exception as fe:
                    logger.warning(f"bulk_download_zip: skipping file {fid}: {fe}")
        buf.seek(0)
        from fastapi.responses import StreamingResponse
        return StreamingResponse(
            buf,
            media_type="application/zip",
            headers={"Content-Disposition": 'attachment; filename="files.zip"'},
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"bulk_download_zip error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
# ── Scope & neutralize tagging endpoints (before /{fileId} catch-all) ─────────
|
|
|
|
@router.patch("/{fileId}/scope")
@limiter.limit("30/minute")
def updateFileScope(
    request: Request,
    background_tasks: BackgroundTasks,
    fileId: str = Path(..., description="ID of the file"),
    scope: str = Body(..., embed=True),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Update the scope of a file. Global scope requires sysAdmin.

    Side effects:
        - The file record's "scope" field is updated.
        - Matching FileContentIndex records are updated to the same scope
          (best-effort; failures are only logged).
        - A background re-index of the file is scheduled via _autoIndexFile.

    Raises:
        HTTPException 400 for an invalid scope value.
        HTTPException 403 when a non-sysadmin requests "global".
        HTTPException 500 on unexpected errors.
    """
    try:
        validScopes = {"personal", "featureInstance", "mandate", "global"}
        if scope not in validScopes:
            raise HTTPException(status_code=400, detail=f"Invalid scope: {scope}. Must be one of {validScopes}")

        # "global" visibility is privileged — sysadmins only.
        if scope == "global" and not context.isSysAdmin:
            raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))

        managementInterface = interfaceDbManagement.getInterface(
            context.user,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )

        managementInterface.updateFile(fileId, {"scope": scope})

        # Keep the knowledge-store index rows in sync with the new scope.
        # Best-effort: a failure here must not fail the scope change itself.
        try:
            from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
            from modules.datamodels.datamodelKnowledge import FileContentIndex
            knowledgeDb = getKnowledgeInterface()
            # NOTE(review): filters FileContentIndex by "id" == fileId — the
            # index record apparently shares the file's ID (same pattern as in
            # updateFileNeutralize); confirm against the knowledge datamodel.
            indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fileId})
            for idx in indices:
                # Rows may be dicts or model instances.
                idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
                if idxId:
                    knowledgeDb.db.recordModify(FileContentIndex, idxId, {"scope": scope})
        except Exception as e:
            logger.warning(f"Failed to update FileContentIndex scope for file {fileId}: {e}")

        # Schedule a re-index only if the file still resolves.
        fileMeta = managementInterface.getFile(fileId)
        if fileMeta:
            # Metadata may be a model instance or a dict.
            fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "")
            mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "")

            async def _runReindexAfterScopeChange():
                # Runs after the response is sent; failures are logged only.
                try:
                    await _autoIndexFile(
                        fileId=fileId, fileName=fn, mimeType=mt, user=context.user,
                        mandateId=str(context.mandateId) if context.mandateId else None,
                        featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
                    )
                except Exception as ex:
                    logger.warning("Re-index after scope change failed for %s: %s", fileId, ex)

            background_tasks.add_task(_runReindexAfterScopeChange)

        return {"fileId": fileId, "scope": scope, "updated": True}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating file scope: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.patch("/{fileId}/neutralize")
@limiter.limit("30/minute")
def updateFileNeutralize(
    request: Request,
    background_tasks: BackgroundTasks,
    fileId: str = Path(..., description="ID of the file"),
    neutralize: bool = Body(..., embed=True),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Toggle neutralization flag on a file.

    FAILSAFE: When turning neutralize ON, the existing Knowledge Store index
    and all content chunks are deleted SYNCHRONOUSLY before the response is
    returned. The re-index happens in a background task. If re-indexing
    fails the file simply has no index — no un-neutralized data can leak.
    """
    try:
        managementInterface = interfaceDbManagement.getInterface(
            context.user,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )

        # Persist the flag on the file record first.
        managementInterface.updateFile(fileId, {"neutralize": neutralize})

        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        knowledgeDb = getKnowledgeInterface()

        if neutralize:
            # ── CRITICAL: purge existing (potentially un-neutralized) index
            # This MUST succeed before the response is sent so that no stale
            # raw-text chunks remain searchable while re-indexing runs.
            try:
                knowledgeDb.deleteFileContentIndex(fileId)
                logger.info("Neutralize toggle ON: deleted index + chunks for file %s", fileId)
            except Exception as e:
                # Deliberately abort the whole toggle: failing open here would
                # leak un-neutralized content.
                logger.error("Neutralize toggle ON: FAILED to delete index for file %s: %s", fileId, e)
                raise HTTPException(
                    status_code=500,
                    detail=f"Could not purge existing index for neutralization — aborting toggle. Error: {e}",
                )
        else:
            # Turning neutralize OFF: update metadata only; re-index will overwrite
            try:
                from modules.datamodels.datamodelKnowledge import FileContentIndex
                # NOTE(review): index rows are looked up by "id" == fileId —
                # the index record apparently shares the file's ID (same
                # pattern as in updateFileScope).
                indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fileId})
                for idx in indices:
                    # Rows may be dicts or model instances.
                    idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
                    if idxId:
                        knowledgeDb.db.recordModify(FileContentIndex, idxId, {
                            "neutralizationStatus": "original",
                            "isNeutralized": False,
                        })
            except Exception as e:
                # Best-effort: the background re-index will rebuild anyway.
                logger.warning("Failed to update FileContentIndex after neutralize-OFF for %s: %s", fileId, e)

        # Background re-index (safe: if it fails, there is simply no index)
        fileMeta = managementInterface.getFile(fileId)
        if fileMeta:
            # Metadata may be a model instance or a dict.
            fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "")
            mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "")

            async def _runReindexAfterNeutralizeToggle():
                # Runs after the response is sent; on failure the file stays
                # un-indexed, which is the safe state.
                try:
                    await _autoIndexFile(
                        fileId=fileId, fileName=fn, mimeType=mt, user=context.user,
                        mandateId=str(context.mandateId) if context.mandateId else None,
                        featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
                    )
                except Exception as ex:
                    logger.error("Re-index after neutralize toggle failed for %s: %s (file has NO index until next re-index)", fileId, ex)

            background_tasks.add_task(_runReindexAfterNeutralizeToggle)

        return {"fileId": fileId, "neutralize": neutralize, "updated": True}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating file neutralize flag: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
# ── File endpoints with path parameters (catch-all /{fileId}) ─────────────────
|
|
|
|
@router.get("/{fileId}")
@limiter.limit("30/minute")
def get_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
):
    """Get a file. Resolves the file's mandate/instance scope automatically.

    Returns the file record as a dict enriched with foreign-key labels.

    Raises:
        HTTPException 404 when the file does not exist or is out of scope.
        HTTPException 403 on permission errors from the management layer.
        HTTPException 500 on other failures.
    """
    try:
        _mgmt, fileData = _resolveFileWithScope(currentUser, context, fileId)
        if not fileData:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )

        # Normalize to a plain dict before FK-label enrichment.
        fileDict = fileData.model_dump() if hasattr(fileData, "model_dump") else dict(fileData)
        enriched = enrichRowsWithFkLabels([fileDict], FileItem)
        return enriched[0]

    # BUGFIX: re-raise HTTPException untouched. Without this guard the 404
    # raised above was caught by the generic `except Exception` below and
    # converted into a 500 (sibling routes download_file/preview_file
    # already have this guard).
    except HTTPException:
        raise
    except interfaceDbManagement.FileNotFoundError as e:
        logger.warning(f"File not found: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(e)
        )
    except interfaceDbManagement.FilePermissionError as e:
        logger.warning(f"No permission for file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=str(e)
        )
    except interfaceDbManagement.FileError as e:
        logger.error(f"Error retrieving file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
    except Exception as e:
        logger.error(f"Unexpected error retrieving file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error retrieving file: {str(e)}"
        )
|
|
|
|
@router.put("/{fileId}", response_model=FileItem)
@limiter.limit("10/minute")
def update_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to update"),
    file_info: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> FileItem:
    """Update the editable metadata fields of a file and return the fresh record.

    Only a whitelisted set of fields may be changed; "global" scope is
    restricted to sysadmins and the update is RBAC-checked.
    """
    try:
        editableFields = {"fileName", "folderId", "scope", "tags", "description", "neutralize"}
        payload = {key: value for key, value in file_info.items() if key in editableFields}
        if not payload:
            raise HTTPException(status_code=400, detail=routeApiMsg("No editable fields provided"))

        managementInterface = interfaceDbManagement.getInterface(currentUser)

        # The file must exist before any permission checks.
        if not managementInterface.getFile(fileId):
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )

        # Only sysadmins may widen visibility to "global".
        if payload.get("scope") == "global" and not getattr(currentUser, "isSysAdmin", False):
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=routeApiMsg("Only sysadmins can set global scope"),
            )

        if not managementInterface.checkRbacPermission(FileItem, "update", fileId):
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=routeApiMsg("Not authorized to update this file")
            )

        if not managementInterface.updateFile(fileId, payload):
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=routeApiMsg("Failed to update file")
            )

        # Re-read so the response reflects persisted state.
        return managementInterface.getFile(fileId)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
|
|
|
|
@router.delete("/{fileId}", response_model=Dict[str, Any])
@limiter.limit("10/minute")
def delete_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to delete"),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Delete a file by ID and return a confirmation message."""
    managementInterface = interfaceDbManagement.getInterface(currentUser)

    # 404 when the file is unknown.
    if not managementInterface.getFile(fileId):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"File with ID {fileId} not found"
        )

    # 500 when the interface reports a failed deletion.
    if not managementInterface.deleteFile(fileId):
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=routeApiMsg("Error deleting the file")
        )

    return {"message": f"File with ID {fileId} successfully deleted"}
|
|
|
|
@router.get("/{fileId}/download")
@limiter.limit("30/minute")
def download_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to download"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Response:
    """Download a file. Resolves the file's mandate/instance scope automatically,
    so direct <a href> links work even when X-Mandate-Id / X-Instance-Id headers
    are not sent by the browser."""
    import urllib.parse

    try:
        managementInterface, meta = _resolveFileWithScope(currentUser, context, fileId)
        if not meta:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )

        payload = managementInterface.getFileData(fileId)
        if not payload:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File content not found for ID {fileId}"
            )

        # Percent-encode the name (RFC 6266 filename* syntax) so Unicode
        # filenames survive the Content-Disposition header.
        quotedName = urllib.parse.quote(meta.fileName)
        return Response(
            content=payload,
            media_type=meta.mimeType,
            headers={
                "Content-Disposition": f"attachment; filename*=UTF-8''{quotedName}"
            }
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error downloading file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error downloading file: {str(e)}"
        )
|
|
|
|
@router.get("/{fileId}/preview", response_model=FilePreview)
@limiter.limit("30/minute")
def preview_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to preview"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> FilePreview:
    """Preview a file's content. Resolves the file's mandate/instance scope automatically."""
    try:
        managementInterface, meta = _resolveFileWithScope(currentUser, context, fileId)
        # Guard clause: unknown or out-of-scope file.
        if not meta:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )

        content = managementInterface.getFileContent(fileId)
        if not content:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found or no content available"
            )

        return content
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error previewing file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error previewing file: {str(e)}"
        )
|
|
|
|
|