gateway/modules/routes/routeDataFiles.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
from fastapi import APIRouter, HTTPException, Depends, File, UploadFile, Form, Path, Request, status, Query, Response, Body, BackgroundTasks
from fastapi.responses import JSONResponse
from typing import List, Dict, Any, Optional
import logging
import json

# Import auth module
from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext

# Import interfaces
import modules.interfaces.interfaceDbManagement as interfaceDbManagement
from modules.datamodels.datamodelFiles import FileItem, FilePreview
from modules.shared.attributeUtils import getModelAttributeDefinitions
from modules.datamodels.datamodelUam import User
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata, normalize_pagination_dict
from modules.shared.i18nRegistry import apiRouteContext
from modules.routes.routeHelpers import enrichRowsWithFkLabels
routeApiMsg = apiRouteContext("routeDataFiles")

# Configure logger
logger = logging.getLogger(__name__)


def _resolveFileWithScope(currentUser: User, context: RequestContext, fileId: str):
    """Returns (managementInterface, fileItem) with RBAC scoped to the file's own mandate/instance.

    Files generated by workflows (e.g. AI report outputs) carry their own
    mandateId/featureInstanceId. Direct download links via <a href> cannot send
    custom scope headers, so we resolve the scope from the FileItem itself and
    re-check RBAC in that scope.

    Returns (None, None) if the file does not exist or the user lacks access
    in the file's actual scope.
    """
    requestMandateId = str(context.mandateId) if context.mandateId else None
    requestInstanceId = str(context.featureInstanceId) if context.featureInstanceId else None

    mgmt = interfaceDbManagement.getInterface(
        currentUser,
        mandateId=requestMandateId,
        featureInstanceId=requestInstanceId,
    )
    fileItem = mgmt.getFile(fileId)
    if fileItem:
        return mgmt, fileItem

    metas = mgmt.db.getRecordset(FileItem, recordFilter={"id": fileId})
    if not metas:
        return None, None

    meta = metas[0]
    fileMandateId = meta.get("mandateId") or None
    fileInstanceId = meta.get("featureInstanceId") or None

    if not fileMandateId and not fileInstanceId:
        return None, None

    if fileMandateId == requestMandateId and fileInstanceId == requestInstanceId:
        return None, None

    scopedMgmt = interfaceDbManagement.getInterface(
        currentUser,
        mandateId=fileMandateId,
        featureInstanceId=fileInstanceId,
    )
    fileItem = scopedMgmt.getFile(fileId)
    if not fileItem:
        return None, None

    return scopedMgmt, fileItem


async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
    """Background task: pre-scan + extraction + knowledge indexing.
    Step 1: Structure Pre-Scan (AI-free) -> FileContentIndex (persisted)
    Step 2: Content extraction via runExtraction -> ContentParts
    Step 3: KnowledgeService.requestIngestion -> idempotent chunking + embedding -> Knowledge Store"""
    userId = user.id if hasattr(user, "id") else str(user)
    try:
        mgmtInterface = interfaceDbManagement.getInterface(user)
        mgmtInterface.updateFile(fileId, {"status": "processing"})

        rawBytes = mgmtInterface.getFileData(fileId)
        if not rawBytes:
            logger.warning(f"Auto-index: no file data for {fileId}, skipping")
            mgmtInterface.updateFile(fileId, {"status": "active"})
            return

        file_meta = mgmtInterface.getFile(fileId)
        feature_instance_id = ""
        mandate_id = ""
        file_scope = "personal"
        if file_meta:
            if isinstance(file_meta, dict):
                feature_instance_id = file_meta.get("featureInstanceId") or ""
                mandate_id = file_meta.get("mandateId") or ""
                file_scope = file_meta.get("scope") or "personal"
            else:
                feature_instance_id = getattr(file_meta, "featureInstanceId", None) or ""
                mandate_id = getattr(file_meta, "mandateId", None) or ""
                file_scope = getattr(file_meta, "scope", None) or "personal"

        logger.info(f"Auto-index starting for {fileName} ({len(rawBytes)} bytes, {mimeType})")

        # Step 1: Structure Pre-Scan (AI-free)
        from modules.serviceCenter.services.serviceKnowledge.subPreScan import preScanDocument
        contentIndex = await preScanDocument(
            fileData=rawBytes,
            mimeType=mimeType,
            fileId=fileId,
            fileName=fileName,
            userId=userId,
            featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
            mandateId=str(mandate_id) if mandate_id else "",
            scope=file_scope,
        )
        logger.info(
            f"Pre-scan complete for {fileName}: "
            f"{contentIndex.totalObjects} objects"
        )

        # Persist FileContentIndex immediately.
        # IMPORTANT: preserve `_ingestion` metadata and `status="indexed"` from any
        # prior successful run — otherwise this upsert wipes the idempotency cache
        # and requestIngestion cannot detect duplicates (AC4 breaks).
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        knowledgeDb = getKnowledgeInterface()
        try:
            _existing = knowledgeDb.getFileContentIndex(fileId)
        except Exception:
            _existing = None
        if _existing:
            _existingStruct = (
                _existing.get("structure") if isinstance(_existing, dict)
                else getattr(_existing, "structure", {})
            ) or {}
            _existingStatus = (
                _existing.get("status") if isinstance(_existing, dict)
                else getattr(_existing, "status", "")
            ) or ""
            if "_ingestion" in _existingStruct:
                contentIndex.structure = dict(contentIndex.structure or {})
                contentIndex.structure["_ingestion"] = _existingStruct["_ingestion"]
            if _existingStatus == "indexed":
                contentIndex.status = "indexed"
        knowledgeDb.upsertFileContentIndex(contentIndex)

        # Step 2: Content extraction (AI-free, produces ContentParts)
        from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry, ChunkerRegistry
        from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
        from modules.datamodels.datamodelExtraction import ExtractionOptions

        extractorRegistry = ExtractorRegistry()
        chunkerRegistry = ChunkerRegistry()
        # mergeStrategy=None: keep per-page / per-section granularity for RAG ingestion.
        # The default MergeStrategy concatenates all text parts into a single blob, which
        # collapses a 500-page PDF into one ContentChunk and destroys semantic retrieval.
        options = ExtractionOptions(mergeStrategy=None)

        extracted = runExtraction(
            extractorRegistry, chunkerRegistry,
            rawBytes, fileName, mimeType, options,
        )

        contentObjects = []
        for part in extracted.parts:
            contentType = "text"
            if part.typeGroup == "image":
                contentType = "image"
            elif part.typeGroup in ("binary", "container"):
                contentType = "other"

            if not part.data or not part.data.strip():
                continue

            contentObjects.append({
                "contentObjectId": part.id,
                "contentType": contentType,
                "data": part.data,
                "contextRef": {
                    "containerPath": fileName,
                    "location": part.label or "file",
                    **(part.metadata or {}),
                },
            })

        logger.info(f"Extracted {len(contentObjects)} content objects from {fileName}")

        if not contentObjects:
            knowledgeDb.updateFileStatus(fileId, "indexed")
            mgmtInterface.updateFile(fileId, {"status": "active"})
            return

        # Step 3: Knowledge indexing (chunking + embedding)
        from modules.serviceCenter import getService
        from modules.serviceCenter.context import ServiceCenterContext

        ctx = ServiceCenterContext(
            user=user,
            mandate_id=str(mandate_id) if mandate_id else "",
            feature_instance_id=str(feature_instance_id) if feature_instance_id else "",
        )
        knowledgeService = getService("knowledge", ctx)

        from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

        await knowledgeService.requestIngestion(
            IngestionJob(
                sourceKind="file",
                sourceId=fileId,
                fileName=fileName,
                mimeType=mimeType,
                userId=userId,
                featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
                mandateId=str(mandate_id) if mandate_id else "",
                contentObjects=contentObjects,
                structure=contentIndex.structure,
                provenance={"lane": "upload", "route": "routeDataFiles._autoIndexFile"},
            )
        )

        # Re-acquire interface after await to avoid stale user context from the singleton
        mgmtInterface = interfaceDbManagement.getInterface(user)
        mgmtInterface.updateFile(fileId, {"status": "active"})
        logger.info(f"Auto-index complete for file {fileId} ({fileName})")

    except Exception as e:
        logger.error(f"Auto-index failed for file {fileId}: {e}", exc_info=True)
        try:
            errMgmt = interfaceDbManagement.getInterface(user)
            errMgmt.updateFile(fileId, {"status": "active"})
        except Exception:
            pass

# Model attributes for FileItem
fileAttributes = getModelAttributeDefinitions(FileItem)

# Create router for file endpoints
router = APIRouter(
    prefix="/api/files",
    tags=["Manage Files"],
    responses={
        404: {"description": "Not found"},
        400: {"description": "Bad request"},
        401: {"description": "Unauthorized"},
        403: {"description": "Forbidden"},
        500: {"description": "Internal server error"}
    }
)

@router.get("/list")
@limiter.limit("120/minute")
def get_files(
    request: Request,
    pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
    mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
    column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
):
    """
    Get files with optional pagination, sorting, and filtering.

    Query Parameters:
    - pagination: JSON-encoded PaginationParams object, or None for no pagination

    Examples:
    - GET /api/files/list (no pagination - returns all items)
    - GET /api/files/list?pagination={"page":1,"pageSize":10,"sort":[]}
    - GET /api/files/list?pagination={"page":2,"pageSize":20,"sort":[{"field":"fileName","direction":"asc"}]}
    """
    try:
        # Parse pagination parameter
        paginationParams = None
        if pagination:
            try:
                paginationDict = json.loads(pagination)
                if paginationDict:
                    paginationDict = normalize_pagination_dict(paginationDict)
                    paginationParams = PaginationParams(**paginationDict)
            except (json.JSONDecodeError, ValueError) as e:
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid pagination parameter: {str(e)}"
                )

        from modules.routes.routeHelpers import (
            handleIdsMode,
            handleFilterValuesInMemory,
            handleGroupingInRequest, applyGroupScopeFilter,
        )
        import modules.interfaces.interfaceDbApp as _appIface

        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
        )
        appInterface = _appIface.getInterface(currentUser)
        groupCtx = handleGroupingInRequest(paginationParams, appInterface, "files/list")

        def _filesToDicts(fileItems):
            return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems]

        if mode == "filterValues":
            if not column:
                raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")
            allFiles = managementInterface.getAllFiles()
            items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
            itemDicts = _filesToDicts(items)
            enrichRowsWithFkLabels(itemDicts, FileItem)
            itemDicts = applyGroupScopeFilter(itemDicts, groupCtx.itemIds)
            return handleFilterValuesInMemory(itemDicts, column, pagination)

        if mode == "ids":
            recordFilter = {"sysCreatedBy": managementInterface.userId}
            return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)

        result = managementInterface.getAllFiles(pagination=paginationParams)

        if paginationParams:
            enriched = applyGroupScopeFilter(enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem), groupCtx.itemIds)
            return {
                "items": enriched,
                "pagination": PaginationMetadata(
                    currentPage=paginationParams.page,
                    pageSize=paginationParams.pageSize,
                    totalItems=result.totalItems,
                    totalPages=result.totalPages,
                    sort=paginationParams.sort,
                    filters=paginationParams.filters
                ).model_dump(),
                "groupTree": groupCtx.groupTree,
            }
        else:
            items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
            enriched = applyGroupScopeFilter(enrichRowsWithFkLabels(_filesToDicts(items), FileItem), groupCtx.itemIds)
            return {"items": enriched, "pagination": None, "groupTree": groupCtx.groupTree}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting files: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get files: {str(e)}"
        )


def _addFileToGroup(appInterface, fileId: str, groupId: str, contextKey: str = "files/list"):
    """Add a file to a group in the persisted groupTree (upsert)."""
    from modules.routes.routeHelpers import _collectItemIds
    try:
        existing = appInterface.getTableGrouping(contextKey)
        if not existing:
            return
        nodes = [n.model_dump() if hasattr(n, 'model_dump') else n for n in existing.rootGroups]
        def _add(nds):
            for nd in nds:
                nid = nd.get("id") if isinstance(nd, dict) else getattr(nd, "id", None)
                if nid == groupId:
                    itemIds = list(nd.get("itemIds", []) if isinstance(nd, dict) else getattr(nd, "itemIds", []))
                    if fileId not in itemIds:
                        itemIds.append(fileId)
                    if isinstance(nd, dict):
                        nd["itemIds"] = itemIds
                    else:
                        nd.itemIds = itemIds
                    return True
                subs = nd.get("subGroups", []) if isinstance(nd, dict) else getattr(nd, "subGroups", [])
                if _add(subs):
                    return True
            return False
        _add(nodes)
        appInterface.upsertTableGrouping(contextKey, nodes)
    except Exception as e:
        logger.warning(f"_addFileToGroup failed: {e}")


@router.post("/upload", status_code=status.HTTP_201_CREATED)
@limiter.limit("10/minute")
async def upload_file(
    request: Request,
    file: UploadFile = File(...),
    workflowId: Optional[str] = Form(None),
    featureInstanceId: Optional[str] = Form(None),
    groupId: Optional[str] = Form(None),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
) -> JSONResponse:
    # Add fileName property to UploadFile for consistency with backend model
    file.fileName = file.filename
    """Upload a file"""
    try:
        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )

        # Read file
        fileContent = await file.read()

        # Check size limits
        maxSize = int(interfaceDbManagement.APP_CONFIG.get("File_Management_MAX_UPLOAD_SIZE_MB")) * 1024 * 1024  # in bytes
        if len(fileContent) > maxSize:
            raise HTTPException(
                status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                detail=f"File too large. Maximum size: {interfaceDbManagement.APP_CONFIG.get('File_Management_MAX_UPLOAD_SIZE_MB')}MB"
            )

        # Save file via LucyDOM interface in the database
        fileItem, duplicateType = managementInterface.saveUploadedFile(
            fileContent, file.filename
        )

        if featureInstanceId and not fileItem.featureInstanceId:
            managementInterface.updateFile(fileItem.id, {"featureInstanceId": featureInstanceId})
            fileItem.featureInstanceId = featureInstanceId

        # Add to group if groupId was provided
        if groupId:
            import modules.interfaces.interfaceDbApp as _appIface
            appInterface = _appIface.getInterface(currentUser)
            _addFileToGroup(appInterface, fileItem.id, groupId)

        # Determine response message based on duplicate type
        if duplicateType == "exact_duplicate":
            message = f"File '{file.filename}' already exists with identical content. Reusing existing file."
        elif duplicateType == "name_conflict":
            message = f"File '{file.filename}' already exists with different content. Uploaded as '{fileItem.fileName}'."
        else:  # new_file
            message = "File uploaded successfully"

        # Convert FileItem to dictionary for JSON response
        fileMeta = fileItem.model_dump()

        # If workflowId is provided, include it in the response (not stored in FileItem model)
        if workflowId:
            fileMeta["workflowId"] = workflowId

        # Trigger background auto-index pipeline (non-blocking)
        # Also runs for duplicates in case the original was never successfully indexed
        shouldIndex = duplicateType == "new_file"
        if not shouldIndex:
            try:
                from modules.interfaces.interfaceDbKnowledge import getInterface as _getKnowledgeInterface
                _kDb = _getKnowledgeInterface()
                _existingIndex = _kDb.getFileContentIndex(fileItem.id)
                if not _existingIndex:
                    shouldIndex = True
                    logger.info(f"Re-triggering auto-index for duplicate {fileItem.id} (not yet indexed)")
            except Exception:
                shouldIndex = True

        if shouldIndex:
            try:
                import asyncio
                asyncio.ensure_future(_autoIndexFile(
                    fileId=fileItem.id,
                    fileName=fileItem.fileName,
                    mimeType=fileItem.mimeType,
                    user=currentUser,
                ))
            except Exception as indexErr:
                logger.warning(f"Auto-index trigger failed (non-blocking): {indexErr}")

        # Response with duplicate information
        return JSONResponse({
            "message": message,
            "file": fileMeta,
            "duplicateType": duplicateType,
            "originalFileName": file.filename,
            "storedFileName": fileItem.fileName,
            "isDuplicate": duplicateType != "new_file"
        })

    except interfaceDbManagement.FileStorageError as e:
        logger.error(f"Error during file upload (storage): {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
    except Exception as e:
        logger.error(f"Error during file upload: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error during file upload: {str(e)}"
        )


@router.post("/batch-delete")
@limiter.limit("10/minute")
def batch_delete_items(
    request: Request,
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Batch delete files."""
    fileIds = body.get("fileIds") or []

    if not isinstance(fileIds, list):
        raise HTTPException(status_code=400, detail=routeApiMsg("fileIds must be an array"))

    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )

        result = {"deletedFiles": 0}

        if fileIds:
            fileResult = mgmt.deleteFilesBatch(fileIds)
            result["deletedFiles"] += fileResult.get("deletedFiles", 0)

        return result
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error in batch delete: {e}")
        raise HTTPException(status_code=500, detail=str(e))


# ── Group bulk endpoints ──────────────────────────────────────────────────────

def _get_group_item_ids(contextKey: str, groupId: str, appInterface) -> set:
    """Collect all file IDs in a group and its sub-groups from the stored groupTree."""
    from modules.routes.routeHelpers import _collectItemIds
    try:
        existing = appInterface.getTableGrouping(contextKey)
        if not existing:
            return set()
        nodes = [n.model_dump() if hasattr(n, 'model_dump') else n for n in existing.rootGroups]
        result = _collectItemIds(nodes, groupId)
        return result or set()
    except Exception as e:
        logger.error(f"_get_group_item_ids failed for groupId={groupId}: {e}")
        return set()


@router.patch("/groups/{groupId}/scope")
@limiter.limit("60/minute")
def patch_group_scope(
    request: Request,
    groupId: str = Path(..., description="Group ID"),
    body: dict = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Set scope for all files in a group (recursive)."""
    scope = body.get("scope")
    if not scope:
        raise HTTPException(status_code=400, detail="scope is required")
    try:
        import modules.interfaces.interfaceDbApp as _appIface
        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        appInterface = _appIface.getInterface(currentUser)
        fileIds = _get_group_item_ids("files/list", groupId, appInterface)
        updated = 0
        for fid in fileIds:
            try:
                managementInterface.updateFile(fid, {"scope": scope})
                updated += 1
            except Exception as e:
                logger.error(f"patch_group_scope: failed to update file {fid}: {e}")
        return {"groupId": groupId, "scope": scope, "filesUpdated": updated}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"patch_group_scope error: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.patch("/groups/{groupId}/neutralize")
@limiter.limit("60/minute")
def patch_group_neutralize(
    request: Request,
    groupId: str = Path(..., description="Group ID"),
    body: dict = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Toggle neutralize for all files in a group (recursive, incl. knowledge purge/reindex)."""
    neutralize = body.get("neutralize")
    if neutralize is None:
        raise HTTPException(status_code=400, detail="neutralize is required")
    try:
        import modules.interfaces.interfaceDbApp as _appIface
        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        appInterface = _appIface.getInterface(currentUser)
        fileIds = _get_group_item_ids("files/list", groupId, appInterface)
        updated = 0
        for fid in fileIds:
            try:
                managementInterface.updateFile(fid, {"neutralize": neutralize})
                if not neutralize:
                    try:
                        from modules.interfaces import interfaceDbKnowledge
                        kIface = interfaceDbKnowledge.getInterface(currentUser)
                        kIface.purgeFileKnowledge(fid)
                    except Exception as ke:
                        logger.warning(f"patch_group_neutralize: knowledge purge failed for {fid}: {ke}")
                updated += 1
            except Exception as e:
                logger.error(f"patch_group_neutralize: failed for file {fid}: {e}")
        return {"groupId": groupId, "neutralize": neutralize, "filesUpdated": updated}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"patch_group_neutralize error: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/groups/{groupId}/download")
@limiter.limit("20/minute")
async def download_group_zip(
    request: Request,
    groupId: str = Path(..., description="Group ID"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Download all files in a group as a ZIP archive."""
    import io, zipfile
    try:
        import modules.interfaces.interfaceDbApp as _appIface
        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        appInterface = _appIface.getInterface(currentUser)
        fileIds = _get_group_item_ids("files/list", groupId, appInterface)
        if not fileIds:
            raise HTTPException(status_code=404, detail="Group not found or empty")
        buf = io.BytesIO()
        with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
            for fid in fileIds:
                try:
                    fileMeta = managementInterface.getFile(fid)
                    fileData = managementInterface.getFileData(fid)
                    if fileMeta and fileData:
                        name = (fileMeta.get("fileName") if isinstance(fileMeta, dict) else getattr(fileMeta, "fileName", fid)) or fid
                        zf.writestr(name, fileData)
                except Exception as fe:
                    logger.warning(f"download_group_zip: skipping file {fid}: {fe}")
        buf.seek(0)
        from fastapi.responses import StreamingResponse
        return StreamingResponse(
            buf,
            media_type="application/zip",
            headers={"Content-Disposition": f'attachment; filename="group-{groupId}.zip"'},
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"download_group_zip error: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.delete("/groups/{groupId}")
@limiter.limit("30/minute")
def delete_group(
    request: Request,
    groupId: str = Path(..., description="Group ID"),
    deleteItems: bool = Query(False, description="If true, also delete all files in the group"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Remove a group from the groupTree. Optionally delete all its files."""
    try:
        import modules.interfaces.interfaceDbApp as _appIface
        appInterface = _appIface.getInterface(currentUser)
        fileIds = _get_group_item_ids("files/list", groupId, appInterface)
        # Remove group from tree
        existing = appInterface.getTableGrouping("files/list")
        if existing:
            from modules.routes.routeHelpers import _removeGroupFromTree
            newRoots = _removeGroupFromTree([n.model_dump() if hasattr(n, 'model_dump') else n for n in existing.rootGroups], groupId)
            appInterface.upsertTableGrouping("files/list", newRoots)
        # Optionally delete files
        deletedFiles = 0
        if deleteItems:
            managementInterface = interfaceDbManagement.getInterface(
                currentUser,
                mandateId=str(context.mandateId) if context.mandateId else None,
                featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
            )
            for fid in fileIds:
                try:
                    managementInterface.deleteFile(fid)
                    deletedFiles += 1
                except Exception as e:
                    logger.error(f"delete_group: failed to delete file {fid}: {e}")
        return {"groupId": groupId, "deletedFiles": deletedFiles}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"delete_group error: {e}")
        raise HTTPException(status_code=500, detail=str(e))


# ── Scope & neutralize tagging endpoints (before /{fileId} catch-all) ─────────

@router.patch("/{fileId}/scope")
@limiter.limit("30/minute")
def updateFileScope(
    request: Request,
    background_tasks: BackgroundTasks,
    fileId: str = Path(..., description="ID of the file"),
    scope: str = Body(..., embed=True),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Update the scope of a file. Global scope requires sysAdmin."""
    try:
        validScopes = {"personal", "featureInstance", "mandate", "global"}
        if scope not in validScopes:
            raise HTTPException(status_code=400, detail=f"Invalid scope: {scope}. Must be one of {validScopes}")

        if scope == "global" and not context.isSysAdmin:
            raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))

        managementInterface = interfaceDbManagement.getInterface(
            context.user,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )

        managementInterface.updateFile(fileId, {"scope": scope})

        try:
            from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
            from modules.datamodels.datamodelKnowledge import FileContentIndex
            knowledgeDb = getKnowledgeInterface()
            indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fileId})
            for idx in indices:
                idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
                if idxId:
                    knowledgeDb.db.recordModify(FileContentIndex, idxId, {"scope": scope})
        except Exception as e:
            logger.warning(f"Failed to update FileContentIndex scope for file {fileId}: {e}")

        fileMeta = managementInterface.getFile(fileId)
        if fileMeta:
            fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "")
            mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "")

            async def _runReindexAfterScopeChange():
                try:
                    await _autoIndexFile(fileId=fileId, fileName=fn, mimeType=mt, user=context.user)
                except Exception as ex:
                    logger.warning("Re-index after scope change failed for %s: %s", fileId, ex)

            background_tasks.add_task(_runReindexAfterScopeChange)

        return {"fileId": fileId, "scope": scope, "updated": True}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating file scope: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.patch("/{fileId}/neutralize")
@limiter.limit("30/minute")
def updateFileNeutralize(
    request: Request,
    background_tasks: BackgroundTasks,
    fileId: str = Path(..., description="ID of the file"),
    neutralize: bool = Body(..., embed=True),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Toggle neutralization flag on a file.

    FAILSAFE: When turning neutralize ON, the existing Knowledge Store index
    and all content chunks are deleted SYNCHRONOUSLY before the response is
    returned.  The re-index happens in a background task.  If re-indexing
    fails the file simply has no index — no un-neutralized data can leak.
    """
    try:
        managementInterface = interfaceDbManagement.getInterface(
            context.user,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )

        managementInterface.updateFile(fileId, {"neutralize": neutralize})

        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        knowledgeDb = getKnowledgeInterface()

        if neutralize:
            # ── CRITICAL: purge existing (potentially un-neutralized) index
            # This MUST succeed before the response is sent so that no stale
            # raw-text chunks remain searchable while re-indexing runs.
            try:
                knowledgeDb.deleteFileContentIndex(fileId)
                logger.info("Neutralize toggle ON: deleted index + chunks for file %s", fileId)
            except Exception as e:
                logger.error("Neutralize toggle ON: FAILED to delete index for file %s: %s", fileId, e)
                raise HTTPException(
                    status_code=500,
                    detail=f"Could not purge existing index for neutralization — aborting toggle. Error: {e}",
                )
        else:
            # Turning neutralize OFF: update metadata only; re-index will overwrite
            try:
                from modules.datamodels.datamodelKnowledge import FileContentIndex
                indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fileId})
                for idx in indices:
                    idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
                    if idxId:
                        knowledgeDb.db.recordModify(FileContentIndex, idxId, {
                            "neutralizationStatus": "original",
                            "isNeutralized": False,
                        })
            except Exception as e:
                logger.warning("Failed to update FileContentIndex after neutralize-OFF for %s: %s", fileId, e)

        # Background re-index (safe: if it fails, there is simply no index)
        fileMeta = managementInterface.getFile(fileId)
        if fileMeta:
            fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "")
            mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "")

            async def _runReindexAfterNeutralizeToggle():
                try:
                    await _autoIndexFile(fileId=fileId, fileName=fn, mimeType=mt, user=context.user)
                except Exception as ex:
                    logger.error("Re-index after neutralize toggle failed for %s: %s (file has NO index until next re-index)", fileId, ex)

            background_tasks.add_task(_runReindexAfterNeutralizeToggle)

        return {"fileId": fileId, "neutralize": neutralize, "updated": True}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating file neutralize flag: {e}")
        raise HTTPException(status_code=500, detail=str(e))


# ── File endpoints with path parameters (catch-all /{fileId}) ─────────────────

@router.get("/{fileId}")
@limiter.limit("30/minute")
def get_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
):
    """Get a file. Resolves the file's mandate/instance scope automatically."""
    try:
        _mgmt, fileData = _resolveFileWithScope(currentUser, context, fileId)
        if not fileData:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )

        fileDict = fileData.model_dump() if hasattr(fileData, "model_dump") else dict(fileData)
        enriched = enrichRowsWithFkLabels([fileDict], FileItem)
        return enriched[0]

    except interfaceDbManagement.FileNotFoundError as e:
        logger.warning(f"File not found: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(e)
        )
    except interfaceDbManagement.FilePermissionError as e:
        logger.warning(f"No permission for file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=str(e)
        )
    except interfaceDbManagement.FileError as e:
        logger.error(f"Error retrieving file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
    except Exception as e:
        logger.error(f"Unexpected error retrieving file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error retrieving file: {str(e)}"
        )

@router.put("/{fileId}", response_model=FileItem)
@limiter.limit("10/minute")
def update_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to update"),
    file_info: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> FileItem:
    """Update file info"""
    try:
        _EDITABLE_FIELDS = {"fileName", "scope", "tags", "description", "neutralize"}
        safeData = {k: v for k, v in file_info.items() if k in _EDITABLE_FIELDS}
        if not safeData:
            raise HTTPException(status_code=400, detail=routeApiMsg("No editable fields provided"))

        managementInterface = interfaceDbManagement.getInterface(currentUser)

        file = managementInterface.getFile(fileId)
        if not file:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )

        if safeData.get("scope") == "global" and not getattr(currentUser, "isSysAdmin", False):
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=routeApiMsg("Only sysadmins can set global scope"),
            )

        if not managementInterface.checkRbacPermission(FileItem, "update", fileId):
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=routeApiMsg("Not authorized to update this file")
            )

        result = managementInterface.updateFile(fileId, safeData)
        if not result:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=routeApiMsg("Failed to update file")
            )

        # Get updated file
        updatedFile = managementInterface.getFile(fileId)
        return updatedFile

    except HTTPException as he:
        raise he
    except Exception as e:
        logger.error(f"Error updating file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )

@router.delete("/{fileId}", response_model=Dict[str, Any])
@limiter.limit("10/minute")
def delete_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to delete"),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Delete a file"""
    managementInterface = interfaceDbManagement.getInterface(currentUser)

    # Check if the file exists
    existingFile = managementInterface.getFile(fileId)
    if not existingFile:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"File with ID {fileId} not found"
        )

    success = managementInterface.deleteFile(fileId)
    if not success:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=routeApiMsg("Error deleting the file")
        )

    return {"message": f"File with ID {fileId} successfully deleted"}

@router.get("/{fileId}/download")
@limiter.limit("30/minute")
def download_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to download"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Response:
    """Download a file. Resolves the file's mandate/instance scope automatically,
    so direct <a href> links work even when X-Mandate-Id / X-Instance-Id headers
    are not sent by the browser."""
    try:
        managementInterface, fileData = _resolveFileWithScope(currentUser, context, fileId)
        if not fileData:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )

        fileContent = managementInterface.getFileData(fileId)
        if not fileContent:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File content not found for ID {fileId}"
            )

        # Return file as response
        # Properly encode filename for Content-Disposition header to handle Unicode characters
        import urllib.parse
        encoded_filename = urllib.parse.quote(fileData.fileName)

        return Response(
            content=fileContent,
            media_type=fileData.mimeType,
            headers={
                "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"
            }
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error downloading file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error downloading file: {str(e)}"
        )

@router.get("/{fileId}/preview", response_model=FilePreview)
@limiter.limit("30/minute")
def preview_file(
    request: Request,
    fileId: str = Path(..., description="ID of the file to preview"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> FilePreview:
    """Preview a file's content. Resolves the file's mandate/instance scope automatically."""
    try:
        managementInterface, fileMeta = _resolveFileWithScope(currentUser, context, fileId)
        if not fileMeta:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found"
            )

        preview = managementInterface.getFileContent(fileId)
        if not preview:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"File with ID {fileId} not found or no content available"
            )

        return preview
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error previewing file: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error previewing file: {str(e)}"
        )