fix: Node "Inhalt extrahieren" kann jetzt context nutzen; File-Page, FormGenerator und FolderTree zeigen die gleichen Dateien (Filterprozess konsolidiert und vereinheitlicht)

2026-05-26 11:51:44 +02:00 · 2026-05-26 11:51:44 +02:00 · 4d1a579dbd
commit 4d1a579dbd
parent c097b28b6c
6 changed files with 177 additions and 44 deletions
--- a/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-001-extraction_text_part_1_master_keys.txt.txt
+++ b/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-001-extraction_text_part_1_master_keys.txt.txt
@ -0,0 +1,20 @@
 {
  "partIndex": 1,
  "partId": "9c6e7733-adfb-412b-ab8c-2131b901445a",
  "typeGroup": "text",
  "mimeType": "text/plain",
  "label": "main",
  "dataLength": 1010,
  "metadata": {
    "size": 1010,
    "documentId": "ddaa7167-dd3c-49c1-86b0-5e56af5efd86",
    "documentMimeType": "text/plain",
    "originalFileName": "master_keys.txt",
    "contentFormat": "extracted",
    "intent": "extract",
    "extractionPrompt": "Extract all content from the document",
    "usageHint": "Use extracted content from master_keys.txt",
    "sourceAction": "extraction.extractContent"
  },
  "data": "# PowerOn Master Keys\r\n# Generated on: 1758489953.7492533\r\n# WARNING: Keep this file secure and never commit to version control!\r\n\r\nprod = <REDACTED>\r\nint = <REDACTED>\r\ndev = <REDACTED>\r\n\r\nPoweron is the 1. agent in my life\r\n\r\n<REDACTED>\r\n\r\nAPI key anthropic:<REDACTED>\r\n\r\n<REDACTED>\r\n\r\nevent:The 1st Poweron Event\r\n\r\n\r\nPAT GitHub: <REDACTED>\r\n\r\n-----BEGIN OPENSSH PRIVATE KEY-----\r\n<REDACTED>\r\n-----END OPENSSH PRIVATE KEY-----\r\n\r\n"
 }
--- a/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-002-extraction_result_master_keys.txt.txt
+++ b/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-002-extraction_result_master_keys.txt.txt
@ -0,0 +1,25 @@
 {
  "documentName": "master_keys.txt",
  "documentMimeType": "text/plain",
  "partsCount": 1,
  "parts": [
    {
      "typeGroup": "text",
      "mimeType": "text/plain",
      "label": "main",
      "dataLength": 1010,
      "metadata": {
        "size": 1010,
        "documentId": "ddaa7167-dd3c-49c1-86b0-5e56af5efd86",
        "documentMimeType": "text/plain",
        "originalFileName": "master_keys.txt",
        "contentFormat": "extracted",
        "intent": "extract",
        "extractionPrompt": "Extract all content from the document",
        "usageHint": "Use extracted content from master_keys.txt",
        "sourceAction": "extraction.extractContent"
      },
      "dataPreview": "[Large data: 1010 chars - truncated]"
    }
  ]
 }
--- a/modules/features/graphicalEditor/nodeDefinitions/context.py
+++ b/modules/features/graphicalEditor/nodeDefinitions/context.py
@ -4,6 +4,9 @@
 from modules.shared.i18nRegistry import t
 from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import (
    CONTEXT_BUILDER_PARAM_DESCRIPTION,
 )
 from modules.features.graphicalEditor.nodeDefinitions.flow import (
    CONTEXT_ENVELOPE_DATA_PICK_OPTIONS,
    CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS,
@ -37,9 +40,9 @@ CONTEXT_NODES = [
        ),
        "injectRunContext": True,
        "parameters": [
-            {"name": "documentList", "type": "str", "required": True, "frontendType": "hidden",
+            {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
-             "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": "",
+             "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
-             "graphInherit": {"port": 0, "kind": "documentListWire"}},
+             "graphInherit": {"port": 0, "kind": "primaryTextRef"}},
            {
                "name": "contentFilter",
                "type": "str",
--- a/modules/routes/routeDataFiles.py
+++ b/modules/routes/routeDataFiles.py
@ -668,6 +668,7 @@ def get_files(
    pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
    mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
    column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
    owner: str = Query("me", description="'all' | 'me' | 'shared'"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
 ):
@ -699,8 +700,9 @@ def get_files(
        from modules.routes.routeHelpers import (
            handleIdsMode,
            handleIdsInMemory,
            handleFilterValuesInMemory,
-            resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels,
+            resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels, paginateInMemory,
        )
        import modules.interfaces.interfaceDbApp as _appIface
        from modules.datamodels.datamodelPagination import AppliedViewMeta
@ -711,6 +713,10 @@ def get_files(
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
        )
        appInterface = _appIface.getInterface(currentUser)
        owner_mode = (owner or "me").strip().lower()
        if owner_mode not in ("all", "me", "shared"):
            raise HTTPException(status_code=400, detail="owner must be 'all', 'me', or 'shared'")
        current_user_id = str(getattr(currentUser, "id", "") or "")
        # Resolve view and merge config into params
        viewKey = paginationParams.viewKey if paginationParams else None
@ -722,6 +728,17 @@ def get_files(
        def _filesToDicts(fileItems):
            return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems]
        def _apply_owner_filter(item_dicts):
            """Filter row dicts by the requested ``owner_mode``.

            ``"all"`` returns the input list untouched. ``"me"`` keeps rows whose
            ``sysCreatedBy`` (stringified, missing treated as "") equals the
            current user id; any other mode keeps the rows where it differs.
            """
            if owner_mode == "all":
                return item_dicts
            keep_own = owner_mode == "me"
            return [
                row
                for row in item_dicts
                if (str(row.get("sysCreatedBy") or "") == current_user_id) == keep_own
            ]
        recordFilter = None
        if owner_mode == "me":
            recordFilter = {"sysCreatedBy": managementInterface.userId}
        if mode == "groupSummary":
            if not pagination:
                raise HTTPException(status_code=400, detail="pagination required for groupSummary")
@ -736,11 +753,12 @@ def get_files(
                )
            field = groupByLevels[0]["field"]
            null_label = str(groupByLevels[0].get("nullLabel") or "—")
-            allFiles = managementInterface.getAllFiles()
+            allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
            allItems = enrichRowsWithFkLabels(
                _filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])),
                FileItem,
            )
            allItems = _apply_owner_filter(allItems)
            filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
            groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
            return JSONResponse(content={"groups": groups_out})
@ -748,48 +766,35 @@ def get_files(
        if mode == "filterValues":
            if not column:
                raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")
-            allFiles = managementInterface.getAllFiles()
+            allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
            items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
            itemDicts = _filesToDicts(items)
            itemDicts = _apply_owner_filter(itemDicts)
            enrichRowsWithFkLabels(itemDicts, FileItem)
            return handleFilterValuesInMemory(itemDicts, column, pagination)
        if mode == "ids":
-            recordFilter = {"sysCreatedBy": managementInterface.userId}
+            if owner_mode == "me":
-            return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
+                return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
            allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
            items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
            itemDicts = _apply_owner_filter(_filesToDicts(items))
            enrichRowsWithFkLabels(itemDicts, FileItem)
            return handleIdsInMemory(itemDicts, pagination)
-        if not groupByLevels:
+        # Strategy B: load visible list first, then filter/sort/paginate in memory.
-            # No grouping: let DB handle pagination directly (fastest path)
+        # This is required for files because internal workflow artefacts are
-            result = managementInterface.getAllFiles(pagination=paginationParams)
+        # suppressed after record loading; SQL-level COUNT/LIMIT would otherwise
-            if paginationParams and hasattr(result, 'items'):
+        # count hidden rows and produce pages with only a handful of visible items.
-                enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem)
+        allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
                resp: dict = {
                    "items": enriched,
                    "pagination": PaginationMetadata(
                        currentPage=paginationParams.page,
                        pageSize=paginationParams.pageSize,
                        totalItems=result.totalItems,
                        totalPages=result.totalPages,
                        sort=paginationParams.sort,
                        filters=paginationParams.filters
                    ).model_dump(),
                }
            else:
                items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
                resp = {"items": enrichRowsWithFkLabels(_filesToDicts(items), FileItem), "pagination": None}
            if viewMeta:
                resp["appliedView"] = viewMeta.model_dump()
            return resp
        # Strategy B grouping: load full list, group, then slice
        allFiles = managementInterface.getAllFiles()
        allItems = enrichRowsWithFkLabels(
            _filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])),
            FileItem,
        )
        allItems = _apply_owner_filter(allItems)
        from modules.routes.routeHelpers import apply_strategy_b_filters_and_sort
-        if paginationParams.filters or paginationParams.sort:
+        if paginationParams and (paginationParams.filters or paginationParams.sort):
            allItems = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
        if not paginationParams:
@ -798,6 +803,24 @@ def get_files(
                resp["appliedView"] = viewMeta.model_dump()
            return resp
        if not groupByLevels:
            page_items, totalItems = paginateInMemory(allItems, paginationParams)
            totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
            resp = {
                "items": page_items,
                "pagination": PaginationMetadata(
                    currentPage=paginationParams.page,
                    pageSize=paginationParams.pageSize,
                    totalItems=totalItems,
                    totalPages=totalPages,
                    sort=paginationParams.sort,
                    filters=paginationParams.filters
                ).model_dump(),
            }
            if viewMeta:
                resp["appliedView"] = viewMeta.model_dump()
            return resp
        totalItems = len(allItems)
        totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
        page_items, groupLayout = buildGroupLayout(allItems, groupByLevels, paginationParams.page, paginationParams.pageSize)
--- a/modules/workflows/methods/methodContext/actions/extractContent.py
+++ b/modules/workflows/methods/methodContext/actions/extractContent.py
@ -24,7 +24,7 @@ import time
 from typing import Any, Dict, List, Optional, Tuple
 from modules.datamodels.datamodelChat import ActionResult
-from modules.datamodels.datamodelDocref import coerceDocumentReferenceList
+from modules.datamodels.datamodelDocref import DocumentReferenceList, coerceDocumentReferenceList
 from modules.datamodels.datamodelExtraction import ContentExtracted, ExtractionOptions
 logger = logging.getLogger(__name__)
@ -1751,6 +1751,62 @@ def presentation_envelopes_to_document_json(
    }
 def _document_list_from_context(raw: Any, *, _depth: int = 0) -> DocumentReferenceList:
    """Best-effort extraction of document/file references from ``context`` payloads.
    The payload is first handed to ``coerceDocumentReferenceList`` as-is; a bare
    ``{"fileId": ..., "fileName": ...}`` dict (without ``id``/``documentId``) is
    remapped to ``id``/``name`` beforehand. If that yields no references, the
    ``files`` container (dict or list), a fixed set of wrapper keys, and list
    items are searched recursively; hits are de-duplicated in encounter order.
    Args:
        raw: Context value of any shape (dict, list, tuple, str, None, ...).
        _depth: Internal recursion counter; callers should leave the default.
    Returns:
        A ``DocumentReferenceList`` whose ``references`` list is empty when
        nothing usable was found or the nesting exceeds the depth limit.
    """
    # The depth cap bounds the recursive walk over deeply nested payloads.
    if _depth > 6 or raw is None or raw == "":
        return DocumentReferenceList(references=[])
    if isinstance(raw, dict) and "fileId" in raw and "id" not in raw and "documentId" not in raw:
        direct = coerceDocumentReferenceList({
            "id": raw.get("fileId"),
            "name": raw.get("fileName") or raw.get("name"),
        })
    else:
        direct = coerceDocumentReferenceList(raw)
    if direct.references:
        return direct
    collected = []
    def _extend_from(value: Any) -> None:
        nested = _document_list_from_context(value, _depth=_depth + 1)
        if nested.references:
            collected.extend(nested.references)
    if isinstance(raw, dict):
        nested_files = raw.get("files")
        if isinstance(nested_files, dict):
            # Mapping of key -> file entry: only the entries carry references.
            _extend_from(list(nested_files.values()))
        elif isinstance(nested_files, (list, tuple)):
            # A plain sequence under "files" was previously skipped entirely.
            _extend_from(list(nested_files))
        for key in ("documents", "references", "items", "file", "document", "value", "data", "merged", "result", "context"):
            nested = raw.get(key)
            # Skip missing keys and direct self-references.
            if nested is None or nested is raw:
                continue
            _extend_from(nested)
    elif isinstance(raw, (list, tuple)):
        for item in raw:
            _extend_from(item)
    deduped = []
    seen = set()
    for ref in collected:
        try:
            key = ref.to_string()
        except Exception:
            # Deliberate best-effort: fall back to repr() as the identity key.
            key = repr(ref)
        if key in seen:
            continue
        seen.add(key)
        deduped.append(ref)
    return DocumentReferenceList(references=deduped)
 async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
    operation_id = None
    try:
@ -1758,18 +1814,24 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
        operation_id = f"context_extract_{wf}_{int(time.time())}"
        document_list_param = parameters.get("documentList")
-        if not document_list_param:
+        if document_list_param:
-            return ActionResult.isFailure(error="documentList is required")
+            dl = coerceDocumentReferenceList(document_list_param)
-
+            source = "documentList"
-        dl = coerceDocumentReferenceList(document_list_param)
+        else:
            context_param = parameters.get("context")
            dl = _document_list_from_context(context_param)
            source = "context"
        if not dl.references:
            return ActionResult.isFailure(
                error=(
-                    f"documentList could not be parsed (type={type(document_list_param).__name__}); "
+                    f"{source} could not be parsed into document references "
-                    "expected DocumentReferenceList, list of strings/dicts, or "
+                    f"(type={type((document_list_param if document_list_param else parameters.get('context'))).__name__}); "
-                    "a wrapper dict like {'documents': [...]}"
+                    "expected DocumentReferenceList, list of string/dict refs, "
                    "or a context payload containing file/document refs under keys like "
                    "{documents, files, file, data, value}."
                ),
            )
        logger.info("extractContent resolved %d document reference(s) from %s", len(dl.references), source)
        parent_operation_id = parameters.get("parentOperationId")
        self.services.chat.progressLogStart(
--- a/modules/workflows/methods/methodContext/methodContext.py
+++ b/modules/workflows/methods/methodContext/methodContext.py
@ -68,8 +68,8 @@ class MethodContext(MethodBase):
                        name="documentList",
                        type="DocumentList",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
-                        required=True,
+                        required=False,
-                        description="Document reference(s) to extract content from",
+                        description="Optional document reference(s) to extract content from. When omitted, extractContent also accepts refs via context.",
                    ),
                    "contentFilter": WorkflowActionParameter(
                        name="contentFilter",