From 4d1a579dbd04434f25417a5895489068aed09da2 Mon Sep 17 00:00:00 2001
From: Ida <i.dittrich@valueon.ch>
Date: Tue, 26 May 2026 11:51:44 +0200
Subject: [PATCH] fix: node inhalt extrahieren kann jetzt context nutzen, file
 page formgenerator und foldertree zeigen gleiche dateien (filter prozess
 konsolidiert und vereinheitlicht)

---
 ...extraction_text_part_1_master_keys.txt.txt | 20 +++++
 ...-002-extraction_result_master_keys.txt.txt | 25 ++++++
 .../nodeDefinitions/context.py                |  9 +-
 modules/routes/routeDataFiles.py              | 85 ++++++++++++-------
 .../methodContext/actions/extractContent.py   | 78 +++++++++++++++--
 .../methods/methodContext/methodContext.py    |  4 +-
 6 files changed, 177 insertions(+), 44 deletions(-)
 create mode 100644 D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-001-extraction_text_part_1_master_keys.txt.txt
 create mode 100644 D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-002-extraction_result_master_keys.txt.txt

diff --git a/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-001-extraction_text_part_1_master_keys.txt.txt b/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-001-extraction_text_part_1_master_keys.txt.txt
new file mode 100644
index 00000000..d42fb2fd
--- /dev/null
+++ b/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-001-extraction_text_part_1_master_keys.txt.txt
@@ -0,0 +1,20 @@
+{
+  "partIndex": 1,
+  "partId": "9c6e7733-adfb-412b-ab8c-2131b901445a",
+  "typeGroup": "text",
+  "mimeType": "text/plain",
+  "label": "main",
+  "dataLength": 1010,
+  "metadata": {
+    "size": 1010,
+    "documentId": "ddaa7167-dd3c-49c1-86b0-5e56af5efd86",
+    "documentMimeType": "text/plain",
+    "originalFileName": "master_keys.txt",
+    "contentFormat": "extracted",
+    "intent": "extract",
+    "extractionPrompt": "Extract all content from the document",
+    "usageHint": "Use extracted content from master_keys.txt",
+    "sourceAction": "extraction.extractContent"
+  },
+  "data": "# PowerOn Master Keys\r\n# Generated on: 1758489953.7492533\r\n# WARNING: Keep this file secure and never commit to version control!\r\n\r\nprod = hb7qNiURT1GHWbjOhVJYsp21MW5YRLI9SNRAhqgtPB0=\r\nint = 9JZ201a7UMeBNsCHJfs1GdgdPoou0zrLYrcNBzvDmv0=\r\ndev = 0tfyQI59HEETi6F0bMyInVSmI0XCMWMGcO7WnZ5i988=\r\n\r\nPoweron is the 1. agent in my life\r\n\r\nFugiKap%77\r\n\r\nAPI key anthropic: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\r\n\r\nF^065319580883at\r\n\r\nevent:The 1st Poweron Event\r\n\r\n\r\nPAT GitHub: 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\r\n\r\n-----BEGIN OPENSSH PRIVATE KEY-----\r\nb3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW\r\nQyNTUxOQAAACDofCk2DPsKSUrH1XhypZYpJTnkRNj0ju7yeAu9JCdPlgAAAKCrNeHXqzXh\r\n1wAAAAtzc2gtZWQyNTUxOQAAACDofCk2DPsKSUrH1XhypZYpJTnkRNj0ju7yeAu9JCdPlg\r\nAAAEC6+y04Tm13npqKvd6EH5YL6xhp0RODCSv9I02tglNaoOh8KTYM+wpJSsfVeHKllikl\r\nOeRE2PSO7vJ4C70kJ0+WAAAAFnlvdXJfZW1haWxAZXhhbXBsZS5jb20BAgMEBQYH\r\n-----END OPENSSH PRIVATE KEY-----\r\n\r\n"
+}
\ No newline at end of file
diff --git a/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-002-extraction_result_master_keys.txt.txt b/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-002-extraction_result_master_keys.txt.txt
new file mode 100644
index 00000000..d9dd9741
--- /dev/null
+++ b/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-002-extraction_result_master_keys.txt.txt
@@ -0,0 +1,25 @@
+{
+  "documentName": "master_keys.txt",
+  "documentMimeType": "text/plain",
+  "partsCount": 1,
+  "parts": [
+    {
+      "typeGroup": "text",
+      "mimeType": "text/plain",
+      "label": "main",
+      "dataLength": 1010,
+      "metadata": {
+        "size": 1010,
+        "documentId": "ddaa7167-dd3c-49c1-86b0-5e56af5efd86",
+        "documentMimeType": "text/plain",
+        "originalFileName": "master_keys.txt",
+        "contentFormat": "extracted",
+        "intent": "extract",
+        "extractionPrompt": "Extract all content from the document",
+        "usageHint": "Use extracted content from master_keys.txt",
+        "sourceAction": "extraction.extractContent"
+      },
+      "dataPreview": "[Large data: 1010 chars - truncated]"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/modules/features/graphicalEditor/nodeDefinitions/context.py b/modules/features/graphicalEditor/nodeDefinitions/context.py
index 743d92e8..3171f58a 100644
--- a/modules/features/graphicalEditor/nodeDefinitions/context.py
+++ b/modules/features/graphicalEditor/nodeDefinitions/context.py
@@ -4,6 +4,9 @@
 
 from modules.shared.i18nRegistry import t
 
+from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import (
+    CONTEXT_BUILDER_PARAM_DESCRIPTION,
+)
 from modules.features.graphicalEditor.nodeDefinitions.flow import (
     CONTEXT_ENVELOPE_DATA_PICK_OPTIONS,
     CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS,
@@ -37,9 +40,9 @@ CONTEXT_NODES = [
         ),
         "injectRunContext": True,
         "parameters": [
-            {"name": "documentList", "type": "str", "required": True, "frontendType": "hidden",
-             "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": "",
-             "graphInherit": {"port": 0, "kind": "documentListWire"}},
+            {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
+             "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
+             "graphInherit": {"port": 0, "kind": "primaryTextRef"}},
             {
                 "name": "contentFilter",
                 "type": "str",
diff --git a/modules/routes/routeDataFiles.py b/modules/routes/routeDataFiles.py
index 4bcbcf8f..eb44db44 100644
--- a/modules/routes/routeDataFiles.py
+++ b/modules/routes/routeDataFiles.py
@@ -668,6 +668,7 @@ def get_files(
     pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
     mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
     column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
+    owner: str = Query("me", description="'all' | 'me' | 'shared'"),
     currentUser: User = Depends(getCurrentUser),
     context: RequestContext = Depends(getRequestContext)
 ):
@@ -699,8 +700,9 @@ def get_files(
 
         from modules.routes.routeHelpers import (
             handleIdsMode,
+            handleIdsInMemory,
             handleFilterValuesInMemory,
-            resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels,
+            resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels, paginateInMemory,
         )
         import modules.interfaces.interfaceDbApp as _appIface
         from modules.datamodels.datamodelPagination import AppliedViewMeta
@@ -711,6 +713,10 @@ def get_files(
             featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
         )
         appInterface = _appIface.getInterface(currentUser)
+        owner_mode = (owner or "me").strip().lower()
+        if owner_mode not in ("all", "me", "shared"):
+            raise HTTPException(status_code=400, detail="owner must be 'all', 'me', or 'shared'")
+        current_user_id = str(getattr(currentUser, "id", "") or "")
 
         # Resolve view and merge config into params
         viewKey = paginationParams.viewKey if paginationParams else None
@@ -722,6 +728,17 @@ def get_files(
         def _filesToDicts(fileItems):
             return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems]
 
+        def _apply_owner_filter(item_dicts):
+            """Keep only rows matching the requested owner scope ('all' keeps every row)."""
+            if owner_mode == "all":
+                return item_dicts
+            want_own = owner_mode == "me"  # otherwise 'shared': rows created by someone else
+            return [row for row in item_dicts if (str(row.get("sysCreatedBy") or "") == current_user_id) == want_own]
+
+        recordFilter = None
+        if owner_mode == "me":
+            recordFilter = {"sysCreatedBy": managementInterface.userId}
+
         if mode == "groupSummary":
             if not pagination:
                 raise HTTPException(status_code=400, detail="pagination required for groupSummary")
@@ -736,11 +753,12 @@ def get_files(
                 )
             field = groupByLevels[0]["field"]
             null_label = str(groupByLevels[0].get("nullLabel") or "—")
-            allFiles = managementInterface.getAllFiles()
+            allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
             allItems = enrichRowsWithFkLabels(
                 _filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])),
                 FileItem,
             )
+            allItems = _apply_owner_filter(allItems)
             filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
             groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
             return JSONResponse(content={"groups": groups_out})
@@ -748,48 +766,35 @@ def get_files(
         if mode == "filterValues":
             if not column:
                 raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")
-            allFiles = managementInterface.getAllFiles()
+            allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
             items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
             itemDicts = _filesToDicts(items)
+            itemDicts = _apply_owner_filter(itemDicts)
             enrichRowsWithFkLabels(itemDicts, FileItem)
             return handleFilterValuesInMemory(itemDicts, column, pagination)
 
         if mode == "ids":
-            recordFilter = {"sysCreatedBy": managementInterface.userId}
-            return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
+            if owner_mode == "me":
+                return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
+            allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
+            items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
+            itemDicts = _apply_owner_filter(_filesToDicts(items))
+            enrichRowsWithFkLabels(itemDicts, FileItem)
+            return handleIdsInMemory(itemDicts, pagination)
 
-        if not groupByLevels:
-            # No grouping: let DB handle pagination directly (fastest path)
-            result = managementInterface.getAllFiles(pagination=paginationParams)
-            if paginationParams and hasattr(result, 'items'):
-                enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem)
-                resp: dict = {
-                    "items": enriched,
-                    "pagination": PaginationMetadata(
-                        currentPage=paginationParams.page,
-                        pageSize=paginationParams.pageSize,
-                        totalItems=result.totalItems,
-                        totalPages=result.totalPages,
-                        sort=paginationParams.sort,
-                        filters=paginationParams.filters
-                    ).model_dump(),
-                }
-            else:
-                items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
-                resp = {"items": enrichRowsWithFkLabels(_filesToDicts(items), FileItem), "pagination": None}
-            if viewMeta:
-                resp["appliedView"] = viewMeta.model_dump()
-            return resp
-
-        # Strategy B grouping: load full list, group, then slice
-        allFiles = managementInterface.getAllFiles()
+        # Strategy B: load visible list first, then filter/sort/paginate in memory.
+        # This is required for files because internal workflow artefacts are
+        # suppressed after record loading; SQL-level COUNT/LIMIT would otherwise
+        # count hidden rows and produce pages with only a handful of visible items.
+        allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
         allItems = enrichRowsWithFkLabels(
             _filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])),
             FileItem,
         )
+        allItems = _apply_owner_filter(allItems)
 
         from modules.routes.routeHelpers import apply_strategy_b_filters_and_sort
-        if paginationParams.filters or paginationParams.sort:
+        if paginationParams and (paginationParams.filters or paginationParams.sort):
             allItems = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
 
         if not paginationParams:
@@ -798,6 +803,24 @@ def get_files(
                 resp["appliedView"] = viewMeta.model_dump()
             return resp
 
+        if not groupByLevels:
+            page_items, totalItems = paginateInMemory(allItems, paginationParams)
+            totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
+            resp = {
+                "items": page_items,
+                "pagination": PaginationMetadata(
+                    currentPage=paginationParams.page,
+                    pageSize=paginationParams.pageSize,
+                    totalItems=totalItems,
+                    totalPages=totalPages,
+                    sort=paginationParams.sort,
+                    filters=paginationParams.filters
+                ).model_dump(),
+            }
+            if viewMeta:
+                resp["appliedView"] = viewMeta.model_dump()
+            return resp
+
         totalItems = len(allItems)
         totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
         page_items, groupLayout = buildGroupLayout(allItems, groupByLevels, paginationParams.page, paginationParams.pageSize)
diff --git a/modules/workflows/methods/methodContext/actions/extractContent.py b/modules/workflows/methods/methodContext/actions/extractContent.py
index 52d07b34..f6674124 100644
--- a/modules/workflows/methods/methodContext/actions/extractContent.py
+++ b/modules/workflows/methods/methodContext/actions/extractContent.py
@@ -24,7 +24,7 @@ import time
 from typing import Any, Dict, List, Optional, Tuple
 
 from modules.datamodels.datamodelChat import ActionResult
-from modules.datamodels.datamodelDocref import coerceDocumentReferenceList
+from modules.datamodels.datamodelDocref import DocumentReferenceList, coerceDocumentReferenceList
 from modules.datamodels.datamodelExtraction import ContentExtracted, ExtractionOptions
 
 logger = logging.getLogger(__name__)
@@ -1751,6 +1751,62 @@ def presentation_envelopes_to_document_json(
     }
 
 
+def _document_list_from_context(raw: Any, *, _depth: int = 0) -> DocumentReferenceList:
+    """Best-effort extraction of document/file references from a ``context`` payload.
+
+    ``raw`` is coerced directly first (a lone ``{"fileId": ...}`` dict is mapped to
+    ``id``/``name`` beforehand); if that yields nothing, known container keys and
+    list items are searched recursively and hits are de-duplicated in first-seen order.
+
+    Args:
+        raw: Context value of any shape (reference list, wrapper dict, list, ...).
+        _depth: Internal recursion depth; nesting deeper than 6 levels is ignored.
+
+    Returns:
+        The references found; an empty ``DocumentReferenceList`` if there are none.
+    """
+    if _depth > 6 or raw is None or raw == "":
+        return DocumentReferenceList(references=[])
+
+    if isinstance(raw, dict) and "fileId" in raw and "id" not in raw and "documentId" not in raw:
+        direct = coerceDocumentReferenceList({
+            "id": raw.get("fileId"),
+            "name": raw.get("fileName") or raw.get("name"),
+        })
+    else:
+        direct = coerceDocumentReferenceList(raw)
+    if direct.references:
+        return direct
+
+    collected = []
+    if isinstance(raw, dict):
+        nested_files = raw.get("files")
+        # "files" may be an id -> entry mapping or a plain list of entries; search both.
+        if isinstance(nested_files, dict):
+            nested_files = list(nested_files.values())
+        candidates = [nested_files] if isinstance(nested_files, list) else []
+        for key in ("documents", "references", "items", "file", "document", "value", "data", "merged", "result", "context"):
+            nested = raw.get(key)
+            if nested is not None and nested is not raw:
+                candidates.append(nested)
+    else:
+        candidates = raw if isinstance(raw, list) else []
+    for candidate in candidates:
+        collected.extend(_document_list_from_context(candidate, _depth=_depth + 1).references)
+
+    deduped = []
+    seen = set()
+    for ref in collected:
+        try:
+            key = ref.to_string()
+        except Exception:  # best-effort: fall back to repr() as the identity key
+            key = repr(ref)
+        if key not in seen:
+            seen.add(key)
+            deduped.append(ref)
+    return DocumentReferenceList(references=deduped)
+
+
 async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
     operation_id = None
     try:
@@ -1758,18 +1814,24 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
         operation_id = f"context_extract_{wf}_{int(time.time())}"
 
         document_list_param = parameters.get("documentList")
-        if not document_list_param:
-            return ActionResult.isFailure(error="documentList is required")
-
-        dl = coerceDocumentReferenceList(document_list_param)
+        if document_list_param:
+            dl = coerceDocumentReferenceList(document_list_param)
+            source = "documentList"
+        else:
+            context_param = parameters.get("context")
+            dl = _document_list_from_context(context_param)
+            source = "context"
         if not dl.references:
             return ActionResult.isFailure(
                 error=(
-                    f"documentList could not be parsed (type={type(document_list_param).__name__}); "
-                    "expected DocumentReferenceList, list of strings/dicts, or "
-                    "a wrapper dict like {'documents': [...]}"
+                    f"{source} could not be parsed into document references "
+                    f"(type={type((document_list_param if document_list_param else parameters.get('context'))).__name__}); "
+                    "expected DocumentReferenceList, list of string/dict refs, "
+                    "or a context payload containing file/document refs under keys like "
+                    "{documents, files, file, data, value}."
                 ),
             )
+        logger.info("extractContent resolved %d document reference(s) from %s", len(dl.references), source)
 
         parent_operation_id = parameters.get("parentOperationId")
         self.services.chat.progressLogStart(
diff --git a/modules/workflows/methods/methodContext/methodContext.py b/modules/workflows/methods/methodContext/methodContext.py
index 80e0c089..43a97f66 100644
--- a/modules/workflows/methods/methodContext/methodContext.py
+++ b/modules/workflows/methods/methodContext/methodContext.py
@@ -68,8 +68,8 @@ class MethodContext(MethodBase):
                         name="documentList",
                         type="DocumentList",
                         frontendType=FrontendType.DOCUMENT_REFERENCE,
-                        required=True,
-                        description="Document reference(s) to extract content from",
+                        required=False,
+                        description="Optional document reference(s) to extract content from. When omitted, extractContent also accepts refs via context.",
                     ),
                     "contentFilter": WorkflowActionParameter(
                         name="contentFilter",