fix: Node "Inhalt extrahieren" kann jetzt context nutzen; File Page FormGenerator und FolderTree zeigen gleiche Dateien (Filterprozess konsolidiert und vereinheitlicht)

2026-05-26 11:51:44 +02:00 · 2026-05-26 11:51:44 +02:00 · 4d1a579dbd
commit 4d1a579dbd
parent c097b28b6c
6 changed files with 177 additions and 44 deletions
--- a/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-001-extraction_text_part_1_master_keys.txt.txt
+++ b/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-001-extraction_text_part_1_master_keys.txt.txt
@ -0,0 +1,20 @@
+{
+  "partIndex": 1,
+  "partId": "9c6e7733-adfb-412b-ab8c-2131b901445a",
+  "typeGroup": "text",
+  "mimeType": "text/plain",
+  "label": "main",
+  "dataLength": 1010,
+  "metadata": {
+    "size": 1010,
+    "documentId": "ddaa7167-dd3c-49c1-86b0-5e56af5efd86",
+    "documentMimeType": "text/plain",
+    "originalFileName": "master_keys.txt",
+    "contentFormat": "extracted",
+    "intent": "extract",
+    "extractionPrompt": "Extract all content from the document",
+    "usageHint": "Use extracted content from master_keys.txt",
+    "sourceAction": "extraction.extractContent"
+  },
+  "data": "# PowerOn Master Keys\r\n# Generated on: 1758489953.7492533\r\n# WARNING: Keep this file secure and never commit to version control!\r\n\r\nprod = hb7qNiURT1GHWbjOhVJYsp21MW5YRLI9SNRAhqgtPB0=\r\nint = 9JZ201a7UMeBNsCHJfs1GdgdPoou0zrLYrcNBzvDmv0=\r\ndev = 0tfyQI59HEETi6F0bMyInVSmI0XCMWMGcO7WnZ5i988=\r\n\r\nPoweron is the 1. agent in my life\r\n\r\nFugiKap%77\r\n\r\nAPI key anthropic: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\r\n\r\nF^065319580883at\r\n\r\nevent:The 1st Poweron Event\r\n\r\n\r\nPAT GitHub: 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\r\n\r\n-----BEGIN OPENSSH PRIVATE KEY-----\r\nb3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW\r\nQyNTUxOQAAACDofCk2DPsKSUrH1XhypZYpJTnkRNj0ju7yeAu9JCdPlgAAAKCrNeHXqzXh\r\n1wAAAAtzc2gtZWQyNTUxOQAAACDofCk2DPsKSUrH1XhypZYpJTnkRNj0ju7yeAu9JCdPlg\r\nAAAEC6+y04Tm13npqKvd6EH5YL6xhp0RODCSv9I02tglNaoOh8KTYM+wpJSsfVeHKllikl\r\nOeRE2PSO7vJ4C70kJ0+WAAAAFnlvdXJfZW1haWxAZXhhbXBsZS5jb20BAgMEBQYH\r\n-----END OPENSSH PRIVATE KEY-----\r\n\r\n"
+}
--- a/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-002-extraction_result_master_keys.txt.txt
+++ b/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-002-extraction_result_master_keys.txt.txt
@ -0,0 +1,25 @@
+{
+  "documentName": "master_keys.txt",
+  "documentMimeType": "text/plain",
+  "partsCount": 1,
+  "parts": [
+    {
+      "typeGroup": "text",
+      "mimeType": "text/plain",
+      "label": "main",
+      "dataLength": 1010,
+      "metadata": {
+        "size": 1010,
+        "documentId": "ddaa7167-dd3c-49c1-86b0-5e56af5efd86",
+        "documentMimeType": "text/plain",
+        "originalFileName": "master_keys.txt",
+        "contentFormat": "extracted",
+        "intent": "extract",
+        "extractionPrompt": "Extract all content from the document",
+        "usageHint": "Use extracted content from master_keys.txt",
+        "sourceAction": "extraction.extractContent"
+      },
+      "dataPreview": "[Large data: 1010 chars - truncated]"
+    }
+  ]
+}
--- a/modules/features/graphicalEditor/nodeDefinitions/context.py
+++ b/modules/features/graphicalEditor/nodeDefinitions/context.py
@ -4,6 +4,9 @@

 from modules.shared.i18nRegistry import t

+from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import (
+    CONTEXT_BUILDER_PARAM_DESCRIPTION,
+)
 from modules.features.graphicalEditor.nodeDefinitions.flow import (
    CONTEXT_ENVELOPE_DATA_PICK_OPTIONS,
    CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS,
@ -37,9 +40,9 @@ CONTEXT_NODES = [
        ),
        "injectRunContext": True,
        "parameters": [
-            {"name": "documentList", "type": "str", "required": True, "frontendType": "hidden",
-             "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": "",
-             "graphInherit": {"port": 0, "kind": "documentListWire"}},
+            {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
+             "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
+             "graphInherit": {"port": 0, "kind": "primaryTextRef"}},
            {
                "name": "contentFilter",
                "type": "str",
--- a/modules/routes/routeDataFiles.py
+++ b/modules/routes/routeDataFiles.py
@ -668,6 +668,7 @@ def get_files(
    pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
    mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
    column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
+    owner: str = Query("me", description="'all' | 'me' | 'shared'"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
 ):
@ -699,8 +700,9 @@ def get_files(

        from modules.routes.routeHelpers import (
            handleIdsMode,
+            handleIdsInMemory,
            handleFilterValuesInMemory,
-            resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels,
+            resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels, paginateInMemory,
        )
        import modules.interfaces.interfaceDbApp as _appIface
        from modules.datamodels.datamodelPagination import AppliedViewMeta
@ -711,6 +713,10 @@ def get_files(
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
        )
        appInterface = _appIface.getInterface(currentUser)
+        owner_mode = (owner or "me").strip().lower()
+        if owner_mode not in ("all", "me", "shared"):
+            raise HTTPException(status_code=400, detail="owner must be 'all', 'me', or 'shared'")
+        current_user_id = str(getattr(currentUser, "id", "") or "")

        # Resolve view and merge config into params
        viewKey = paginationParams.viewKey if paginationParams else None
@ -722,6 +728,17 @@ def get_files(
        def _filesToDicts(fileItems):
            return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems]

+        def _apply_owner_filter(item_dicts):
+            if owner_mode == "all":
+                return item_dicts
+            if owner_mode == "me":
+                return [item for item in item_dicts if str(item.get("sysCreatedBy") or "") == current_user_id]
+            return [item for item in item_dicts if str(item.get("sysCreatedBy") or "") != current_user_id]
+
+        recordFilter = None
+        if owner_mode == "me":
+            recordFilter = {"sysCreatedBy": managementInterface.userId}
+
        if mode == "groupSummary":
            if not pagination:
                raise HTTPException(status_code=400, detail="pagination required for groupSummary")
@ -736,11 +753,12 @@ def get_files(
                )
            field = groupByLevels[0]["field"]
            null_label = str(groupByLevels[0].get("nullLabel") or "—")
-            allFiles = managementInterface.getAllFiles()
+            allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
            allItems = enrichRowsWithFkLabels(
                _filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])),
                FileItem,
            )
+            allItems = _apply_owner_filter(allItems)
            filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
            groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
            return JSONResponse(content={"groups": groups_out})
@ -748,48 +766,35 @@ def get_files(
        if mode == "filterValues":
            if not column:
                raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")
-            allFiles = managementInterface.getAllFiles()
+            allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
            items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
            itemDicts = _filesToDicts(items)
+            itemDicts = _apply_owner_filter(itemDicts)
            enrichRowsWithFkLabels(itemDicts, FileItem)
            return handleFilterValuesInMemory(itemDicts, column, pagination)

        if mode == "ids":
-            recordFilter = {"sysCreatedBy": managementInterface.userId}
-            return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
+            if owner_mode == "me":
+                return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
+            allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
+            items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
+            itemDicts = _apply_owner_filter(_filesToDicts(items))
+            enrichRowsWithFkLabels(itemDicts, FileItem)
+            return handleIdsInMemory(itemDicts, pagination)

-        if not groupByLevels:
-            # No grouping: let DB handle pagination directly (fastest path)
-            result = managementInterface.getAllFiles(pagination=paginationParams)
-            if paginationParams and hasattr(result, 'items'):
-                enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem)
-                resp: dict = {
-                    "items": enriched,
-                    "pagination": PaginationMetadata(
-                        currentPage=paginationParams.page,
-                        pageSize=paginationParams.pageSize,
-                        totalItems=result.totalItems,
-                        totalPages=result.totalPages,
-                        sort=paginationParams.sort,
-                        filters=paginationParams.filters
-                    ).model_dump(),
-                }
-            else:
-                items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
-                resp = {"items": enrichRowsWithFkLabels(_filesToDicts(items), FileItem), "pagination": None}
-            if viewMeta:
-                resp["appliedView"] = viewMeta.model_dump()
-            return resp
-
-        # Strategy B grouping: load full list, group, then slice
-        allFiles = managementInterface.getAllFiles()
+        # Strategy B: load visible list first, then filter/sort/paginate in memory.
+        # This is required for files because internal workflow artefacts are
+        # suppressed after record loading; SQL-level COUNT/LIMIT would otherwise
+        # count hidden rows and produce pages with only a handful of visible items.
+        allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
        allItems = enrichRowsWithFkLabels(
            _filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])),
            FileItem,
        )
+        allItems = _apply_owner_filter(allItems)

        from modules.routes.routeHelpers import apply_strategy_b_filters_and_sort
-        if paginationParams.filters or paginationParams.sort:
+        if paginationParams and (paginationParams.filters or paginationParams.sort):
            allItems = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)

        if not paginationParams:
@ -798,6 +803,24 @@ def get_files(
                resp["appliedView"] = viewMeta.model_dump()
            return resp

+        if not groupByLevels:
+            page_items, totalItems = paginateInMemory(allItems, paginationParams)
+            totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
+            resp = {
+                "items": page_items,
+                "pagination": PaginationMetadata(
+                    currentPage=paginationParams.page,
+                    pageSize=paginationParams.pageSize,
+                    totalItems=totalItems,
+                    totalPages=totalPages,
+                    sort=paginationParams.sort,
+                    filters=paginationParams.filters
+                ).model_dump(),
+            }
+            if viewMeta:
+                resp["appliedView"] = viewMeta.model_dump()
+            return resp
+
        totalItems = len(allItems)
        totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
        page_items, groupLayout = buildGroupLayout(allItems, groupByLevels, paginationParams.page, paginationParams.pageSize)
--- a/modules/workflows/methods/methodContext/actions/extractContent.py
+++ b/modules/workflows/methods/methodContext/actions/extractContent.py
@ -24,7 +24,7 @@ import time
 from typing import Any, Dict, List, Optional, Tuple

 from modules.datamodels.datamodelChat import ActionResult
-from modules.datamodels.datamodelDocref import coerceDocumentReferenceList
+from modules.datamodels.datamodelDocref import DocumentReferenceList, coerceDocumentReferenceList
 from modules.datamodels.datamodelExtraction import ContentExtracted, ExtractionOptions

 logger = logging.getLogger(__name__)
@ -1751,6 +1751,62 @@ def presentation_envelopes_to_document_json(
    }


+def _document_list_from_context(raw: Any, *, _depth: int = 0) -> DocumentReferenceList:
+    """Best-effort extraction of document/file references from ``context`` payloads.

+    The payload is first handed to ``coerceDocumentReferenceList``; when that
+    yields references they are returned unchanged. Otherwise the payload is
+    searched recursively: lists are walked item by item, and dicts are walked
+    through a fixed set of wrapper keys (``documents``, ``references``,
+    ``items``, ``file``, ``document``, ``value``, ``data``, ``merged``,
+    ``result``, ``context``) plus a ``files`` container that may be either a
+    dict (its values are used) or a list.

+    Args:
+        raw: Arbitrary context value (dict, list, string, reference list, ...).
+        _depth: Internal recursion counter; callers should not pass it.

+    Returns:
+        A ``DocumentReferenceList`` with the references found, de-duplicated in
+        first-seen order. It is empty when nothing was found, when ``raw`` is
+        ``None`` or ``""``, or when the nesting is deeper than 6 levels.
+    """
+    # Depth cap bounds the recursion on deeply nested or cyclic payloads.
+    if _depth > 6 or raw is None or raw == "":
+        return DocumentReferenceList(references=[])

+    # A dict that only carries ``fileId`` is re-keyed to the id/name shape
+    # before coercion; every other value is coerced as-is.
+    if isinstance(raw, dict) and "fileId" in raw and "id" not in raw and "documentId" not in raw:
+        direct = coerceDocumentReferenceList({
+            "id": raw.get("fileId"),
+            "name": raw.get("fileName") or raw.get("name"),
+        })
+    else:
+        direct = coerceDocumentReferenceList(raw)
+    if direct.references:
+        return direct

+    collected: List[Any] = []

+    def _extend_from(value: Any) -> None:
+        nested = _document_list_from_context(value, _depth=_depth + 1)
+        if nested.references:
+            collected.extend(nested.references)

+    if isinstance(raw, dict):
+        nested_files = raw.get("files")
+        if isinstance(nested_files, dict):
+            _extend_from(list(nested_files.values()))
+        elif isinstance(nested_files, list):
+            # ``files`` is not part of the generic key tuple below, so a
+            # list-valued container has to be traversed here explicitly.
+            _extend_from(nested_files)
+        for key in ("documents", "references", "items", "file", "document", "value", "data", "merged", "result", "context"):
+            nested = raw.get(key)
+            # Skip missing keys and direct self-references.
+            if nested is None or nested is raw:
+                continue
+            _extend_from(nested)
+    elif isinstance(raw, list):
+        for item in raw:
+            _extend_from(item)

+    if not collected:
+        return DocumentReferenceList(references=[])

+    # The same reference can be reached through several wrapper keys; keep
+    # only the first occurrence of each.
+    deduped: List[Any] = []
+    seen = set()
+    for ref in collected:
+        try:
+            key = ref.to_string()
+        except Exception:
+            # Deliberate best-effort: a reference that cannot be rendered
+            # is still kept, keyed by its repr.
+            key = repr(ref)
+        if key in seen:
+            continue
+        seen.add(key)
+        deduped.append(ref)
+    return DocumentReferenceList(references=deduped)
+
+
 async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
    operation_id = None
    try:
@ -1758,18 +1814,24 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
        operation_id = f"context_extract_{wf}_{int(time.time())}"

        document_list_param = parameters.get("documentList")
-        if not document_list_param:
-            return ActionResult.isFailure(error="documentList is required")
-
-        dl = coerceDocumentReferenceList(document_list_param)
+        if document_list_param:
+            dl = coerceDocumentReferenceList(document_list_param)
+            source = "documentList"
+        else:
+            context_param = parameters.get("context")
+            dl = _document_list_from_context(context_param)
+            source = "context"
        if not dl.references:
            return ActionResult.isFailure(
                error=(
-                    f"documentList could not be parsed (type={type(document_list_param).__name__}); "
-                    "expected DocumentReferenceList, list of strings/dicts, or "
-                    "a wrapper dict like {'documents': [...]}"
+                    f"{source} could not be parsed into document references "
+                    f"(type={type((document_list_param if document_list_param else parameters.get('context'))).__name__}); "
+                    "expected DocumentReferenceList, list of string/dict refs, "
+                    "or a context payload containing file/document refs under keys like "
+                    "{documents, files, file, data, value}."
                ),
            )
+        logger.info("extractContent resolved %d document reference(s) from %s", len(dl.references), source)

        parent_operation_id = parameters.get("parentOperationId")
        self.services.chat.progressLogStart(
--- a/modules/workflows/methods/methodContext/methodContext.py
+++ b/modules/workflows/methods/methodContext/methodContext.py
@ -68,8 +68,8 @@ class MethodContext(MethodBase):
                        name="documentList",
                        type="DocumentList",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
-                        required=True,
-                        description="Document reference(s) to extract content from",
+                        required=False,
+                        description="Optional document reference(s) to extract content from. When omitted, extractContent also accepts refs via context.",
                    ),
                    "contentFilter": WorkflowActionParameter(
                        name="contentFilter",