From 4d1a579dbd04434f25417a5895489068aed09da2 Mon Sep 17 00:00:00 2001 From: Ida Date: Tue, 26 May 2026 11:51:44 +0200 Subject: [PATCH] fix: node inhalt extrahieren kann jetzt context nutzen, file page formgenerator und foldertree zeigen gleiche dateien (filter prozess konsolidiert und vereinheitlicht) --- ...extraction_text_part_1_master_keys.txt.txt | 20 +++++ ...-002-extraction_result_master_keys.txt.txt | 25 ++++++ .../nodeDefinitions/context.py | 9 +- modules/routes/routeDataFiles.py | 85 ++++++++++++------- .../methodContext/actions/extractContent.py | 78 +++++++++++++++-- .../methods/methodContext/methodContext.py | 4 +- 6 files changed, 177 insertions(+), 44 deletions(-) create mode 100644 D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-001-extraction_text_part_1_master_keys.txt.txt create mode 100644 D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-002-extraction_result_master_keys.txt.txt diff --git a/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-001-extraction_text_part_1_master_keys.txt.txt b/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-001-extraction_text_part_1_master_keys.txt.txt new file mode 100644 index 00000000..d42fb2fd --- /dev/null +++ b/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-001-extraction_text_part_1_master_keys.txt.txt @@ -0,0 +1,20 @@ +{ + "partIndex": 1, + "partId": "9c6e7733-adfb-412b-ab8c-2131b901445a", + "typeGroup": "text", + "mimeType": "text/plain", + "label": "main", + "dataLength": 1010, + "metadata": { + "size": 1010, + "documentId": "ddaa7167-dd3c-49c1-86b0-5e56af5efd86", + "documentMimeType": "text/plain", + "originalFileName": "master_keys.txt", + "contentFormat": "extracted", + "intent": "extract", + "extractionPrompt": "Extract all content from the document", + "usageHint": "Use extracted content from master_keys.txt", + "sourceAction": "extraction.extractContent" + }, + "data": "# PowerOn Master Keys\r\n# Generated on: 1758489953.7492533\r\n# WARNING: Keep this file secure and never commit to version control!\r\n\r\nprod = hb7qNiURT1GHWbjOhVJYsp21MW5YRLI9SNRAhqgtPB0=\r\nint = 9JZ201a7UMeBNsCHJfs1GdgdPoou0zrLYrcNBzvDmv0=\r\ndev = 0tfyQI59HEETi6F0bMyInVSmI0XCMWMGcO7WnZ5i988=\r\n\r\nPoweron is the 1. agent in my life\r\n\r\nFugiKap%77\r\n\r\nAPI key anthropic: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\r\n\r\nF^065319580883at\r\n\r\nevent:The 1st Poweron Event\r\n\r\n\r\nPAT GitHub: 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\r\n\r\n-----BEGIN OPENSSH PRIVATE KEY-----\r\nb3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW\r\nQyNTUxOQAAACDofCk2DPsKSUrH1XhypZYpJTnkRNj0ju7yeAu9JCdPlgAAAKCrNeHXqzXh\r\n1wAAAAtzc2gtZWQyNTUxOQAAACDofCk2DPsKSUrH1XhypZYpJTnkRNj0ju7yeAu9JCdPlg\r\nAAAEC6+y04Tm13npqKvd6EH5YL6xhp0RODCSv9I02tglNaoOh8KTYM+wpJSsfVeHKllikl\r\nOeRE2PSO7vJ4C70kJ0+WAAAAFnlvdXJfZW1haWxAZXhhbXBsZS5jb20BAgMEBQYH\r\n-----END OPENSSH PRIVATE KEY-----\r\n\r\n" +} \ No newline at end of file diff --git a/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-002-extraction_result_master_keys.txt.txt b/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-002-extraction_result_master_keys.txt.txt new file mode 100644 index 00000000..d9dd9741 --- /dev/null +++ b/D:/Athi/Local/Web/poweron/local/debug/prompts/20260526-091920-002-extraction_result_master_keys.txt.txt @@ -0,0 +1,25 @@ +{ + "documentName": "master_keys.txt", + "documentMimeType": "text/plain", + "partsCount": 1, + "parts": [ + { + "typeGroup": "text", + "mimeType": "text/plain", + "label": "main", + "dataLength": 1010, + "metadata": { + "size": 1010, + "documentId": "ddaa7167-dd3c-49c1-86b0-5e56af5efd86", + "documentMimeType": "text/plain", + "originalFileName": "master_keys.txt", + "contentFormat": "extracted", + "intent": "extract", + "extractionPrompt": "Extract all content from the document", + "usageHint": "Use extracted content from master_keys.txt", + "sourceAction": "extraction.extractContent" + }, + "dataPreview": "[Large data: 1010 chars - truncated]" + } + ] +} \ No newline at end of file diff --git a/modules/features/graphicalEditor/nodeDefinitions/context.py b/modules/features/graphicalEditor/nodeDefinitions/context.py index 743d92e8..3171f58a 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/context.py +++ b/modules/features/graphicalEditor/nodeDefinitions/context.py @@ -4,6 +4,9 @@ from modules.shared.i18nRegistry import t +from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import ( + CONTEXT_BUILDER_PARAM_DESCRIPTION, +) from modules.features.graphicalEditor.nodeDefinitions.flow import ( CONTEXT_ENVELOPE_DATA_PICK_OPTIONS, CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS, @@ -37,9 +40,9 @@ CONTEXT_NODES = [ ), "injectRunContext": True, "parameters": [ - {"name": "documentList", "type": "str", "required": True, "frontendType": "hidden", - "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": "", - "graphInherit": {"port": 0, "kind": "documentListWire"}}, + {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder", + "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "", + "graphInherit": {"port": 0, "kind": "primaryTextRef"}}, { "name": "contentFilter", "type": "str", diff --git a/modules/routes/routeDataFiles.py b/modules/routes/routeDataFiles.py index 4bcbcf8f..eb44db44 100644 --- a/modules/routes/routeDataFiles.py +++ b/modules/routes/routeDataFiles.py @@ -668,6 +668,7 @@ def get_files( pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"), mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"), column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"), + owner: str = Query("me", description="'all' | 'me' | 'shared'"), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext) ): @@ -699,8 +700,9 @@ def get_files( from modules.routes.routeHelpers import ( handleIdsMode, + handleIdsInMemory, handleFilterValuesInMemory, - resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels, + resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels, paginateInMemory, ) import modules.interfaces.interfaceDbApp as _appIface from modules.datamodels.datamodelPagination import AppliedViewMeta @@ -711,6 +713,10 @@ def get_files( featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None ) appInterface = _appIface.getInterface(currentUser) + owner_mode = (owner or "me").strip().lower() + if owner_mode not in ("all", "me", "shared"): + raise HTTPException(status_code=400, detail="owner must be 'all', 'me', or 'shared'") + current_user_id = str(getattr(currentUser, "id", "") or "") # Resolve view and merge config into params viewKey = paginationParams.viewKey if paginationParams else None @@ -722,6 +728,17 @@ def get_files( def _filesToDicts(fileItems): return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems] + def _apply_owner_filter(item_dicts): + if owner_mode == "all": + return item_dicts + if owner_mode == "me": + return [item for item in item_dicts if str(item.get("sysCreatedBy") or "") == current_user_id] + return [item for item in item_dicts if str(item.get("sysCreatedBy") or "") != current_user_id] + + recordFilter = None + if owner_mode == "me": + recordFilter = {"sysCreatedBy": managementInterface.userId} + if mode == "groupSummary": if not pagination: raise HTTPException(status_code=400, detail="pagination required for groupSummary") @@ -736,11 +753,12 @@ def get_files( ) field = groupByLevels[0]["field"] null_label = str(groupByLevels[0].get("nullLabel") or "—") - allFiles = managementInterface.getAllFiles() + allFiles = managementInterface.getAllFiles(recordFilter=recordFilter) allItems = enrichRowsWithFkLabels( _filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])), FileItem, ) + allItems = _apply_owner_filter(allItems) filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser) groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels) return JSONResponse(content={"groups": groups_out}) @@ -748,48 +766,35 @@ def get_files( if mode == "filterValues": if not column: raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues") - allFiles = managementInterface.getAllFiles() + allFiles = managementInterface.getAllFiles(recordFilter=recordFilter) items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else []) itemDicts = _filesToDicts(items) + itemDicts = _apply_owner_filter(itemDicts) enrichRowsWithFkLabels(itemDicts, FileItem) return handleFilterValuesInMemory(itemDicts, column, pagination) if mode == "ids": - recordFilter = {"sysCreatedBy": managementInterface.userId} - return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter) + if owner_mode == "me": + return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter) + allFiles = managementInterface.getAllFiles(recordFilter=recordFilter) + items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else []) + itemDicts = _apply_owner_filter(_filesToDicts(items)) + enrichRowsWithFkLabels(itemDicts, FileItem) + return handleIdsInMemory(itemDicts, pagination) - if not groupByLevels: - # No grouping: let DB handle pagination directly (fastest path) - result = managementInterface.getAllFiles(pagination=paginationParams) - if paginationParams and hasattr(result, 'items'): - enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem) - resp: dict = { - "items": enriched, - "pagination": PaginationMetadata( - currentPage=paginationParams.page, - pageSize=paginationParams.pageSize, - totalItems=result.totalItems, - totalPages=result.totalPages, - sort=paginationParams.sort, - filters=paginationParams.filters - ).model_dump(), - } - else: - items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result]) - resp = {"items": enrichRowsWithFkLabels(_filesToDicts(items), FileItem), "pagination": None} - if viewMeta: - resp["appliedView"] = viewMeta.model_dump() - return resp - - # Strategy B grouping: load full list, group, then slice - allFiles = managementInterface.getAllFiles() + # Strategy B: load visible list first, then filter/sort/paginate in memory. + # This is required for files because internal workflow artefacts are + # suppressed after record loading; SQL-level COUNT/LIMIT would otherwise + # count hidden rows and produce pages with only a handful of visible items. + allFiles = managementInterface.getAllFiles(recordFilter=recordFilter) allItems = enrichRowsWithFkLabels( _filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])), FileItem, ) + allItems = _apply_owner_filter(allItems) from modules.routes.routeHelpers import apply_strategy_b_filters_and_sort - if paginationParams.filters or paginationParams.sort: + if paginationParams and (paginationParams.filters or paginationParams.sort): allItems = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser) if not paginationParams: @@ -798,6 +803,24 @@ def get_files( resp["appliedView"] = viewMeta.model_dump() return resp + if not groupByLevels: + page_items, totalItems = paginateInMemory(allItems, paginationParams) + totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0 + resp = { + "items": page_items, + "pagination": PaginationMetadata( + currentPage=paginationParams.page, + pageSize=paginationParams.pageSize, + totalItems=totalItems, + totalPages=totalPages, + sort=paginationParams.sort, + filters=paginationParams.filters + ).model_dump(), + } + if viewMeta: + resp["appliedView"] = viewMeta.model_dump() + return resp + totalItems = len(allItems) totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0 page_items, groupLayout = buildGroupLayout(allItems, groupByLevels, paginationParams.page, paginationParams.pageSize) diff --git a/modules/workflows/methods/methodContext/actions/extractContent.py b/modules/workflows/methods/methodContext/actions/extractContent.py index 52d07b34..f6674124 100644 --- a/modules/workflows/methods/methodContext/actions/extractContent.py +++ b/modules/workflows/methods/methodContext/actions/extractContent.py @@ -24,7 +24,7 @@ import time from typing import Any, Dict, List, Optional, Tuple from modules.datamodels.datamodelChat import ActionResult -from modules.datamodels.datamodelDocref import coerceDocumentReferenceList +from modules.datamodels.datamodelDocref import DocumentReferenceList, coerceDocumentReferenceList from modules.datamodels.datamodelExtraction import ContentExtracted, ExtractionOptions logger = logging.getLogger(__name__) @@ -1751,6 +1751,62 @@ def presentation_envelopes_to_document_json( } +def _document_list_from_context(raw: Any, *, _depth: int = 0) -> DocumentReferenceList: + """Best-effort extraction of document/file references from ``context`` payloads. + + Supports direct DocumentList-like values plus nested shapes commonly produced + by DataPicker selections, ActionResult wrappers, and file/files containers. + """ + if _depth > 6 or raw is None or raw == "": + return DocumentReferenceList(references=[]) + + if isinstance(raw, dict) and "fileId" in raw and "id" not in raw and "documentId" not in raw: + direct = coerceDocumentReferenceList({ + "id": raw.get("fileId"), + "name": raw.get("fileName") or raw.get("name"), + }) + else: + direct = coerceDocumentReferenceList(raw) + if direct.references: + return direct + + collected = [] + + def _extend_from(value: Any) -> None: + nested = _document_list_from_context(value, _depth=_depth + 1) + if nested.references: + collected.extend(nested.references) + + if isinstance(raw, dict): + nested_files = raw.get("files") + if isinstance(nested_files, dict): + _extend_from(list(nested_files.values())) + for key in ("documents", "references", "items", "file", "document", "value", "data", "merged", "result", "context"): + nested = raw.get(key) + if nested is None or nested is raw: + continue + _extend_from(nested) + elif isinstance(raw, list): + for item in raw: + _extend_from(item) + + if not collected: + return DocumentReferenceList(references=[]) + + deduped = [] + seen = set() + for ref in collected: + try: + key = ref.to_string() + except Exception: + key = repr(ref) + if key in seen: + continue + seen.add(key) + deduped.append(ref) + return DocumentReferenceList(references=deduped) + + async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: operation_id = None try: @@ -1758,18 +1814,24 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: operation_id = f"context_extract_{wf}_{int(time.time())}" document_list_param = parameters.get("documentList") - if not document_list_param: - return ActionResult.isFailure(error="documentList is required") - - dl = coerceDocumentReferenceList(document_list_param) + if document_list_param: + dl = coerceDocumentReferenceList(document_list_param) + source = "documentList" + else: + context_param = parameters.get("context") + dl = _document_list_from_context(context_param) + source = "context" if not dl.references: return ActionResult.isFailure( error=( - f"documentList could not be parsed (type={type(document_list_param).__name__}); " - "expected DocumentReferenceList, list of strings/dicts, or " - "a wrapper dict like {'documents': [...]}" + f"{source} could not be parsed into document references " + f"(type={type((document_list_param if document_list_param else parameters.get('context'))).__name__}); " + "expected DocumentReferenceList, list of string/dict refs, " + "or a context payload containing file/document refs under keys like " + "{documents, files, file, data, value}." ), ) + logger.info("extractContent resolved %d document reference(s) from %s", len(dl.references), source) parent_operation_id = parameters.get("parentOperationId") self.services.chat.progressLogStart( diff --git a/modules/workflows/methods/methodContext/methodContext.py b/modules/workflows/methods/methodContext/methodContext.py index 80e0c089..43a97f66 100644 --- a/modules/workflows/methods/methodContext/methodContext.py +++ b/modules/workflows/methods/methodContext/methodContext.py @@ -68,8 +68,8 @@ class MethodContext(MethodBase): name="documentList", type="DocumentList", frontendType=FrontendType.DOCUMENT_REFERENCE, - required=True, - description="Document reference(s) to extract content from", + required=False, + description="Optional document reference(s) to extract content from. When omitted, extractContent also accepts refs via context.", ), "contentFilter": WorkflowActionParameter( name="contentFilter",