fix: node inhalt extrahieren kann jetzt context nutzen, file page formgenerator und foldertree zeigen gleiche dateien (filter prozess konsolidiert und vereinheitlicht)

This commit is contained in:
Ida 2026-05-26 11:51:44 +02:00
parent c097b28b6c
commit 4d1a579dbd
6 changed files with 177 additions and 44 deletions

View file

@ -0,0 +1,20 @@
{
"partIndex": 1,
"partId": "9c6e7733-adfb-412b-ab8c-2131b901445a",
"typeGroup": "text",
"mimeType": "text/plain",
"label": "main",
"dataLength": 1010,
"metadata": {
"size": 1010,
"documentId": "ddaa7167-dd3c-49c1-86b0-5e56af5efd86",
"documentMimeType": "text/plain",
"originalFileName": "master_keys.txt",
"contentFormat": "extracted",
"intent": "extract",
"extractionPrompt": "Extract all content from the document",
"usageHint": "Use extracted content from master_keys.txt",
"sourceAction": "extraction.extractContent"
},
"data": "# PowerOn Master Keys\r\n# Generated on: 1758489953.7492533\r\n# WARNING: Keep this file secure and never commit to version control!\r\n\r\nprod = hb7qNiURT1GHWbjOhVJYsp21MW5YRLI9SNRAhqgtPB0=\r\nint = 9JZ201a7UMeBNsCHJfs1GdgdPoou0zrLYrcNBzvDmv0=\r\ndev = 0tfyQI59HEETi6F0bMyInVSmI0XCMWMGcO7WnZ5i988=\r\n\r\nPoweron is the 1. agent in my life\r\n\r\nFugiKap%77\r\n\r\nAPI key anthropic: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\r\n\r\nF^065319580883at\r\n\r\nevent:The 1st Poweron Event\r\n\r\n\r\nPAT GitHub: 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\r\n\r\n-----BEGIN OPENSSH PRIVATE KEY-----\r\nb3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW\r\nQyNTUxOQAAACDofCk2DPsKSUrH1XhypZYpJTnkRNj0ju7yeAu9JCdPlgAAAKCrNeHXqzXh\r\n1wAAAAtzc2gtZWQyNTUxOQAAACDofCk2DPsKSUrH1XhypZYpJTnkRNj0ju7yeAu9JCdPlg\r\nAAAEC6+y04Tm13npqKvd6EH5YL6xhp0RODCSv9I02tglNaoOh8KTYM+wpJSsfVeHKllikl\r\nOeRE2PSO7vJ4C70kJ0+WAAAAFnlvdXJfZW1haWxAZXhhbXBsZS5jb20BAgMEBQYH\r\n-----END OPENSSH PRIVATE KEY-----\r\n\r\n"
}

View file

@ -0,0 +1,25 @@
{
"documentName": "master_keys.txt",
"documentMimeType": "text/plain",
"partsCount": 1,
"parts": [
{
"typeGroup": "text",
"mimeType": "text/plain",
"label": "main",
"dataLength": 1010,
"metadata": {
"size": 1010,
"documentId": "ddaa7167-dd3c-49c1-86b0-5e56af5efd86",
"documentMimeType": "text/plain",
"originalFileName": "master_keys.txt",
"contentFormat": "extracted",
"intent": "extract",
"extractionPrompt": "Extract all content from the document",
"usageHint": "Use extracted content from master_keys.txt",
"sourceAction": "extraction.extractContent"
},
"dataPreview": "[Large data: 1010 chars - truncated]"
}
]
}

View file

@ -4,6 +4,9 @@
from modules.shared.i18nRegistry import t from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import (
CONTEXT_BUILDER_PARAM_DESCRIPTION,
)
from modules.features.graphicalEditor.nodeDefinitions.flow import ( from modules.features.graphicalEditor.nodeDefinitions.flow import (
CONTEXT_ENVELOPE_DATA_PICK_OPTIONS, CONTEXT_ENVELOPE_DATA_PICK_OPTIONS,
CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS, CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS,
@ -37,9 +40,9 @@ CONTEXT_NODES = [
), ),
"injectRunContext": True, "injectRunContext": True,
"parameters": [ "parameters": [
{"name": "documentList", "type": "str", "required": True, "frontendType": "hidden", {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
"description": t("Dokumentenliste (via Wire oder DataRef)"), "default": "", "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
"graphInherit": {"port": 0, "kind": "documentListWire"}}, "graphInherit": {"port": 0, "kind": "primaryTextRef"}},
{ {
"name": "contentFilter", "name": "contentFilter",
"type": "str", "type": "str",

View file

@ -668,6 +668,7 @@ def get_files(
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"), pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"), mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"), column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
owner: str = Query("me", description="'all' | 'me' | 'shared'"),
currentUser: User = Depends(getCurrentUser), currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext) context: RequestContext = Depends(getRequestContext)
): ):
@ -699,8 +700,9 @@ def get_files(
from modules.routes.routeHelpers import ( from modules.routes.routeHelpers import (
handleIdsMode, handleIdsMode,
handleIdsInMemory,
handleFilterValuesInMemory, handleFilterValuesInMemory,
resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels, resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels, paginateInMemory,
) )
import modules.interfaces.interfaceDbApp as _appIface import modules.interfaces.interfaceDbApp as _appIface
from modules.datamodels.datamodelPagination import AppliedViewMeta from modules.datamodels.datamodelPagination import AppliedViewMeta
@ -711,6 +713,10 @@ def get_files(
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
) )
appInterface = _appIface.getInterface(currentUser) appInterface = _appIface.getInterface(currentUser)
owner_mode = (owner or "me").strip().lower()
if owner_mode not in ("all", "me", "shared"):
raise HTTPException(status_code=400, detail="owner must be 'all', 'me', or 'shared'")
current_user_id = str(getattr(currentUser, "id", "") or "")
# Resolve view and merge config into params # Resolve view and merge config into params
viewKey = paginationParams.viewKey if paginationParams else None viewKey = paginationParams.viewKey if paginationParams else None
@ -722,6 +728,17 @@ def get_files(
def _filesToDicts(fileItems): def _filesToDicts(fileItems):
return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems] return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems]
def _apply_owner_filter(item_dicts):
    """Restrict file rows according to the requested ``owner`` mode.

    ``"all"`` returns the rows untouched, ``"me"`` keeps only rows whose
    ``sysCreatedBy`` equals the current user id, and any other mode keeps
    only rows created by someone else.
    """
    if owner_mode == "all":
        return item_dicts
    # True -> keep the caller's own rows; False -> keep everybody else's.
    keep_own = owner_mode == "me"
    return [
        row
        for row in item_dicts
        if (str(row.get("sysCreatedBy") or "") == current_user_id) is keep_own
    ]
recordFilter = None
if owner_mode == "me":
recordFilter = {"sysCreatedBy": managementInterface.userId}
if mode == "groupSummary": if mode == "groupSummary":
if not pagination: if not pagination:
raise HTTPException(status_code=400, detail="pagination required for groupSummary") raise HTTPException(status_code=400, detail="pagination required for groupSummary")
@ -736,11 +753,12 @@ def get_files(
) )
field = groupByLevels[0]["field"] field = groupByLevels[0]["field"]
null_label = str(groupByLevels[0].get("nullLabel") or "") null_label = str(groupByLevels[0].get("nullLabel") or "")
allFiles = managementInterface.getAllFiles() allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
allItems = enrichRowsWithFkLabels( allItems = enrichRowsWithFkLabels(
_filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])), _filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])),
FileItem, FileItem,
) )
allItems = _apply_owner_filter(allItems)
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser) filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels) groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
return JSONResponse(content={"groups": groups_out}) return JSONResponse(content={"groups": groups_out})
@ -748,48 +766,35 @@ def get_files(
if mode == "filterValues": if mode == "filterValues":
if not column: if not column:
raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues") raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")
allFiles = managementInterface.getAllFiles() allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else []) items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
itemDicts = _filesToDicts(items) itemDicts = _filesToDicts(items)
itemDicts = _apply_owner_filter(itemDicts)
enrichRowsWithFkLabels(itemDicts, FileItem) enrichRowsWithFkLabels(itemDicts, FileItem)
return handleFilterValuesInMemory(itemDicts, column, pagination) return handleFilterValuesInMemory(itemDicts, column, pagination)
if mode == "ids": if mode == "ids":
recordFilter = {"sysCreatedBy": managementInterface.userId} if owner_mode == "me":
return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter) return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
itemDicts = _apply_owner_filter(_filesToDicts(items))
enrichRowsWithFkLabels(itemDicts, FileItem)
return handleIdsInMemory(itemDicts, pagination)
if not groupByLevels: # Strategy B: load visible list first, then filter/sort/paginate in memory.
# No grouping: let DB handle pagination directly (fastest path) # This is required for files because internal workflow artefacts are
result = managementInterface.getAllFiles(pagination=paginationParams) # suppressed after record loading; SQL-level COUNT/LIMIT would otherwise
if paginationParams and hasattr(result, 'items'): # count hidden rows and produce pages with only a handful of visible items.
enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem) allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
resp: dict = {
"items": enriched,
"pagination": PaginationMetadata(
currentPage=paginationParams.page,
pageSize=paginationParams.pageSize,
totalItems=result.totalItems,
totalPages=result.totalPages,
sort=paginationParams.sort,
filters=paginationParams.filters
).model_dump(),
}
else:
items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
resp = {"items": enrichRowsWithFkLabels(_filesToDicts(items), FileItem), "pagination": None}
if viewMeta:
resp["appliedView"] = viewMeta.model_dump()
return resp
# Strategy B grouping: load full list, group, then slice
allFiles = managementInterface.getAllFiles()
allItems = enrichRowsWithFkLabels( allItems = enrichRowsWithFkLabels(
_filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])), _filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])),
FileItem, FileItem,
) )
allItems = _apply_owner_filter(allItems)
from modules.routes.routeHelpers import apply_strategy_b_filters_and_sort from modules.routes.routeHelpers import apply_strategy_b_filters_and_sort
if paginationParams.filters or paginationParams.sort: if paginationParams and (paginationParams.filters or paginationParams.sort):
allItems = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser) allItems = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
if not paginationParams: if not paginationParams:
@ -798,6 +803,24 @@ def get_files(
resp["appliedView"] = viewMeta.model_dump() resp["appliedView"] = viewMeta.model_dump()
return resp return resp
if not groupByLevels:
page_items, totalItems = paginateInMemory(allItems, paginationParams)
totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
resp = {
"items": page_items,
"pagination": PaginationMetadata(
currentPage=paginationParams.page,
pageSize=paginationParams.pageSize,
totalItems=totalItems,
totalPages=totalPages,
sort=paginationParams.sort,
filters=paginationParams.filters
).model_dump(),
}
if viewMeta:
resp["appliedView"] = viewMeta.model_dump()
return resp
totalItems = len(allItems) totalItems = len(allItems)
totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0 totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
page_items, groupLayout = buildGroupLayout(allItems, groupByLevels, paginationParams.page, paginationParams.pageSize) page_items, groupLayout = buildGroupLayout(allItems, groupByLevels, paginationParams.page, paginationParams.pageSize)

View file

@ -24,7 +24,7 @@ import time
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
from modules.datamodels.datamodelChat import ActionResult from modules.datamodels.datamodelChat import ActionResult
from modules.datamodels.datamodelDocref import coerceDocumentReferenceList from modules.datamodels.datamodelDocref import DocumentReferenceList, coerceDocumentReferenceList
from modules.datamodels.datamodelExtraction import ContentExtracted, ExtractionOptions from modules.datamodels.datamodelExtraction import ContentExtracted, ExtractionOptions
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -1751,6 +1751,62 @@ def presentation_envelopes_to_document_json(
} }
def _document_list_from_context(raw: Any, *, _depth: int = 0) -> DocumentReferenceList:
    """Best-effort extraction of document/file references from a ``context`` payload.

    The value is first handed to ``coerceDocumentReferenceList`` as-is (a bare
    ``{"fileId": ..., "fileName"/"name": ...}`` dict is remapped to
    ``{"id": ..., "name": ...}`` before coercion). If that yields no
    references, the function recurses into well-known container keys of a
    dict, or into every element of a list, and merges whatever it finds.

    Args:
        raw: Arbitrary context value (reference list, dict wrapper, list, ...).
        _depth: Internal recursion counter; recursion stops beyond depth 6.

    Returns:
        A ``DocumentReferenceList`` with duplicates removed; empty when no
        references could be found.
    """
    if _depth > 6 or raw is None or raw == "":
        return DocumentReferenceList(references=[])

    # A dict carrying only ``fileId`` (no ``id``/``documentId``) is remapped
    # to the id/name shape before coercion.
    if isinstance(raw, dict) and "fileId" in raw and "id" not in raw and "documentId" not in raw:
        direct = coerceDocumentReferenceList({
            "id": raw.get("fileId"),
            "name": raw.get("fileName") or raw.get("name"),
        })
    else:
        direct = coerceDocumentReferenceList(raw)
    if direct.references:
        return direct

    collected = []

    def _extend_from(value: Any) -> None:
        # Recurse one level deeper and keep any references found there.
        nested = _document_list_from_context(value, _depth=_depth + 1)
        if nested.references:
            collected.extend(nested.references)

    if isinstance(raw, dict):
        nested_files = raw.get("files")
        if isinstance(nested_files, dict):
            _extend_from(list(nested_files.values()))
        elif isinstance(nested_files, list):
            # ``files`` given as a plain list was previously skipped because
            # "files" is not part of the generic key tuple below.
            _extend_from(nested_files)
        for key in ("documents", "references", "items", "file", "document", "value", "data", "merged", "result", "context"):
            nested = raw.get(key)
            if nested is None or nested is raw:
                continue
            _extend_from(nested)
    elif isinstance(raw, list):
        for item in raw:
            _extend_from(item)

    if not collected:
        return DocumentReferenceList(references=[])

    # De-duplicate while preserving first-seen order.
    deduped = []
    seen = set()
    for ref in collected:
        try:
            key = ref.to_string()
        except Exception:
            # Deliberate best-effort: fall back to repr() when a reference
            # cannot be rendered via to_string().
            key = repr(ref)
        if key in seen:
            continue
        seen.add(key)
        deduped.append(ref)
    return DocumentReferenceList(references=deduped)
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
operation_id = None operation_id = None
try: try:
@ -1758,18 +1814,24 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
operation_id = f"context_extract_{wf}_{int(time.time())}" operation_id = f"context_extract_{wf}_{int(time.time())}"
document_list_param = parameters.get("documentList") document_list_param = parameters.get("documentList")
if not document_list_param: if document_list_param:
return ActionResult.isFailure(error="documentList is required") dl = coerceDocumentReferenceList(document_list_param)
source = "documentList"
dl = coerceDocumentReferenceList(document_list_param) else:
context_param = parameters.get("context")
dl = _document_list_from_context(context_param)
source = "context"
if not dl.references: if not dl.references:
return ActionResult.isFailure( return ActionResult.isFailure(
error=( error=(
f"documentList could not be parsed (type={type(document_list_param).__name__}); " f"{source} could not be parsed into document references "
"expected DocumentReferenceList, list of strings/dicts, or " f"(type={type((document_list_param if document_list_param else parameters.get('context'))).__name__}); "
"a wrapper dict like {'documents': [...]}" "expected DocumentReferenceList, list of string/dict refs, "
"or a context payload containing file/document refs under keys like "
"{documents, files, file, data, value}."
), ),
) )
logger.info("extractContent resolved %d document reference(s) from %s", len(dl.references), source)
parent_operation_id = parameters.get("parentOperationId") parent_operation_id = parameters.get("parentOperationId")
self.services.chat.progressLogStart( self.services.chat.progressLogStart(

View file

@ -68,8 +68,8 @@ class MethodContext(MethodBase):
name="documentList", name="documentList",
type="DocumentList", type="DocumentList",
frontendType=FrontendType.DOCUMENT_REFERENCE, frontendType=FrontendType.DOCUMENT_REFERENCE,
required=True, required=False,
description="Document reference(s) to extract content from", description="Optional document reference(s) to extract content from. When omitted, extractContent also accepts refs via context.",
), ),
"contentFilter": WorkflowActionParameter( "contentFilter": WorkflowActionParameter(
name="contentFilter", name="contentFilter",