fix: node inhalt extrahieren kann jetzt context nutzen, file page formgenerator und foldertree zeigen gleiche dateien (filter prozess konsolidiert und vereinheitlicht)

This commit is contained in:
Ida 2026-05-26 11:51:44 +02:00
parent c097b28b6c
commit 4d1a579dbd
6 changed files with 177 additions and 44 deletions

View file

@ -0,0 +1,20 @@
{
"partIndex": 1,
"partId": "9c6e7733-adfb-412b-ab8c-2131b901445a",
"typeGroup": "text",
"mimeType": "text/plain",
"label": "main",
"dataLength": 1010,
"metadata": {
"size": 1010,
"documentId": "ddaa7167-dd3c-49c1-86b0-5e56af5efd86",
"documentMimeType": "text/plain",
"originalFileName": "master_keys.txt",
"contentFormat": "extracted",
"intent": "extract",
"extractionPrompt": "Extract all content from the document",
"usageHint": "Use extracted content from master_keys.txt",
"sourceAction": "extraction.extractContent"
},
"data": "# PowerOn Master Keys\r\n# Generated on: 1758489953.7492533\r\n# WARNING: Keep this file secure and never commit to version control!\r\n\r\nprod = hb7qNiURT1GHWbjOhVJYsp21MW5YRLI9SNRAhqgtPB0=\r\nint = 9JZ201a7UMeBNsCHJfs1GdgdPoou0zrLYrcNBzvDmv0=\r\ndev = 0tfyQI59HEETi6F0bMyInVSmI0XCMWMGcO7WnZ5i988=\r\n\r\nPoweron is the 1. agent in my life\r\n\r\nFugiKap%77\r\n\r\nAPI key anthropic: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\r\n\r\nF^065319580883at\r\n\r\nevent:The 1st Poweron Event\r\n\r\n\r\nPAT GitHub: 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\r\n\r\n-----BEGIN OPENSSH PRIVATE KEY-----\r\nb3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW\r\nQyNTUxOQAAACDofCk2DPsKSUrH1XhypZYpJTnkRNj0ju7yeAu9JCdPlgAAAKCrNeHXqzXh\r\n1wAAAAtzc2gtZWQyNTUxOQAAACDofCk2DPsKSUrH1XhypZYpJTnkRNj0ju7yeAu9JCdPlg\r\nAAAEC6+y04Tm13npqKvd6EH5YL6xhp0RODCSv9I02tglNaoOh8KTYM+wpJSsfVeHKllikl\r\nOeRE2PSO7vJ4C70kJ0+WAAAAFnlvdXJfZW1haWxAZXhhbXBsZS5jb20BAgMEBQYH\r\n-----END OPENSSH PRIVATE KEY-----\r\n\r\n"
}

View file

@ -0,0 +1,25 @@
{
"documentName": "master_keys.txt",
"documentMimeType": "text/plain",
"partsCount": 1,
"parts": [
{
"typeGroup": "text",
"mimeType": "text/plain",
"label": "main",
"dataLength": 1010,
"metadata": {
"size": 1010,
"documentId": "ddaa7167-dd3c-49c1-86b0-5e56af5efd86",
"documentMimeType": "text/plain",
"originalFileName": "master_keys.txt",
"contentFormat": "extracted",
"intent": "extract",
"extractionPrompt": "Extract all content from the document",
"usageHint": "Use extracted content from master_keys.txt",
"sourceAction": "extraction.extractContent"
},
"dataPreview": "[Large data: 1010 chars - truncated]"
}
]
}

View file

@ -4,6 +4,9 @@
from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import (
CONTEXT_BUILDER_PARAM_DESCRIPTION,
)
from modules.features.graphicalEditor.nodeDefinitions.flow import (
CONTEXT_ENVELOPE_DATA_PICK_OPTIONS,
CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS,
@ -37,9 +40,9 @@ CONTEXT_NODES = [
),
"injectRunContext": True,
"parameters": [
{"name": "documentList", "type": "str", "required": True, "frontendType": "hidden",
"description": t("Dokumentenliste (via Wire oder DataRef)"), "default": "",
"graphInherit": {"port": 0, "kind": "documentListWire"}},
{"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
"description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
"graphInherit": {"port": 0, "kind": "primaryTextRef"}},
{
"name": "contentFilter",
"type": "str",

View file

@ -668,6 +668,7 @@ def get_files(
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
owner: str = Query("me", description="'all' | 'me' | 'shared'"),
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
):
@ -699,8 +700,9 @@ def get_files(
from modules.routes.routeHelpers import (
handleIdsMode,
handleIdsInMemory,
handleFilterValuesInMemory,
resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels,
resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels, paginateInMemory,
)
import modules.interfaces.interfaceDbApp as _appIface
from modules.datamodels.datamodelPagination import AppliedViewMeta
@ -711,6 +713,10 @@ def get_files(
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
)
appInterface = _appIface.getInterface(currentUser)
owner_mode = (owner or "me").strip().lower()
if owner_mode not in ("all", "me", "shared"):
raise HTTPException(status_code=400, detail="owner must be 'all', 'me', or 'shared'")
current_user_id = str(getattr(currentUser, "id", "") or "")
# Resolve view and merge config into params
viewKey = paginationParams.viewKey if paginationParams else None
@ -722,6 +728,17 @@ def get_files(
def _filesToDicts(fileItems):
    """Return a list with every file record converted to a plain dict.

    Records exposing ``model_dump`` are dumped through it, records that are
    already dicts are passed through unchanged (same object, no copy), and
    anything else is handed to ``dict()``.
    """
    def _as_dict(record):
        if hasattr(record, "model_dump"):
            return record.model_dump()
        if isinstance(record, dict):
            return record
        return dict(record)

    return list(map(_as_dict, fileItems))
def _apply_owner_filter(item_dicts):
    """Restrict file rows according to the requested ``owner`` mode.

    ``"all"`` returns the input list untouched. ``"me"`` keeps the rows whose
    ``sysCreatedBy`` (stringified, missing/falsy treated as ``""``) equals the
    current user id; any other mode keeps the rows where it differs.
    """
    if owner_mode == "all":
        return item_dicts

    want_own = owner_mode == "me"
    kept = []
    for row in item_dicts:
        creator = str(row.get("sysCreatedBy") or "")
        # Keep own rows in "me" mode, foreign rows otherwise.
        if (creator == current_user_id) == want_own:
            kept.append(row)
    return kept
recordFilter = None
if owner_mode == "me":
recordFilter = {"sysCreatedBy": managementInterface.userId}
if mode == "groupSummary":
if not pagination:
raise HTTPException(status_code=400, detail="pagination required for groupSummary")
@ -736,11 +753,12 @@ def get_files(
)
field = groupByLevels[0]["field"]
null_label = str(groupByLevels[0].get("nullLabel") or "")
allFiles = managementInterface.getAllFiles()
allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
allItems = enrichRowsWithFkLabels(
_filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])),
FileItem,
)
allItems = _apply_owner_filter(allItems)
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
return JSONResponse(content={"groups": groups_out})
@ -748,48 +766,35 @@ def get_files(
if mode == "filterValues":
if not column:
raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")
allFiles = managementInterface.getAllFiles()
allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
itemDicts = _filesToDicts(items)
itemDicts = _apply_owner_filter(itemDicts)
enrichRowsWithFkLabels(itemDicts, FileItem)
return handleFilterValuesInMemory(itemDicts, column, pagination)
if mode == "ids":
recordFilter = {"sysCreatedBy": managementInterface.userId}
return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
if owner_mode == "me":
return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
itemDicts = _apply_owner_filter(_filesToDicts(items))
enrichRowsWithFkLabels(itemDicts, FileItem)
return handleIdsInMemory(itemDicts, pagination)
if not groupByLevels:
# No grouping: let DB handle pagination directly (fastest path)
result = managementInterface.getAllFiles(pagination=paginationParams)
if paginationParams and hasattr(result, 'items'):
enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem)
resp: dict = {
"items": enriched,
"pagination": PaginationMetadata(
currentPage=paginationParams.page,
pageSize=paginationParams.pageSize,
totalItems=result.totalItems,
totalPages=result.totalPages,
sort=paginationParams.sort,
filters=paginationParams.filters
).model_dump(),
}
else:
items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
resp = {"items": enrichRowsWithFkLabels(_filesToDicts(items), FileItem), "pagination": None}
if viewMeta:
resp["appliedView"] = viewMeta.model_dump()
return resp
# Strategy B grouping: load full list, group, then slice
allFiles = managementInterface.getAllFiles()
# Strategy B: load visible list first, then filter/sort/paginate in memory.
# This is required for files because internal workflow artefacts are
# suppressed after record loading; SQL-level COUNT/LIMIT would otherwise
# count hidden rows and produce pages with only a handful of visible items.
allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
allItems = enrichRowsWithFkLabels(
_filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])),
FileItem,
)
allItems = _apply_owner_filter(allItems)
from modules.routes.routeHelpers import apply_strategy_b_filters_and_sort
if paginationParams.filters or paginationParams.sort:
if paginationParams and (paginationParams.filters or paginationParams.sort):
allItems = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
if not paginationParams:
@ -798,6 +803,24 @@ def get_files(
resp["appliedView"] = viewMeta.model_dump()
return resp
if not groupByLevels:
page_items, totalItems = paginateInMemory(allItems, paginationParams)
totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
resp = {
"items": page_items,
"pagination": PaginationMetadata(
currentPage=paginationParams.page,
pageSize=paginationParams.pageSize,
totalItems=totalItems,
totalPages=totalPages,
sort=paginationParams.sort,
filters=paginationParams.filters
).model_dump(),
}
if viewMeta:
resp["appliedView"] = viewMeta.model_dump()
return resp
totalItems = len(allItems)
totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
page_items, groupLayout = buildGroupLayout(allItems, groupByLevels, paginationParams.page, paginationParams.pageSize)

View file

@ -24,7 +24,7 @@ import time
from typing import Any, Dict, List, Optional, Tuple
from modules.datamodels.datamodelChat import ActionResult
from modules.datamodels.datamodelDocref import coerceDocumentReferenceList
from modules.datamodels.datamodelDocref import DocumentReferenceList, coerceDocumentReferenceList
from modules.datamodels.datamodelExtraction import ContentExtracted, ExtractionOptions
logger = logging.getLogger(__name__)
@ -1751,6 +1751,62 @@ def presentation_envelopes_to_document_json(
}
def _document_list_from_context(raw: Any, *, _depth: int = 0) -> DocumentReferenceList:
    """Best-effort extraction of document/file references from ``context`` payloads.

    The value is first passed to ``coerceDocumentReferenceList``. A dict that
    carries ``fileId`` but neither ``id`` nor ``documentId`` is rewritten to an
    ``{"id": ..., "name": ...}`` dict before that call. If this yields
    references they are returned as-is.

    Otherwise nested containers are searched recursively:

    * dicts: the ``files`` entry (a dict is searched by its values, any other
      non-None value is searched directly), then the keys ``documents``,
      ``references``, ``items``, ``file``, ``document``, ``value``, ``data``,
      ``merged``, ``result`` and ``context``;
    * lists and tuples: every element.

    Args:
        raw: Context payload of arbitrary shape.
        _depth: Internal recursion counter; the search gives up once it
            exceeds 6 levels, which also bounds self-referencing payloads.

    Returns:
        A ``DocumentReferenceList`` holding the references that were found,
        de-duplicated in first-seen order; empty when nothing was found.
    """
    max_depth = 6
    if _depth > max_depth or raw is None or raw == "":
        return DocumentReferenceList(references=[])

    if isinstance(raw, dict) and "fileId" in raw and "id" not in raw and "documentId" not in raw:
        # File-picker style dict: map it onto the id/name shape before coercing.
        direct = coerceDocumentReferenceList({
            "id": raw.get("fileId"),
            "name": raw.get("fileName") or raw.get("name"),
        })
    else:
        direct = coerceDocumentReferenceList(raw)
    if direct.references:
        return direct

    collected = []

    def _extend_from(value: Any) -> None:
        """Recurse into ``value`` and collect whatever references it yields."""
        nested = _document_list_from_context(value, _depth=_depth + 1)
        if nested.references:
            collected.extend(nested.references)

    if isinstance(raw, dict):
        nested_files = raw.get("files")
        if isinstance(nested_files, dict):
            # Mapping of key -> file entry: only the entries matter.
            nested_files = list(nested_files.values())
        if nested_files is not None and nested_files is not raw:
            # Also covers ``files`` given as a list/tuple or a single entry,
            # which was previously skipped.
            _extend_from(nested_files)
        for key in ("documents", "references", "items", "file", "document", "value", "data", "merged", "result", "context"):
            nested = raw.get(key)
            if nested is None or nested is raw:
                continue
            _extend_from(nested)
    elif isinstance(raw, (list, tuple)):
        for item in raw:
            _extend_from(item)

    if not collected:
        return DocumentReferenceList(references=[])

    deduped = []
    seen = set()
    for ref in collected:
        try:
            key = ref.to_string()
        except Exception:
            # Deliberate best-effort: fall back to repr() as the identity key
            # when a reference cannot be rendered as a string.
            key = repr(ref)
        if key in seen:
            continue
        seen.add(key)
        deduped.append(ref)
    return DocumentReferenceList(references=deduped)
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
operation_id = None
try:
@ -1758,18 +1814,24 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
operation_id = f"context_extract_{wf}_{int(time.time())}"
document_list_param = parameters.get("documentList")
if not document_list_param:
return ActionResult.isFailure(error="documentList is required")
dl = coerceDocumentReferenceList(document_list_param)
if document_list_param:
dl = coerceDocumentReferenceList(document_list_param)
source = "documentList"
else:
context_param = parameters.get("context")
dl = _document_list_from_context(context_param)
source = "context"
if not dl.references:
return ActionResult.isFailure(
error=(
f"documentList could not be parsed (type={type(document_list_param).__name__}); "
"expected DocumentReferenceList, list of strings/dicts, or "
"a wrapper dict like {'documents': [...]}"
f"{source} could not be parsed into document references "
f"(type={type((document_list_param if document_list_param else parameters.get('context'))).__name__}); "
"expected DocumentReferenceList, list of string/dict refs, "
"or a context payload containing file/document refs under keys like "
"{documents, files, file, data, value}."
),
)
logger.info("extractContent resolved %d document reference(s) from %s", len(dl.references), source)
parent_operation_id = parameters.get("parentOperationId")
self.services.chat.progressLogStart(

View file

@ -68,8 +68,8 @@ class MethodContext(MethodBase):
name="documentList",
type="DocumentList",
frontendType=FrontendType.DOCUMENT_REFERENCE,
required=True,
description="Document reference(s) to extract content from",
required=False,
description="Optional document reference(s) to extract content from. When omitted, extractContent also accepts refs via context.",
),
"contentFilter": WorkflowActionParameter(
name="contentFilter",