fix: Node „Inhalt extrahieren“ kann jetzt Context nutzen; File-Page, FormGenerator und FolderTree zeigen die gleichen Dateien (Filterprozess konsolidiert und vereinheitlicht)
This commit is contained in:
parent
c097b28b6c
commit
4d1a579dbd
6 changed files with 177 additions and 44 deletions
|
|
@ -0,0 +1,20 @@
|
|||
{
|
||||
"partIndex": 1,
|
||||
"partId": "9c6e7733-adfb-412b-ab8c-2131b901445a",
|
||||
"typeGroup": "text",
|
||||
"mimeType": "text/plain",
|
||||
"label": "main",
|
||||
"dataLength": 1010,
|
||||
"metadata": {
|
||||
"size": 1010,
|
||||
"documentId": "ddaa7167-dd3c-49c1-86b0-5e56af5efd86",
|
||||
"documentMimeType": "text/plain",
|
||||
"originalFileName": "master_keys.txt",
|
||||
"contentFormat": "extracted",
|
||||
"intent": "extract",
|
||||
"extractionPrompt": "Extract all content from the document",
|
||||
"usageHint": "Use extracted content from master_keys.txt",
|
||||
"sourceAction": "extraction.extractContent"
|
||||
},
|
||||
"data": "# PowerOn Master Keys\r\n# Generated on: 1758489953.7492533\r\n# WARNING: Keep this file secure and never commit to version control!\r\n\r\nprod = hb7qNiURT1GHWbjOhVJYsp21MW5YRLI9SNRAhqgtPB0=\r\nint = 9JZ201a7UMeBNsCHJfs1GdgdPoou0zrLYrcNBzvDmv0=\r\ndev = 0tfyQI59HEETi6F0bMyInVSmI0XCMWMGcO7WnZ5i988=\r\n\r\nPoweron is the 1. agent in my life\r\n\r\nFugiKap%77\r\n\r\nAPI key anthropic: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\r\n\r\nF^065319580883at\r\n\r\nevent:The 1st Poweron Event\r\n\r\n\r\nPAT GitHub: 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\r\n\r\n-----BEGIN OPENSSH PRIVATE KEY-----\r\nb3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW\r\nQyNTUxOQAAACDofCk2DPsKSUrH1XhypZYpJTnkRNj0ju7yeAu9JCdPlgAAAKCrNeHXqzXh\r\n1wAAAAtzc2gtZWQyNTUxOQAAACDofCk2DPsKSUrH1XhypZYpJTnkRNj0ju7yeAu9JCdPlg\r\nAAAEC6+y04Tm13npqKvd6EH5YL6xhp0RODCSv9I02tglNaoOh8KTYM+wpJSsfVeHKllikl\r\nOeRE2PSO7vJ4C70kJ0+WAAAAFnlvdXJfZW1haWxAZXhhbXBsZS5jb20BAgMEBQYH\r\n-----END OPENSSH PRIVATE KEY-----\r\n\r\n"
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
{
|
||||
"documentName": "master_keys.txt",
|
||||
"documentMimeType": "text/plain",
|
||||
"partsCount": 1,
|
||||
"parts": [
|
||||
{
|
||||
"typeGroup": "text",
|
||||
"mimeType": "text/plain",
|
||||
"label": "main",
|
||||
"dataLength": 1010,
|
||||
"metadata": {
|
||||
"size": 1010,
|
||||
"documentId": "ddaa7167-dd3c-49c1-86b0-5e56af5efd86",
|
||||
"documentMimeType": "text/plain",
|
||||
"originalFileName": "master_keys.txt",
|
||||
"contentFormat": "extracted",
|
||||
"intent": "extract",
|
||||
"extractionPrompt": "Extract all content from the document",
|
||||
"usageHint": "Use extracted content from master_keys.txt",
|
||||
"sourceAction": "extraction.extractContent"
|
||||
},
|
||||
"dataPreview": "[Large data: 1010 chars - truncated]"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -4,6 +4,9 @@
|
|||
|
||||
from modules.shared.i18nRegistry import t
|
||||
|
||||
from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import (
|
||||
CONTEXT_BUILDER_PARAM_DESCRIPTION,
|
||||
)
|
||||
from modules.features.graphicalEditor.nodeDefinitions.flow import (
|
||||
CONTEXT_ENVELOPE_DATA_PICK_OPTIONS,
|
||||
CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS,
|
||||
|
|
@ -37,9 +40,9 @@ CONTEXT_NODES = [
|
|||
),
|
||||
"injectRunContext": True,
|
||||
"parameters": [
|
||||
{"name": "documentList", "type": "str", "required": True, "frontendType": "hidden",
|
||||
"description": t("Dokumentenliste (via Wire oder DataRef)"), "default": "",
|
||||
"graphInherit": {"port": 0, "kind": "documentListWire"}},
|
||||
{"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
|
||||
"description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
|
||||
"graphInherit": {"port": 0, "kind": "primaryTextRef"}},
|
||||
{
|
||||
"name": "contentFilter",
|
||||
"type": "str",
|
||||
|
|
|
|||
|
|
@ -668,6 +668,7 @@ def get_files(
|
|||
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
|
||||
mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
|
||||
column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
|
||||
owner: str = Query("me", description="'all' | 'me' | 'shared'"),
|
||||
currentUser: User = Depends(getCurrentUser),
|
||||
context: RequestContext = Depends(getRequestContext)
|
||||
):
|
||||
|
|
@ -699,8 +700,9 @@ def get_files(
|
|||
|
||||
from modules.routes.routeHelpers import (
|
||||
handleIdsMode,
|
||||
handleIdsInMemory,
|
||||
handleFilterValuesInMemory,
|
||||
resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels,
|
||||
resolveView, applyViewToParams, buildGroupLayout, effective_group_by_levels, paginateInMemory,
|
||||
)
|
||||
import modules.interfaces.interfaceDbApp as _appIface
|
||||
from modules.datamodels.datamodelPagination import AppliedViewMeta
|
||||
|
|
@ -711,6 +713,10 @@ def get_files(
|
|||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
|
||||
)
|
||||
appInterface = _appIface.getInterface(currentUser)
|
||||
owner_mode = (owner or "me").strip().lower()
|
||||
if owner_mode not in ("all", "me", "shared"):
|
||||
raise HTTPException(status_code=400, detail="owner must be 'all', 'me', or 'shared'")
|
||||
current_user_id = str(getattr(currentUser, "id", "") or "")
|
||||
|
||||
# Resolve view and merge config into params
|
||||
viewKey = paginationParams.viewKey if paginationParams else None
|
||||
|
|
@ -722,6 +728,17 @@ def get_files(
|
|||
def _filesToDicts(fileItems):
|
||||
return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems]
|
||||
|
||||
def _apply_owner_filter(item_dicts):
|
||||
if owner_mode == "all":
|
||||
return item_dicts
|
||||
if owner_mode == "me":
|
||||
return [item for item in item_dicts if str(item.get("sysCreatedBy") or "") == current_user_id]
|
||||
return [item for item in item_dicts if str(item.get("sysCreatedBy") or "") != current_user_id]
|
||||
|
||||
recordFilter = None
|
||||
if owner_mode == "me":
|
||||
recordFilter = {"sysCreatedBy": managementInterface.userId}
|
||||
|
||||
if mode == "groupSummary":
|
||||
if not pagination:
|
||||
raise HTTPException(status_code=400, detail="pagination required for groupSummary")
|
||||
|
|
@ -736,11 +753,12 @@ def get_files(
|
|||
)
|
||||
field = groupByLevels[0]["field"]
|
||||
null_label = str(groupByLevels[0].get("nullLabel") or "—")
|
||||
allFiles = managementInterface.getAllFiles()
|
||||
allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
|
||||
allItems = enrichRowsWithFkLabels(
|
||||
_filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])),
|
||||
FileItem,
|
||||
)
|
||||
allItems = _apply_owner_filter(allItems)
|
||||
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
|
||||
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
|
||||
return JSONResponse(content={"groups": groups_out})
|
||||
|
|
@ -748,48 +766,35 @@ def get_files(
|
|||
if mode == "filterValues":
|
||||
if not column:
|
||||
raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")
|
||||
allFiles = managementInterface.getAllFiles()
|
||||
allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
|
||||
items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
|
||||
itemDicts = _filesToDicts(items)
|
||||
itemDicts = _apply_owner_filter(itemDicts)
|
||||
enrichRowsWithFkLabels(itemDicts, FileItem)
|
||||
return handleFilterValuesInMemory(itemDicts, column, pagination)
|
||||
|
||||
if mode == "ids":
|
||||
recordFilter = {"sysCreatedBy": managementInterface.userId}
|
||||
return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
|
||||
if owner_mode == "me":
|
||||
return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
|
||||
allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
|
||||
items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
|
||||
itemDicts = _apply_owner_filter(_filesToDicts(items))
|
||||
enrichRowsWithFkLabels(itemDicts, FileItem)
|
||||
return handleIdsInMemory(itemDicts, pagination)
|
||||
|
||||
if not groupByLevels:
|
||||
# No grouping: let DB handle pagination directly (fastest path)
|
||||
result = managementInterface.getAllFiles(pagination=paginationParams)
|
||||
if paginationParams and hasattr(result, 'items'):
|
||||
enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem)
|
||||
resp: dict = {
|
||||
"items": enriched,
|
||||
"pagination": PaginationMetadata(
|
||||
currentPage=paginationParams.page,
|
||||
pageSize=paginationParams.pageSize,
|
||||
totalItems=result.totalItems,
|
||||
totalPages=result.totalPages,
|
||||
sort=paginationParams.sort,
|
||||
filters=paginationParams.filters
|
||||
).model_dump(),
|
||||
}
|
||||
else:
|
||||
items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
|
||||
resp = {"items": enrichRowsWithFkLabels(_filesToDicts(items), FileItem), "pagination": None}
|
||||
if viewMeta:
|
||||
resp["appliedView"] = viewMeta.model_dump()
|
||||
return resp
|
||||
|
||||
# Strategy B grouping: load full list, group, then slice
|
||||
allFiles = managementInterface.getAllFiles()
|
||||
# Strategy B: load visible list first, then filter/sort/paginate in memory.
|
||||
# This is required for files because internal workflow artefacts are
|
||||
# suppressed after record loading; SQL-level COUNT/LIMIT would otherwise
|
||||
# count hidden rows and produce pages with only a handful of visible items.
|
||||
allFiles = managementInterface.getAllFiles(recordFilter=recordFilter)
|
||||
allItems = enrichRowsWithFkLabels(
|
||||
_filesToDicts(allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])),
|
||||
FileItem,
|
||||
)
|
||||
allItems = _apply_owner_filter(allItems)
|
||||
|
||||
from modules.routes.routeHelpers import apply_strategy_b_filters_and_sort
|
||||
if paginationParams.filters or paginationParams.sort:
|
||||
if paginationParams and (paginationParams.filters or paginationParams.sort):
|
||||
allItems = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
|
||||
|
||||
if not paginationParams:
|
||||
|
|
@ -798,6 +803,24 @@ def get_files(
|
|||
resp["appliedView"] = viewMeta.model_dump()
|
||||
return resp
|
||||
|
||||
if not groupByLevels:
|
||||
page_items, totalItems = paginateInMemory(allItems, paginationParams)
|
||||
totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
|
||||
resp = {
|
||||
"items": page_items,
|
||||
"pagination": PaginationMetadata(
|
||||
currentPage=paginationParams.page,
|
||||
pageSize=paginationParams.pageSize,
|
||||
totalItems=totalItems,
|
||||
totalPages=totalPages,
|
||||
sort=paginationParams.sort,
|
||||
filters=paginationParams.filters
|
||||
).model_dump(),
|
||||
}
|
||||
if viewMeta:
|
||||
resp["appliedView"] = viewMeta.model_dump()
|
||||
return resp
|
||||
|
||||
totalItems = len(allItems)
|
||||
totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
|
||||
page_items, groupLayout = buildGroupLayout(allItems, groupByLevels, paginationParams.page, paginationParams.pageSize)
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ import time
|
|||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from modules.datamodels.datamodelChat import ActionResult
|
||||
from modules.datamodels.datamodelDocref import coerceDocumentReferenceList
|
||||
from modules.datamodels.datamodelDocref import DocumentReferenceList, coerceDocumentReferenceList
|
||||
from modules.datamodels.datamodelExtraction import ContentExtracted, ExtractionOptions
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -1751,6 +1751,62 @@ def presentation_envelopes_to_document_json(
|
|||
}
|
||||
|
||||
|
||||
def _document_list_from_context(raw: Any, *, _depth: int = 0) -> DocumentReferenceList:
    """Best-effort extraction of document/file references from ``context`` payloads.

    The value is first passed to ``coerceDocumentReferenceList``; a bare
    ``{"fileId": ..., "fileName": ...}`` dict is mapped to ``{"id", "name"}``
    beforehand. When that yields no references, well-known wrapper keys
    (``files``, ``documents``, ``data``, ...) and sequence items are searched
    recursively and all hits are merged.

    Args:
        raw: Context payload: a DocumentList-like value, a wrapper dict, or a
            list/tuple of such values.
        _depth: Internal recursion counter; callers should not pass it.

    Returns:
        A ``DocumentReferenceList`` holding the de-duplicated references in
        discovery order; empty when nothing could be resolved.
    """
    # The depth cap bounds recursion on deeply nested or cyclic payloads.
    if _depth > 6 or raw is None or raw == "":
        return DocumentReferenceList(references=[])

    is_mapping = isinstance(raw, dict)
    if is_mapping and "fileId" in raw and "id" not in raw and "documentId" not in raw:
        # File-picker style dict: translate to the id/name shape before coercing.
        direct = coerceDocumentReferenceList({
            "id": raw.get("fileId"),
            "name": raw.get("fileName") or raw.get("name"),
        })
    else:
        direct = coerceDocumentReferenceList(raw)
    if direct.references:
        return direct

    # Nothing found at this level: gather nested candidates to inspect.
    if is_mapping:
        candidates = []
        nested_files = raw.get("files")
        if isinstance(nested_files, dict):
            candidates.append(list(nested_files.values()))
        elif isinstance(nested_files, (list, tuple)):
            # ``files`` given as a plain sequence was previously skipped.
            candidates.append(list(nested_files))
        wrapper_keys = (
            "documents", "references", "items", "file", "document",
            "value", "data", "merged", "result", "context",
        )
        candidates.extend(raw.get(key) for key in wrapper_keys)
    elif isinstance(raw, (list, tuple)):
        candidates = list(raw)
    else:
        candidates = []

    collected = []
    for candidate in candidates:
        # Skip missing keys and direct self-references.
        if candidate is None or candidate is raw:
            continue
        nested = _document_list_from_context(candidate, _depth=_depth + 1)
        if nested.references:
            collected.extend(nested.references)

    # De-duplicate while keeping first-seen order (dicts preserve insertion order).
    unique = {}
    for ref in collected:
        try:
            key = ref.to_string()
        except Exception:
            # Deliberate best-effort: fall back to repr for refs that cannot
            # be serialised, rather than failing the whole extraction.
            key = repr(ref)
        unique.setdefault(key, ref)
    return DocumentReferenceList(references=list(unique.values()))
|
||||
|
||||
|
||||
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
operation_id = None
|
||||
try:
|
||||
|
|
@ -1758,18 +1814,24 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
operation_id = f"context_extract_{wf}_{int(time.time())}"
|
||||
|
||||
document_list_param = parameters.get("documentList")
|
||||
if not document_list_param:
|
||||
return ActionResult.isFailure(error="documentList is required")
|
||||
|
||||
dl = coerceDocumentReferenceList(document_list_param)
|
||||
if document_list_param:
|
||||
dl = coerceDocumentReferenceList(document_list_param)
|
||||
source = "documentList"
|
||||
else:
|
||||
context_param = parameters.get("context")
|
||||
dl = _document_list_from_context(context_param)
|
||||
source = "context"
|
||||
if not dl.references:
|
||||
return ActionResult.isFailure(
|
||||
error=(
|
||||
f"documentList could not be parsed (type={type(document_list_param).__name__}); "
|
||||
"expected DocumentReferenceList, list of strings/dicts, or "
|
||||
"a wrapper dict like {'documents': [...]}"
|
||||
f"{source} could not be parsed into document references "
|
||||
f"(type={type((document_list_param if document_list_param else parameters.get('context'))).__name__}); "
|
||||
"expected DocumentReferenceList, list of string/dict refs, "
|
||||
"or a context payload containing file/document refs under keys like "
|
||||
"{documents, files, file, data, value}."
|
||||
),
|
||||
)
|
||||
logger.info("extractContent resolved %d document reference(s) from %s", len(dl.references), source)
|
||||
|
||||
parent_operation_id = parameters.get("parentOperationId")
|
||||
self.services.chat.progressLogStart(
|
||||
|
|
|
|||
|
|
@ -68,8 +68,8 @@ class MethodContext(MethodBase):
|
|||
name="documentList",
|
||||
type="DocumentList",
|
||||
frontendType=FrontendType.DOCUMENT_REFERENCE,
|
||||
required=True,
|
||||
description="Document reference(s) to extract content from",
|
||||
required=False,
|
||||
description="Optional document reference(s) to extract content from. When omitted, extractContent also accepts refs via context.",
|
||||
),
|
||||
"contentFilter": WorkflowActionParameter(
|
||||
name="contentFilter",
|
||||
|
|
|
|||
Loading…
Reference in a new issue