node handover standardisiert, kein hardcoden mehr, inhalt extraktion node verbessert, output ports vereinheitlicht mit user im blick

This commit is contained in:
Ida 2026-05-06 12:50:49 +02:00
parent 6e3da0d0d8
commit 93aff13d26
25 changed files with 1579 additions and 465 deletions

View file

@ -3,6 +3,127 @@
from modules.shared.i18nRegistry import t from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import (
CONTEXT_BUILDER_PARAM_DESCRIPTION,
)
# Shared authoritative DataPicker paths (same handover idea as ``context.extractContent`` outputPorts).
# Each entry describes one selectable result path for the frontend DataPicker:
#   path        — key/index path into the step's "ActionResult" output
#   pickerLabel — translated UI label, detail — translated help text
#   recommended — whether the picker highlights this option by default
#   type        — declared type string of the value at ``path``
# Attached to output ports via ``"dataPickOptions"`` next to ``"schema": "ActionResult"``.
ACTION_RESULT_DATA_PICK_OPTIONS = [
    {
        # Structured JSON handover document (documents[0].documentData).
        "path": ["documents", 0, "documentData"],
        "pickerLabel": t("Gesamter Inhalt"),
        "detail": t(
            "Strukturiertes Handover als JSON inklusive aller Textteile "
            "und Verweisen auf ausgelagerte Bilder."
        ),
        "recommended": True,
        "type": "Any",
    },
    {
        # Plain concatenated text of the result.
        "path": ["response"],
        "pickerLabel": t("Nur Text"),
        "detail": t("Verketteter Klartext aus allen erkannten Textteilen."),
        "recommended": True,
        "type": "str",
    },
    {
        # Only the extracted image documents, without the JSON handover.
        "path": ["imageDocumentsOnly"],
        "pickerLabel": t("Nur Bilder"),
        "detail": t("Nur die extrahierten Bilddokumente als Liste, ohne JSON-Handover."),
        "recommended": False,
        "type": "List[ActionDocument]",
    },
    {
        # Full document list: JSON handover plus images.
        "path": ["documents"],
        "pickerLabel": t("Alle Dateitypen"),
        "detail": t("Alle Ausgabedokumente nacheinander: JSON-Handover und Bilder."),
        "recommended": False,
        "type": "List[ActionDocument]",
    },
]
# DataPicker options for ports with ``"schema": "AiResult"`` (AI step outputs).
# Same entry shape as ACTION_RESULT_DATA_PICK_OPTIONS: path / pickerLabel /
# detail / recommended / type.
AI_RESULT_DATA_PICK_OPTIONS = [
    {
        # Main output artifact or structured content of documents[0].
        "path": ["documents", 0, "documentData"],
        "pickerLabel": t("Gesamter Inhalt"),
        "detail": t(
            "Hauptausgabedatei oder strukturierter Inhalt von ``documents[0]`` "
            "(z. B. erzeugtes Dokument, JSON-Handover)."
        ),
        "recommended": True,
        "type": "Any",
    },
    {
        # Model answer as plain text.
        "path": ["response"],
        "pickerLabel": t("Nur Text"),
        "detail": t("Modell-Antwort als reiner Fließtext (ohne eingebettete Bildbytes)."),
        "recommended": True,
        "type": "str",
    },
    {
        # Image documents only.
        "path": ["imageDocumentsOnly"],
        "pickerLabel": t("Nur Bilder"),
        "detail": t("Nur Bild-Dokumente aus ``documents`` (ohne erstes Nicht-Bild-Artefakt, falls gesetzt)."),
        "recommended": False,
        "type": "List[ActionDocument]",
    },
    {
        # Every document of the AI response.
        "path": ["documents"],
        "pickerLabel": t("Alle Ausgabedateien"),
        "detail": t("Alle Dokumente der KI-Antwort: erzeugte Dateien, Bilder, Anhänge."),
        "recommended": False,
        "type": "List[Document]",
    },
]
# DataPicker options for ports with ``"schema": "DocumentList"``.
DOCUMENT_LIST_DATA_PICK_OPTIONS = [
    {
        # The whole document list.
        "path": ["documents"],
        "pickerLabel": t("Alle Dokumente"),
        "detail": t("Die vollständige Dokumentenliste."),
        "recommended": True,
        "type": "List[Document]",
    },
    {
        # First list entry only.
        "path": ["documents", 0],
        "pickerLabel": t("Erstes Dokument"),
        "detail": t("Metadaten und Pfade des ersten Listeneintrags."),
        "recommended": False,
        "type": "Document",
    },
    {
        # Number of documents.
        "path": ["count"],
        "pickerLabel": t("Anzahl"),
        "detail": t("Anzahl der Dokumente."),
        "recommended": False,
        "type": "int",
    },
]
# DataPicker options for ports with ``"schema": "ConsolidateResult"``.
CONSOLIDATE_RESULT_DATA_PICK_OPTIONS = [
    {
        # Consolidated result (text or structured, depending on mode).
        "path": ["result"],
        "pickerLabel": t("Konsolidiertes Ergebnis"),
        "detail": t("Text oder Struktur nach Konsolidierung."),
        "recommended": True,
        "type": "Any",
    },
    {
        # Consolidation mode that was applied.
        "path": ["mode"],
        "pickerLabel": t("Modus"),
        "detail": t("Verwendeter Konsolidierungsmodus."),
        "recommended": False,
        "type": "str",
    },
    {
        # Number of merged items.
        "path": ["count"],
        "pickerLabel": t("Anzahl"),
        "detail": t("Anzahl zusammengeführter Elemente."),
        "recommended": False,
        "type": "int",
    },
]
_AI_COMMON_PARAMS = [ _AI_COMMON_PARAMS = [
{"name": "requireNeutralization", "type": "bool", "required": False, {"name": "requireNeutralization", "type": "bool", "required": False,
"frontendType": "checkbox", "default": False, "frontendType": "checkbox", "default": False,
@ -28,7 +149,7 @@ AI_NODES = [
"description": t("Dokumente aus vorherigen Schritten"), "default": "", "description": t("Dokumente aus vorherigen Schritten"), "default": "",
"graphInherit": {"port": 0, "kind": "documentListWire"}}, "graphInherit": {"port": 0, "kind": "documentListWire"}},
{"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder", {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
"description": t("Daten aus vorherigen Schritten"), "default": "", "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
"graphInherit": {"port": 0, "kind": "primaryTextRef"}}, "graphInherit": {"port": 0, "kind": "primaryTextRef"}},
{"name": "simpleMode", "type": "bool", "required": False, "frontendType": "checkbox", {"name": "simpleMode", "type": "bool", "required": False, "frontendType": "checkbox",
"description": t("Einfacher Modus"), "default": True}, "description": t("Einfacher Modus"), "default": True},
@ -38,7 +159,8 @@ AI_NODES = [
"inputPorts": {0: {"accepts": [ "inputPorts": {0: {"accepts": [
"FormPayload", "DocumentList", "AiResult", "TextResult", "Transit", "LoopItem", "ActionResult", "FormPayload", "DocumentList", "AiResult", "TextResult", "Transit", "LoopItem", "ActionResult",
]}}, ]}},
"outputPorts": {0: {"schema": "AiResult"}}, "outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}},
"paramMappers": ["aiPromptLegacyAlias"],
"meta": {"icon": "mdi-robot", "color": "#9C27B0", "usesAi": True}, "meta": {"icon": "mdi-robot", "color": "#9C27B0", "usesAi": True},
"_method": "ai", "_method": "ai",
"_action": "process", "_action": "process",
@ -52,7 +174,7 @@ AI_NODES = [
{"name": "prompt", "type": "str", "required": True, "frontendType": "textarea", {"name": "prompt", "type": "str", "required": True, "frontendType": "textarea",
"description": t("Recherche-Anfrage")}, "description": t("Recherche-Anfrage")},
{"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder", {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
"description": t("Daten aus vorherigen Schritten"), "default": "", "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
"graphInherit": {"port": 0, "kind": "primaryTextRef"}}, "graphInherit": {"port": 0, "kind": "primaryTextRef"}},
{"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden", {"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden",
"description": t("Dokumente aus vorherigen Schritten"), "default": "", "description": t("Dokumente aus vorherigen Schritten"), "default": "",
@ -63,7 +185,7 @@ AI_NODES = [
"inputPorts": {0: {"accepts": [ "inputPorts": {0: {"accepts": [
"FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult", "LoopItem", "TextResult", "FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult", "LoopItem", "TextResult",
]}}, ]}},
"outputPorts": {0: {"schema": "AiResult"}}, "outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-magnify", "color": "#9C27B0", "usesAi": True}, "meta": {"icon": "mdi-magnify", "color": "#9C27B0", "usesAi": True},
"_method": "ai", "_method": "ai",
"_action": "webResearch", "_action": "webResearch",
@ -90,7 +212,7 @@ AI_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}}, "inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}},
"outputPorts": {0: {"schema": "AiResult"}}, "outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-document-outline", "color": "#9C27B0", "usesAi": True}, "meta": {"icon": "mdi-file-document-outline", "color": "#9C27B0", "usesAi": True},
"_method": "ai", "_method": "ai",
"_action": "summarizeDocument", "_action": "summarizeDocument",
@ -116,7 +238,7 @@ AI_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}}, "inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}},
"outputPorts": {0: {"schema": "AiResult"}}, "outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-translate", "color": "#9C27B0", "usesAi": True}, "meta": {"icon": "mdi-translate", "color": "#9C27B0", "usesAi": True},
"_method": "ai", "_method": "ai",
"_action": "translateDocument", "_action": "translateDocument",
@ -140,7 +262,7 @@ AI_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}}, "inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}},
"outputPorts": {0: {"schema": "DocumentList"}}, "outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-convert", "color": "#9C27B0", "usesAi": True}, "meta": {"icon": "mdi-file-convert", "color": "#9C27B0", "usesAi": True},
"_method": "ai", "_method": "ai",
"_action": "convertDocument", "_action": "convertDocument",
@ -165,7 +287,7 @@ AI_NODES = [
"description": t("Zielordner in Meine Dateien"), "description": t("Zielordner in Meine Dateien"),
"default": ""}, "default": ""},
{"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder", {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
"description": t("Daten aus vorherigen Schritten"), "default": "", "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
"graphInherit": {"port": 0, "kind": "primaryTextRef"}}, "graphInherit": {"port": 0, "kind": "primaryTextRef"}},
{"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden", {"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden",
"description": t("Dokumente aus vorherigen Schritten"), "default": "", "description": t("Dokumente aus vorherigen Schritten"), "default": "",
@ -176,7 +298,7 @@ AI_NODES = [
"inputPorts": {0: {"accepts": [ "inputPorts": {0: {"accepts": [
"FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult", "LoopItem", "TextResult", "FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult", "LoopItem", "TextResult",
]}}, ]}},
"outputPorts": {0: {"schema": "DocumentList"}}, "outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-plus", "color": "#9C27B0", "usesAi": True}, "meta": {"icon": "mdi-file-plus", "color": "#9C27B0", "usesAi": True},
"_method": "ai", "_method": "ai",
"_action": "generateDocument", "_action": "generateDocument",
@ -196,7 +318,7 @@ AI_NODES = [
"description": t("Zielordner in Meine Dateien"), "description": t("Zielordner in Meine Dateien"),
"default": ""}, "default": ""},
{"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder", {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
"description": t("Daten aus vorherigen Schritten"), "default": "", "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
"graphInherit": {"port": 0, "kind": "primaryTextRef"}}, "graphInherit": {"port": 0, "kind": "primaryTextRef"}},
{"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden", {"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden",
"description": t("Dokumente aus vorherigen Schritten"), "default": "", "description": t("Dokumente aus vorherigen Schritten"), "default": "",
@ -207,7 +329,7 @@ AI_NODES = [
"inputPorts": {0: {"accepts": [ "inputPorts": {0: {"accepts": [
"FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult", "LoopItem", "TextResult", "FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult", "LoopItem", "TextResult",
]}}, ]}},
"outputPorts": {0: {"schema": "AiResult"}}, "outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-code-tags", "color": "#9C27B0", "usesAi": True}, "meta": {"icon": "mdi-code-tags", "color": "#9C27B0", "usesAi": True},
"_method": "ai", "_method": "ai",
"_action": "generateCode", "_action": "generateCode",
@ -227,7 +349,7 @@ AI_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}}, "inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}},
"outputPorts": {0: {"schema": "ConsolidateResult"}}, "outputPorts": {0: {"schema": "ConsolidateResult", "dataPickOptions": CONSOLIDATE_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-table-merge-cells", "color": "#9C27B0", "usesAi": True}, "meta": {"icon": "mdi-table-merge-cells", "color": "#9C27B0", "usesAi": True},
"_method": "ai", "_method": "ai",
"_action": "consolidate", "_action": "consolidate",

View file

@ -4,6 +4,63 @@
from modules.shared.i18nRegistry import t from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS
# DataPicker options for ClickUp ports with ``"schema": "TaskList"``.
TASK_LIST_DATA_PICK_OPTIONS = [
    {
        # Complete task list.
        "path": ["tasks"],
        "pickerLabel": t("Alle Aufgaben"),
        "detail": t("Vollständige Aufgabenliste."),
        "recommended": True,
        "type": "List[TaskItem]",
    },
    {
        # First task in the list.
        "path": ["tasks", 0],
        "pickerLabel": t("Erste Aufgabe"),
        "detail": t("Erstes Listenelement."),
        "recommended": False,
        "type": "TaskItem",
    },
    {
        # Number of tasks.
        "path": ["count"],
        "pickerLabel": t("Anzahl"),
        "detail": t("Anzahl der Aufgaben."),
        "recommended": False,
        "type": "int",
    },
    {
        # ClickUp list context, if present.
        "path": ["listId"],
        "pickerLabel": t("Listen-ID"),
        "detail": t("ClickUp-Listen-Kontext, falls gesetzt."),
        "recommended": False,
        "type": "str",
    },
]
# DataPicker options for ClickUp ports with ``"schema": "TaskResult"``.
TASK_RESULT_DATA_PICK_OPTIONS = [
    {
        # Whether the API call succeeded.
        "path": ["success"],
        "pickerLabel": t("Erfolg"),
        "detail": t("Ob der API-Aufruf erfolgreich war."),
        "recommended": True,
        "type": "bool",
    },
    {
        # ID of the affected task.
        "path": ["taskId"],
        "pickerLabel": t("Aufgaben-ID"),
        "detail": t("ID der betroffenen Aufgabe."),
        "recommended": True,
        "type": "str",
    },
    {
        # Full task object as a dict.
        "path": ["task"],
        "pickerLabel": t("Aufgabendaten"),
        "detail": t("Vollständiges Task-Objekt (Dict)."),
        "recommended": True,
        "type": "Dict",
    },
]
CLICKUP_NODES = [ CLICKUP_NODES = [
{ {
"id": "clickup.searchTasks", "id": "clickup.searchTasks",
@ -33,7 +90,7 @@ CLICKUP_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TaskList"}}, "outputPorts": {0: {"schema": "TaskList", "dataPickOptions": TASK_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-magnify", "color": "#7B68EE", "usesAi": False}, "meta": {"icon": "mdi-magnify", "color": "#7B68EE", "usesAi": False},
"_method": "clickup", "_method": "clickup",
"_action": "searchTasks", "_action": "searchTasks",
@ -58,7 +115,7 @@ CLICKUP_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TaskList"}}, "outputPorts": {0: {"schema": "TaskList", "dataPickOptions": TASK_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-format-list-bulleted", "color": "#7B68EE", "usesAi": False}, "meta": {"icon": "mdi-format-list-bulleted", "color": "#7B68EE", "usesAi": False},
"_method": "clickup", "_method": "clickup",
"_action": "listTasks", "_action": "listTasks",
@ -80,7 +137,7 @@ CLICKUP_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TaskResult"}}, "outputPorts": {0: {"schema": "TaskResult", "dataPickOptions": TASK_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-document-outline", "color": "#7B68EE", "usesAi": False}, "meta": {"icon": "mdi-file-document-outline", "color": "#7B68EE", "usesAi": False},
"_method": "clickup", "_method": "clickup",
"_action": "getTask", "_action": "getTask",
@ -124,7 +181,7 @@ CLICKUP_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TaskResult"}}, "outputPorts": {0: {"schema": "TaskResult", "dataPickOptions": TASK_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-plus-circle-outline", "color": "#7B68EE", "usesAi": False}, "meta": {"icon": "mdi-plus-circle-outline", "color": "#7B68EE", "usesAi": False},
"_method": "clickup", "_method": "clickup",
"_action": "createTask", "_action": "createTask",
@ -148,7 +205,8 @@ CLICKUP_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["TaskResult", "Transit"]}}, "inputPorts": {0: {"accepts": ["TaskResult", "Transit"]}},
"outputPorts": {0: {"schema": "TaskResult"}}, "outputPorts": {0: {"schema": "TaskResult", "dataPickOptions": TASK_RESULT_DATA_PICK_OPTIONS}},
"paramMappers": ["clickupTaskUpdateMerge"],
"meta": {"icon": "mdi-pencil-outline", "color": "#7B68EE", "usesAi": False}, "meta": {"icon": "mdi-pencil-outline", "color": "#7B68EE", "usesAi": False},
"_method": "clickup", "_method": "clickup",
"_action": "updateTask", "_action": "updateTask",
@ -174,7 +232,7 @@ CLICKUP_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-attachment", "color": "#7B68EE", "usesAi": False}, "meta": {"icon": "mdi-attachment", "color": "#7B68EE", "usesAi": False},
"_method": "clickup", "_method": "clickup",
"_action": "uploadAttachment", "_action": "uploadAttachment",

View file

@ -8,21 +8,66 @@ CONTEXT_NODES = [
"id": "context.extractContent", "id": "context.extractContent",
"category": "context", "category": "context",
"label": t("Inhalt extrahieren"), "label": t("Inhalt extrahieren"),
"description": t("Dokumentstruktur extrahieren ohne KI (Seiten, Abschnitte, Bilder, Tabellen)"), "description": t(
"Extrahiert Inhalt ohne KI. Ergebnis einheitlich wie KI-Schritte: `response` "
"(gesammelter Klartext), strukturierte JSON-Unterlage in `documents[0]`, "
"einzelne Bilder als eigene Dokumente `extract_media_*` (nur im Workflow, ohne Eintrag unter „Meine Dateien“) — "
"Auswahl im Daten-Picker wie bei `ai.process`."
),
"parameters": [ "parameters": [
{"name": "documentList", "type": "str", "required": True, "frontendType": "hidden", {"name": "documentList", "type": "str", "required": True, "frontendType": "hidden",
"description": t("Dokumentenliste (via Wire oder DataRef)"), "default": "", "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": "",
"graphInherit": {"port": 0, "kind": "documentListWire"}}, "graphInherit": {"port": 0, "kind": "documentListWire"}},
{"name": "extractionOptions", "type": "object", "required": False, "frontendType": "json",
"description": t(
"Extraktions-Optionen (JSON), z.B. {\"includeImages\": true, \"includeTables\": true, "
"\"outputDetail\": \"full\"}"),
"default": {}},
], ],
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, "inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}},
"outputPorts": {0: {"schema": "UdmDocument"}}, "outputPorts": {
0: {
"schema": "ActionResult",
# Authoritative DataPicker paths (same idea as ``parameters`` for configuration).
# Frontend uses only this list — no schema expansion merge for this port.
"dataPickOptions": [
{
"path": ["documents", 0, "documentData"],
"pickerLabel": t("Gesamter Inhalt"),
"detail": t(
"Strukturiertes Handover als JSON inklusive aller Textteile "
"und Verweisen auf ausgelagerte Bilder."
),
"recommended": True,
"type": "Any",
},
{
"path": ["response"],
"pickerLabel": t("Nur Text"),
"detail": t(
"Verketteter Klartext aus allen erkannten Textteilen."
),
"recommended": True,
"type": "str",
},
{
"path": ["imageDocumentsOnly"],
"pickerLabel": t("Nur Bilder"),
"detail": t(
"Nur die extrahierten Bilddokumente als Liste, ohne JSON-Handover."
),
"recommended": False,
"type": "List[ActionDocument]",
},
{
"path": ["documents"],
"pickerLabel": t("Alle Dateitypen"),
"detail": t(
"Alle Ausgabedokumente nacheinander: JSON-Handover und Bilder."
),
"recommended": False,
"type": "List[ActionDocument]",
},
],
}
},
"meta": {"icon": "mdi-file-tree-outline", "color": "#00897B", "usesAi": False}, "meta": {"icon": "mdi-file-tree-outline", "color": "#00897B", "usesAi": False},
"_method": "context", "_method": "context",
"_action": "extractContent", "_action": "extractContent",

View file

@ -0,0 +1,22 @@
# Copyright (c) 2025 Patrick Motsch
# Shared parameter copy for ``contextBuilder`` fields (upstream data pick).
from modules.shared.i18nRegistry import t
# Shared, translated description for ``contextBuilder`` parameters across node
# definitions (replaces per-node hard-coded descriptions). The string itself is
# user-facing and therefore kept verbatim in German.
CONTEXT_BUILDER_PARAM_DESCRIPTION = t(
    "Inhalt aus vorherigen Schritten wählen (DataRef / Daten-Picker): z. B. „response“ für Klartext, "
    "Handover-Pfade für strukturiertes JSON oder Medienlisten. "
    "Die Auflösung erfolgt vollständig serverseitig (`resolveParameterReferences`). "
    "Formular-Schritte speichern Antworten unter „payload“ — fehlt ein gewählter Pfad am Root, "
    "wird derselbe Pfad automatisch unter „payload“ nachgeschlagen (Kompatibilität mit älteren "
    "und neuen Picker-Pfaden). "
    "In Freitext-/Template-Feldern werden weiterhin Platzhalter `{{KnotenId.feld.b.z.}}` ersetzt "
    "(gleiche Semantik inkl. optionalem Nachschlagen unter „payload“)."
)
# Short reference for node descriptions (optional to embed): the same resolution
# logic as for DataRefs — there is no separate variable subsystem.
REF_AND_TEMPLATE_COMPATIBILITY_SUMMARY = t(
    "Verweise: typisierte DataRefs im Parameter; Zeichenketten-Templates mit {{…}}; "
    "Formular-Felder unter output.payload."
)

View file

@ -3,6 +3,25 @@
from modules.shared.i18nRegistry import t from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.ai import CONSOLIDATE_RESULT_DATA_PICK_OPTIONS
# DataPicker options for ports with ``"schema": "AggregateResult"``
# (loop-collected values).
AGGREGATE_RESULT_DATA_PICK_OPTIONS = [
    {
        # All values collected from the loop.
        "path": ["items"],
        "pickerLabel": t("Gesammelte Elemente"),
        "detail": t("Alle aus der Schleife gesammelten Werte."),
        "recommended": True,
        "type": "List[Any]",
    },
    {
        # Number of collected items.
        "path": ["count"],
        "pickerLabel": t("Anzahl"),
        "detail": t("Anzahl gesammelter Elemente."),
        "recommended": False,
        "type": "int",
    },
]
DATA_NODES = [ DATA_NODES = [
{ {
"id": "data.aggregate", "id": "data.aggregate",
@ -17,7 +36,7 @@ DATA_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit", "AiResult", "LoopItem"]}}, "inputPorts": {0: {"accepts": ["Transit", "AiResult", "LoopItem"]}},
"outputPorts": {0: {"schema": "AggregateResult"}}, "outputPorts": {0: {"schema": "AggregateResult", "dataPickOptions": AGGREGATE_RESULT_DATA_PICK_OPTIONS}},
"executor": "data", "executor": "data",
"meta": {"icon": "mdi-playlist-plus", "color": "#607D8B", "usesAi": False}, "meta": {"icon": "mdi-playlist-plus", "color": "#607D8B", "usesAi": False},
}, },
@ -55,7 +74,7 @@ DATA_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}}, "inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}},
"outputPorts": {0: {"schema": "ConsolidateResult"}}, "outputPorts": {0: {"schema": "ConsolidateResult", "dataPickOptions": CONSOLIDATE_RESULT_DATA_PICK_OPTIONS}},
"executor": "data", "executor": "data",
"meta": {"icon": "mdi-table-merge-cells", "color": "#607D8B", "usesAi": False}, "meta": {"icon": "mdi-table-merge-cells", "color": "#607D8B", "usesAi": False},
}, },

View file

@ -3,6 +3,35 @@
from modules.shared.i18nRegistry import t from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import (
CONTEXT_BUILDER_PARAM_DESCRIPTION,
)
from modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS
# DataPicker options for ports with ``"schema": "EmailList"``.
EMAIL_LIST_DATA_PICK_OPTIONS = [
    {
        # Full e-mail list of the step.
        "path": ["emails"],
        "pickerLabel": t("Alle E-Mails"),
        "detail": t("Die vollständige E-Mail-Liste des Schritts."),
        "recommended": True,
        "type": "List[EmailItem]",
    },
    {
        # First e-mail in the list.
        "path": ["emails", 0],
        "pickerLabel": t("Erste E-Mail"),
        "detail": t("Das erste Element der Liste."),
        "recommended": False,
        "type": "EmailItem",
    },
    {
        # Number of e-mails found.
        "path": ["count"],
        "pickerLabel": t("Anzahl"),
        "detail": t("Anzahl gefundener E-Mails."),
        "recommended": False,
        "type": "int",
    },
]
EMAIL_NODES = [ EMAIL_NODES = [
{ {
"id": "email.checkEmail", "id": "email.checkEmail",
@ -23,7 +52,8 @@ EMAIL_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "EmailList"}}, "outputPorts": {0: {"schema": "EmailList", "dataPickOptions": EMAIL_LIST_DATA_PICK_OPTIONS}},
"paramMappers": ["emailCheckFilter"],
"meta": {"icon": "mdi-email-check", "color": "#1976D2", "usesAi": False}, "meta": {"icon": "mdi-email-check", "color": "#1976D2", "usesAi": False},
"_method": "outlook", "_method": "outlook",
"_action": "readEmails", "_action": "readEmails",
@ -47,7 +77,8 @@ EMAIL_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "EmailList"}}, "outputPorts": {0: {"schema": "EmailList", "dataPickOptions": EMAIL_LIST_DATA_PICK_OPTIONS}},
"paramMappers": ["emailSearchQuery"],
"meta": {"icon": "mdi-email-search", "color": "#1976D2", "usesAi": False}, "meta": {"icon": "mdi-email-search", "color": "#1976D2", "usesAi": False},
"_method": "outlook", "_method": "outlook",
"_action": "searchEmails", "_action": "searchEmails",
@ -63,7 +94,7 @@ EMAIL_NODES = [
"frontendOptions": {"authority": "msft"}, "frontendOptions": {"authority": "msft"},
"description": t("E-Mail-Konto")}, "description": t("E-Mail-Konto")},
{"name": "context", "type": "Any", "required": False, "frontendType": "templateTextarea", {"name": "context", "type": "Any", "required": False, "frontendType": "templateTextarea",
"description": t("Daten aus vorherigen Schritten (oder direkte Beschreibung)"), "default": "", "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
"graphInherit": {"port": 0, "kind": "primaryTextRef"}}, "graphInherit": {"port": 0, "kind": "primaryTextRef"}},
{"name": "to", "type": "str", "required": False, "frontendType": "text", {"name": "to", "type": "str", "required": False, "frontendType": "text",
"description": t("Empfänger (komma-separiert, optional für Entwurf)"), "default": ""}, "description": t("Empfänger (komma-separiert, optional für Entwurf)"), "default": ""},
@ -80,7 +111,8 @@ EMAIL_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["EmailDraft", "AiResult", "Transit", "ConsolidateResult", "DocumentList"]}}, "inputPorts": {0: {"accepts": ["EmailDraft", "AiResult", "Transit", "ConsolidateResult", "DocumentList"]}},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"paramMappers": ["emailDraftContextFromSubjectBody"],
"meta": {"icon": "mdi-email-edit", "color": "#1976D2", "usesAi": False}, "meta": {"icon": "mdi-email-edit", "color": "#1976D2", "usesAi": False},
"_method": "outlook", "_method": "outlook",
"_action": "composeAndDraftEmailWithContext", "_action": "composeAndDraftEmailWithContext",

View file

@ -3,12 +3,21 @@
from modules.shared.i18nRegistry import t from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import (
CONTEXT_BUILDER_PARAM_DESCRIPTION,
)
from modules.features.graphicalEditor.nodeDefinitions.ai import DOCUMENT_LIST_DATA_PICK_OPTIONS
FILE_NODES = [ FILE_NODES = [
{ {
"id": "file.create", "id": "file.create",
"category": "file", "category": "file",
"label": t("Datei erstellen"), "label": t("Datei erstellen"),
"description": t("Erstellt eine Datei aus Kontext (Text/Markdown von KI)."), "description": t(
"Erstellt eine Datei aus Kontext. Nach „Inhalt extrahieren“: „response“ für reinen Text; "
"„Nur Bilder“ liefert alle extrahierten Bilder — Datei erstellen fasst sie zu einer PDF oder DOCX "
"(Ausgabeformat pdf oder docx wählen)."
),
"parameters": [ "parameters": [
{"name": "outputFormat", "type": "str", "required": True, "frontendType": "select", {"name": "outputFormat", "type": "str", "required": True, "frontendType": "select",
"frontendOptions": {"options": ["docx", "pdf", "txt", "html", "md"]}, "frontendOptions": {"options": ["docx", "pdf", "txt", "html", "md"]},
@ -19,13 +28,13 @@ FILE_NODES = [
"description": t("Zielordner in Meine Dateien"), "description": t("Zielordner in Meine Dateien"),
"default": ""}, "default": ""},
{"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder", {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
"description": t("Daten aus vorherigen Schritten"), "default": "", "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
"graphInherit": {"port": 0, "kind": "primaryTextRef"}}, "graphInherit": {"port": 0, "kind": "primaryTextRef"}},
], ],
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["AiResult", "TextResult", "Transit", "FormPayload", "LoopItem", "ActionResult"]}}, "inputPorts": {0: {"accepts": ["AiResult", "TextResult", "Transit", "FormPayload", "LoopItem", "ActionResult"]}},
"outputPorts": {0: {"schema": "DocumentList"}}, "outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-plus-outline", "color": "#2196F3", "usesAi": False}, "meta": {"icon": "mdi-file-plus-outline", "color": "#2196F3", "usesAi": False},
"_method": "file", "_method": "file",
"_action": "create", "_action": "create",

View file

@ -3,6 +3,61 @@
from modules.shared.i18nRegistry import t from modules.shared.i18nRegistry import t
# DataPicker options for loop ports with ``"schema": "LoopItem"``.
LOOP_ITEM_DATA_PICK_OPTIONS = [
    {
        # Current iteration element.
        "path": ["currentItem"],
        "pickerLabel": t("Aktuelles Element"),
        "detail": t("Das aktuelle Iterationselement."),
        "recommended": True,
        "type": "Any",
    },
    {
        # Zero-based index of the current iteration.
        "path": ["currentIndex"],
        "pickerLabel": t("Aktueller Index"),
        "detail": t("0-basierter Index der aktuellen Iteration."),
        "recommended": False,
        "type": "int",
    },
    {
        # Complete source list being iterated.
        "path": ["items"],
        "pickerLabel": t("Alle Elemente"),
        "detail": t("Die vollständige Quellliste."),
        "recommended": False,
        "type": "List[Any]",
    },
    {
        # Total number of loop elements.
        "path": ["count"],
        "pickerLabel": t("Gesamtanzahl"),
        "detail": t("Anzahl der Elemente in der Schleife."),
        "recommended": False,
        "type": "int",
    },
]
# DataPicker options for merge ports with ``"schema": "MergeResult"``.
MERGE_RESULT_DATA_PICK_OPTIONS = [
    {
        # Merged result (shape depends on merge mode).
        "path": ["merged"],
        "pickerLabel": t("Zusammengeführt"),
        "detail": t("Zusammengeführtes Ergebnis (je nach Modus)."),
        "recommended": True,
        "type": "Dict",
    },
    {
        # Data of the first connected input (mode "first").
        "path": ["first"],
        "pickerLabel": t("Erster Zweig"),
        "detail": t("Daten vom ersten verbundenen Eingang (Modus „first“)."),
        "recommended": False,
        "type": "Any",
    },
    {
        # All input payloads keyed by port index.
        "path": ["inputs"],
        "pickerLabel": t("Alle Eingänge"),
        "detail": t("Dict der Eingabeobjekte nach Port-Index."),
        "recommended": False,
        "type": "Dict[int,Any]",
    },
]
# Ports, die typische Schritt-Ausgaben durchreichen (nicht nur leerer Transit). # Ports, die typische Schritt-Ausgaben durchreichen (nicht nur leerer Transit).
_FLOW_INPUT_SCHEMAS = [ _FLOW_INPUT_SCHEMAS = [
"Transit", "Transit",
@ -119,7 +174,7 @@ FLOW_NODES = [
"Transit", "UdmDocument", "EmailList", "DocumentList", "FileList", "TaskList", "Transit", "UdmDocument", "EmailList", "DocumentList", "FileList", "TaskList",
"ActionResult", "AiResult", "QueryResult", "FormPayload", "ActionResult", "AiResult", "QueryResult", "FormPayload",
]}}, ]}},
"outputPorts": {0: {"schema": "LoopItem"}}, "outputPorts": {0: {"schema": "LoopItem", "dataPickOptions": LOOP_ITEM_DATA_PICK_OPTIONS}},
"executor": "flow", "executor": "flow",
"meta": {"icon": "mdi-repeat", "color": "#FF9800", "usesAi": False}, "meta": {"icon": "mdi-repeat", "color": "#FF9800", "usesAi": False},
}, },
@ -157,7 +212,7 @@ FLOW_NODES = [
0: {"accepts": list(_FLOW_INPUT_SCHEMAS)}, 0: {"accepts": list(_FLOW_INPUT_SCHEMAS)},
1: {"accepts": list(_FLOW_INPUT_SCHEMAS)}, 1: {"accepts": list(_FLOW_INPUT_SCHEMAS)},
}, },
"outputPorts": {0: {"schema": "MergeResult"}}, "outputPorts": {0: {"schema": "MergeResult", "dataPickOptions": MERGE_RESULT_DATA_PICK_OPTIONS}},
"executor": "flow", "executor": "flow",
"meta": {"icon": "mdi-call-merge", "color": "#FF9800", "usesAi": False}, "meta": {"icon": "mdi-call-merge", "color": "#FF9800", "usesAi": False},
}, },

View file

@ -3,6 +3,35 @@
from modules.shared.i18nRegistry import t from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.ai import DOCUMENT_LIST_DATA_PICK_OPTIONS
BOOL_RESULT_DATA_PICK_OPTIONS = [
{
"path": ["result"],
"pickerLabel": t("Ergebnis"),
"detail": t("Boolesches Ergebnis (z. B. Genehmigung ja/nein)."),
"recommended": True,
"type": "bool",
},
{
"path": ["reason"],
"pickerLabel": t("Begründung"),
"detail": t("Optionale textuelle Begründung."),
"recommended": False,
"type": "str",
},
]
TEXT_RESULT_DATA_PICK_OPTIONS = [
{
"path": ["text"],
"pickerLabel": t("Text"),
"detail": t("Vom Benutzer eingegebener oder gewählter Text."),
"recommended": True,
"type": "str",
},
]
# Canonical form field types — single source of truth. # Canonical form field types — single source of truth.
# portType maps to the PORT_TYPE_CATALOG primitive used by DataPicker / validateGraph. # portType maps to the PORT_TYPE_CATALOG primitive used by DataPicker / validateGraph.
FORM_FIELD_TYPES = [ FORM_FIELD_TYPES = [
@ -55,7 +84,7 @@ INPUT_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "BoolResult"}}, "outputPorts": {0: {"schema": "BoolResult", "dataPickOptions": BOOL_RESULT_DATA_PICK_OPTIONS}},
"executor": "input", "executor": "input",
"meta": {"icon": "mdi-check-decagram", "color": "#4CAF50", "usesAi": False}, "meta": {"icon": "mdi-check-decagram", "color": "#4CAF50", "usesAi": False},
}, },
@ -78,7 +107,7 @@ INPUT_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "DocumentList"}}, "outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}},
"executor": "input", "executor": "input",
"meta": {"icon": "mdi-upload", "color": "#2196F3", "usesAi": False}, "meta": {"icon": "mdi-upload", "color": "#2196F3", "usesAi": False},
}, },
@ -96,7 +125,7 @@ INPUT_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TextResult"}}, "outputPorts": {0: {"schema": "TextResult", "dataPickOptions": TEXT_RESULT_DATA_PICK_OPTIONS}},
"executor": "input", "executor": "input",
"meta": {"icon": "mdi-comment-text", "color": "#FF9800", "usesAi": False}, "meta": {"icon": "mdi-comment-text", "color": "#FF9800", "usesAi": False},
}, },
@ -115,7 +144,7 @@ INPUT_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "BoolResult"}}, "outputPorts": {0: {"schema": "BoolResult", "dataPickOptions": BOOL_RESULT_DATA_PICK_OPTIONS}},
"executor": "input", "executor": "input",
"meta": {"icon": "mdi-magnify-scan", "color": "#673AB7", "usesAi": False}, "meta": {"icon": "mdi-magnify-scan", "color": "#673AB7", "usesAi": False},
}, },
@ -133,7 +162,7 @@ INPUT_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TextResult"}}, "outputPorts": {0: {"schema": "TextResult", "dataPickOptions": TEXT_RESULT_DATA_PICK_OPTIONS}},
"executor": "input", "executor": "input",
"meta": {"icon": "mdi-format-list-checks", "color": "#009688", "usesAi": False}, "meta": {"icon": "mdi-format-list-checks", "color": "#009688", "usesAi": False},
}, },
@ -153,7 +182,7 @@ INPUT_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "BoolResult"}}, "outputPorts": {0: {"schema": "BoolResult", "dataPickOptions": BOOL_RESULT_DATA_PICK_OPTIONS}},
"executor": "input", "executor": "input",
"meta": {"icon": "mdi-checkbox-marked-circle", "color": "#8BC34A", "usesAi": False}, "meta": {"icon": "mdi-checkbox-marked-circle", "color": "#8BC34A", "usesAi": False},
}, },

View file

@ -4,6 +4,8 @@
from modules.shared.i18nRegistry import t from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS
# Typed FeatureInstance binding (replaces legacy `string, hidden`). # Typed FeatureInstance binding (replaces legacy `string, hidden`).
# - type FeatureInstanceRef[redmine] is filtered by the DataPicker. # - type FeatureInstanceRef[redmine] is filtered by the DataPicker.
# - frontendType "featureInstance" is rendered by FeatureInstancePicker which # - frontendType "featureInstance" is rendered by FeatureInstancePicker which
@ -31,7 +33,7 @@ REDMINE_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-ticket-outline", "color": "#4A6FA5", "usesAi": False}, "meta": {"icon": "mdi-ticket-outline", "color": "#4A6FA5", "usesAi": False},
"_method": "redmine", "_method": "redmine",
"_action": "readTicket", "_action": "readTicket",
@ -59,7 +61,7 @@ REDMINE_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-format-list-bulleted", "color": "#4A6FA5", "usesAi": False}, "meta": {"icon": "mdi-format-list-bulleted", "color": "#4A6FA5", "usesAi": False},
"_method": "redmine", "_method": "redmine",
"_action": "listTickets", "_action": "listTickets",
@ -91,7 +93,7 @@ REDMINE_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-ticket-plus-outline", "color": "#4A6FA5", "usesAi": False}, "meta": {"icon": "mdi-ticket-plus-outline", "color": "#4A6FA5", "usesAi": False},
"_method": "redmine", "_method": "redmine",
"_action": "createTicket", "_action": "createTicket",
@ -127,7 +129,7 @@ REDMINE_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-ticket-confirmation-outline", "color": "#4A6FA5", "usesAi": False}, "meta": {"icon": "mdi-ticket-confirmation-outline", "color": "#4A6FA5", "usesAi": False},
"_method": "redmine", "_method": "redmine",
"_action": "updateTicket", "_action": "updateTicket",
@ -151,7 +153,7 @@ REDMINE_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-chart-bar", "color": "#4A6FA5", "usesAi": False}, "meta": {"icon": "mdi-chart-bar", "color": "#4A6FA5", "usesAi": False},
"_method": "redmine", "_method": "redmine",
"_action": "getStats", "_action": "getStats",
@ -169,7 +171,7 @@ REDMINE_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-database-sync", "color": "#4A6FA5", "usesAi": False}, "meta": {"icon": "mdi-database-sync", "color": "#4A6FA5", "usesAi": False},
"_method": "redmine", "_method": "redmine",
"_action": "runSync", "_action": "runSync",

View file

@ -3,6 +3,35 @@
from modules.shared.i18nRegistry import t from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.ai import (
ACTION_RESULT_DATA_PICK_OPTIONS,
DOCUMENT_LIST_DATA_PICK_OPTIONS,
)
FILE_LIST_DATA_PICK_OPTIONS = [
{
"path": ["files"],
"pickerLabel": t("Alle Dateien"),
"detail": t("Die vollständige Dateiliste."),
"recommended": True,
"type": "List[FileItem]",
},
{
"path": ["files", 0],
"pickerLabel": t("Erste Datei"),
"detail": t("Das erste Listenelement."),
"recommended": False,
"type": "FileItem",
},
{
"path": ["count"],
"pickerLabel": t("Anzahl"),
"detail": t("Anzahl der Dateien."),
"recommended": False,
"type": "int",
},
]
SHAREPOINT_NODES = [ SHAREPOINT_NODES = [
{ {
"id": "sharepoint.findFile", "id": "sharepoint.findFile",
@ -23,7 +52,7 @@ SHAREPOINT_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "FileList"}}, "outputPorts": {0: {"schema": "FileList", "dataPickOptions": FILE_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-search", "color": "#0078D4", "usesAi": False}, "meta": {"icon": "mdi-file-search", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint", "_method": "sharepoint",
"_action": "findDocumentPath", "_action": "findDocumentPath",
@ -44,7 +73,7 @@ SHAREPOINT_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["FileList", "Transit", "LoopItem"]}}, "inputPorts": {0: {"accepts": ["FileList", "Transit", "LoopItem"]}},
"outputPorts": {0: {"schema": "DocumentList"}}, "outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-document", "color": "#0078D4", "usesAi": False}, "meta": {"icon": "mdi-file-document", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint", "_method": "sharepoint",
"_action": "readDocuments", "_action": "readDocuments",
@ -67,7 +96,7 @@ SHAREPOINT_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-upload", "color": "#0078D4", "usesAi": False}, "meta": {"icon": "mdi-upload", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint", "_method": "sharepoint",
"_action": "uploadFile", "_action": "uploadFile",
@ -88,7 +117,7 @@ SHAREPOINT_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "FileList"}}, "outputPorts": {0: {"schema": "FileList", "dataPickOptions": FILE_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-folder-open", "color": "#0078D4", "usesAi": False}, "meta": {"icon": "mdi-folder-open", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint", "_method": "sharepoint",
"_action": "listDocuments", "_action": "listDocuments",
@ -109,7 +138,7 @@ SHAREPOINT_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["FileList", "Transit", "LoopItem"]}}, "inputPorts": {0: {"accepts": ["FileList", "Transit", "LoopItem"]}},
"outputPorts": {0: {"schema": "DocumentList"}}, "outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-download", "color": "#0078D4", "usesAi": False}, "meta": {"icon": "mdi-download", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint", "_method": "sharepoint",
"_action": "downloadFileByPath", "_action": "downloadFileByPath",
@ -133,7 +162,7 @@ SHAREPOINT_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-content-copy", "color": "#0078D4", "usesAi": False}, "meta": {"icon": "mdi-content-copy", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint", "_method": "sharepoint",
"_action": "copyFile", "_action": "copyFile",

View file

@ -3,6 +3,8 @@
from modules.shared.i18nRegistry import t from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS
TRIGGER_NODES = [ TRIGGER_NODES = [
{ {
"id": "trigger.manual", "id": "trigger.manual",
@ -13,7 +15,7 @@ TRIGGER_NODES = [
"inputs": 0, "inputs": 0,
"outputs": 1, "outputs": 1,
"inputPorts": {}, "inputPorts": {},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"executor": "trigger", "executor": "trigger",
"meta": {"icon": "mdi-play", "color": "#4CAF50", "usesAi": False}, "meta": {"icon": "mdi-play", "color": "#4CAF50", "usesAi": False},
}, },
@ -55,7 +57,7 @@ TRIGGER_NODES = [
"inputs": 0, "inputs": 0,
"outputs": 1, "outputs": 1,
"inputPorts": {}, "inputPorts": {},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"executor": "trigger", "executor": "trigger",
"meta": {"icon": "mdi-clock", "color": "#2196F3", "usesAi": False}, "meta": {"icon": "mdi-clock", "color": "#2196F3", "usesAi": False},
}, },

View file

@ -3,6 +3,8 @@
from modules.shared.i18nRegistry import t from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS
# Typed FeatureInstance binding (replaces legacy `string, hidden`). # Typed FeatureInstance binding (replaces legacy `string, hidden`).
# - type uses the discriminator notation `FeatureInstanceRef[<code>]` so the # - type uses the discriminator notation `FeatureInstanceRef[<code>]` so the
# DataPicker / RequiredAttributePicker can filter compatible upstream paths. # DataPicker / RequiredAttributePicker can filter compatible upstream paths.
@ -35,7 +37,7 @@ TRUSTEE_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-database-refresh", "color": "#4CAF50", "usesAi": False}, "meta": {"icon": "mdi-database-refresh", "color": "#4CAF50", "usesAi": False},
"_method": "trustee", "_method": "trustee",
"_action": "refreshAccountingData", "_action": "refreshAccountingData",
@ -62,7 +64,7 @@ TRUSTEE_NODES = [
# Runtime returns ActionResult.isSuccess(documents=[...]) — see # Runtime returns ActionResult.isSuccess(documents=[...]) — see
# actions/extractFromFiles.py. Declaring DocumentList here was adapter # actions/extractFromFiles.py. Declaring DocumentList here was adapter
# drift and broke the DataPicker for downstream nodes. # drift and broke the DataPicker for downstream nodes.
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-document-scan", "color": "#4CAF50", "usesAi": True}, "meta": {"icon": "mdi-file-document-scan", "color": "#4CAF50", "usesAi": True},
"_method": "trustee", "_method": "trustee",
"_action": "extractFromFiles", "_action": "extractFromFiles",
@ -84,7 +86,7 @@ TRUSTEE_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["ActionResult", "DocumentList", "Transit"]}}, "inputPorts": {0: {"accepts": ["ActionResult", "DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-document-check", "color": "#4CAF50", "usesAi": False}, "meta": {"icon": "mdi-file-document-check", "color": "#4CAF50", "usesAi": False},
"_method": "trustee", "_method": "trustee",
"_action": "processDocuments", "_action": "processDocuments",
@ -103,7 +105,7 @@ TRUSTEE_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["ActionResult", "DocumentList", "Transit"]}}, "inputPorts": {0: {"accepts": ["ActionResult", "DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-calculator", "color": "#4CAF50", "usesAi": False}, "meta": {"icon": "mdi-calculator", "color": "#4CAF50", "usesAi": False},
"_method": "trustee", "_method": "trustee",
"_action": "syncToAccounting", "_action": "syncToAccounting",
@ -140,7 +142,7 @@ TRUSTEE_NODES = [
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit", "AiResult", "ConsolidateResult", "UdmDocument"]}}, "inputPorts": {0: {"accepts": ["Transit", "AiResult", "ConsolidateResult", "UdmDocument"]}},
"outputPorts": {0: {"schema": "ActionResult"}}, "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-database-search", "color": "#4CAF50", "usesAi": False}, "meta": {"icon": "mdi-database-search", "color": "#4CAF50", "usesAi": False},
"_method": "trustee", "_method": "trustee",
"_action": "queryData", "_action": "queryData",

View file

@ -82,6 +82,34 @@ def _localizeNode(node: Dict[str, Any], language: str) -> Dict[str, Any]:
pc["description"] = resolveText(pd, lang) pc["description"] = resolveText(pd, lang)
params.append(pc) params.append(pc)
out["parameters"] = params out["parameters"] = params
out_ports: Dict[Any, Dict[str, Any]] = {}
for idx, po in (node.get("outputPorts") or {}).items():
if not isinstance(po, dict):
continue
port_copy = dict(po)
opts = port_copy.get("dataPickOptions")
if isinstance(opts, list):
loc_opts: List[Dict[str, Any]] = []
for o in opts:
if not isinstance(o, dict):
continue
oc = dict(o)
pl = oc.get("pickerLabel")
if pl is not None:
oc["pickerLabel"] = resolveText(pl, lang)
dt = oc.get("detail")
if dt is not None:
oc["detail"] = resolveText(dt, lang)
loc_opts.append(oc)
port_copy["dataPickOptions"] = loc_opts
out_ports[idx] = port_copy
if isinstance(node.get("outputPorts"), dict):
out["outputPorts"] = out_ports
# Legacy node-level key no longer used — do not expose.
out.pop("outputPickHints", None)
return out return out
@ -112,7 +140,7 @@ def getNodeTypesForApi(
for name, schema in PORT_TYPE_CATALOG.items(): for name, schema in PORT_TYPE_CATALOG.items():
catalogSerialized[name] = { catalogSerialized[name] = {
"name": schema.name, "name": schema.name,
"fields": [f.model_dump() for f in schema.fields], "fields": [f.model_dump(by_alias=True, exclude_none=True) for f in schema.fields],
} }
return { return {

View file

@ -13,9 +13,9 @@ import time
import uuid import uuid
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field from pydantic import BaseModel, ConfigDict, Field
from modules.shared.i18nRegistry import resolveText from modules.shared.i18nRegistry import resolveText, t
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -25,6 +25,8 @@ logger = logging.getLogger(__name__)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
class PortField(BaseModel): class PortField(BaseModel):
model_config = ConfigDict(populate_by_name=True)
name: str name: str
type: str # str, int, bool, List[str], List[Document], Dict[str,Any], ConnectionRef, … type: str # str, int, bool, List[str], List[Document], Dict[str,Any], ConnectionRef, …
description: str = "" description: str = ""
@ -36,11 +38,19 @@ class PortField(BaseModel):
discriminator: bool = False discriminator: bool = False
# Surfaces this field at the top of the DataPicker list as the most common pick. # Surfaces this field at the top of the DataPicker list as the most common pick.
recommended: bool = False recommended: bool = False
# Human DataPicker title (camelCase JSON for frontend). Omit for technical paths-only.
picker_label: Optional[str] = Field(default=None, serialization_alias="pickerLabel")
# For List[T] fields: segment between parent and inner field (iteration / one list item).
picker_item_label: Optional[str] = Field(default=None, serialization_alias="pickerItemLabel")
class PortSchema(BaseModel): class PortSchema(BaseModel):
name: str # e.g. "EmailDraft", "AiResult", "Transit" name: str # e.g. "EmailDraft", "AiResult", "Transit"
fields: List[PortField] fields: List[PortField]
# Declarative flag for the engine: when True, the executor attaches
# connection provenance ({id, authority, label}) onto the output. Replaces
# hard-coded schema lists in actionNodeExecutor._attachConnectionProvenance.
carriesConnectionProvenance: bool = False
class InputPortDef(BaseModel): class InputPortDef(BaseModel):
@ -153,7 +163,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
PortField(name="text", type="str", required=False, description="Textinhalt"), PortField(name="text", type="str", required=False, description="Textinhalt"),
PortField(name="children", type="List[Any]", required=False, description="Unterblöcke"), PortField(name="children", type="List[Any]", required=False, description="Unterblöcke"),
]), ]),
"DocumentList": PortSchema(name="DocumentList", fields=[ "DocumentList": PortSchema(name="DocumentList", carriesConnectionProvenance=True, fields=[
PortField(name="documents", type="List[Document]", PortField(name="documents", type="List[Document]",
description="Dokumente aus vorherigen Schritten", recommended=True), description="Dokumente aus vorherigen Schritten", recommended=True),
PortField(name="connection", type="ConnectionRef", required=False, PortField(name="connection", type="ConnectionRef", required=False,
@ -163,7 +173,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
PortField(name="count", type="int", required=False, PortField(name="count", type="int", required=False,
description="Anzahl Dokumente"), description="Anzahl Dokumente"),
]), ]),
"FileList": PortSchema(name="FileList", fields=[ "FileList": PortSchema(name="FileList", carriesConnectionProvenance=True, fields=[
PortField(name="files", type="List[FileItem]", PortField(name="files", type="List[FileItem]",
description="Dateiliste"), description="Dateiliste"),
PortField(name="connection", type="ConnectionRef", required=False, PortField(name="connection", type="ConnectionRef", required=False,
@ -173,7 +183,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
PortField(name="count", type="int", required=False, PortField(name="count", type="int", required=False,
description="Anzahl Dateien"), description="Anzahl Dateien"),
]), ]),
"EmailDraft": PortSchema(name="EmailDraft", fields=[ "EmailDraft": PortSchema(name="EmailDraft", carriesConnectionProvenance=True, fields=[
PortField(name="subject", type="str", PortField(name="subject", type="str",
description="Betreff"), description="Betreff"),
PortField(name="body", type="str", PortField(name="body", type="str",
@ -187,7 +197,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
PortField(name="connection", type="ConnectionRef", required=False, PortField(name="connection", type="ConnectionRef", required=False,
description="Outlook-/Graph-Verbindung"), description="Outlook-/Graph-Verbindung"),
]), ]),
"EmailList": PortSchema(name="EmailList", fields=[ "EmailList": PortSchema(name="EmailList", carriesConnectionProvenance=True, fields=[
PortField(name="emails", type="List[EmailItem]", PortField(name="emails", type="List[EmailItem]",
description="E-Mails"), description="E-Mails"),
PortField(name="connection", type="ConnectionRef", required=False, PortField(name="connection", type="ConnectionRef", required=False,
@ -195,7 +205,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
PortField(name="count", type="int", required=False, PortField(name="count", type="int", required=False,
description="Anzahl"), description="Anzahl"),
]), ]),
"TaskList": PortSchema(name="TaskList", fields=[ "TaskList": PortSchema(name="TaskList", carriesConnectionProvenance=True, fields=[
PortField(name="tasks", type="List[TaskItem]", PortField(name="tasks", type="List[TaskItem]",
description="Aufgaben"), description="Aufgaben"),
PortField(name="connection", type="ConnectionRef", required=False, PortField(name="connection", type="ConnectionRef", required=False,
@ -219,15 +229,29 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
]), ]),
"AiResult": PortSchema(name="AiResult", fields=[ "AiResult": PortSchema(name="AiResult", fields=[
PortField(name="prompt", type="str", PortField(name="prompt", type="str",
description="Prompt"), description="Prompt",
picker_label=t("Eingabe (Prompt des Schritts)"),
),
PortField(name="response", type="str", PortField(name="response", type="str",
description="Antworttext", recommended=True), description=(
"Antworttext (Modell-Fließtext o. ä.; Bilder liegen in documents, nicht hier)."
),
recommended=True,
picker_label=t("Ausgabetext (Modell)"),
),
PortField(name="responseData", type="Dict", required=False, PortField(name="responseData", type="Dict", required=False,
description="Strukturierte Antwort (nur bei JSON-Ausgabe)"), description="Strukturierte Antwort (nur bei JSON-Ausgabe)",
picker_label=t("Strukturierte Antwortdaten")),
PortField(name="context", type="str", PortField(name="context", type="str",
description="Kontext"), description="Kontext",
picker_label=t("Eingabe-Kontext")),
PortField(name="documents", type="List[Document]", PortField(name="documents", type="List[Document]",
description="Dokumente"), description=(
"Erzeugte oder mitgegebene Dateien (z. B. Bilder); documentData = Nutzlast pro Eintrag."
),
picker_label=t("Alle Ausgabe-Dateien (Liste)"),
picker_item_label=t("je Datei"),
),
]), ]),
"BoolResult": PortSchema(name="BoolResult", fields=[ "BoolResult": PortSchema(name="BoolResult", fields=[
PortField(name="result", type="bool", PortField(name="result", type="bool",
@ -237,7 +261,8 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
]), ]),
"TextResult": PortSchema(name="TextResult", fields=[ "TextResult": PortSchema(name="TextResult", fields=[
PortField(name="text", type="str", PortField(name="text", type="str",
description="Text"), description="Text",
picker_label=t("Text (Schrittausgabe)")),
]), ]),
"LoopItem": PortSchema(name="LoopItem", fields=[ "LoopItem": PortSchema(name="LoopItem", fields=[
PortField(name="currentItem", type="Any", PortField(name="currentItem", type="Any",
@ -265,11 +290,15 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
]), ]),
"ActionDocument": PortSchema(name="ActionDocument", fields=[ "ActionDocument": PortSchema(name="ActionDocument", fields=[
PortField(name="documentName", type="str", PortField(name="documentName", type="str",
description="Dokumentname"), description="Dokumentname",
picker_label=t("Dateiname")),
PortField(name="documentData", type="Any", PortField(name="documentData", type="Any",
description="Inhalt / Rohdaten (z.B. JSON-String, Bytes)"), description="Inhalt / Rohdaten (z.B. JSON-String, Bytes)",
picker_label=t("Dateiinhalt (JSON, Text oder Bild)"),
recommended=True),
PortField(name="mimeType", type="str", PortField(name="mimeType", type="str",
description="MIME-Typ"), description="MIME-Typ",
picker_label=t("Dateityp (MIME)")),
PortField(name="fileId", type="str", required=False, PortField(name="fileId", type="str", required=False,
description="Persistierte FileItem.id (vom Engine ergänzt)"), description="Persistierte FileItem.id (vom Engine ergänzt)"),
PortField(name="fileName", type="str", required=False, PortField(name="fileName", type="str", required=False,
@ -285,12 +314,40 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
# Without it in the catalog the DataPicker cannot offer downstream # Without it in the catalog the DataPicker cannot offer downstream
# bindings like `processDocuments → documents → *` for syncToAccounting. # bindings like `processDocuments → documents → *` for syncToAccounting.
PortField(name="documents", type="List[ActionDocument]", required=False, PortField(name="documents", type="List[ActionDocument]", required=False,
description="Erzeugte Dokumente (immer befüllt für Trustee/AI/Email/...)"), description=(
"Dokumentliste: Index 0 oft JSON-Handover oder Hauptdatei; Einträge mit "
"MIME image/* oder Namen extract_media_* sind ausgelagerte Bilder (documentData = Binär)."
),
picker_label=t("Alle Ausgabe-Dokumente"),
picker_item_label=t("je Dokument"),
),
PortField(name="data", type="Dict", required=False, PortField(name="data", type="Dict", required=False,
description="Ergebnisdaten"), description="Ergebnisdaten",
picker_label=t("Technische Detaildaten (data)")),
# Mirror AiResult primary text fields so DataPicker / primaryTextRef behave the same
PortField(name="prompt", type="str", required=False,
description="Optional: auslösender Prompt / Schrittname",
picker_label=t("Auslöser / Prompt (falls vorhanden)")),
PortField(name="response", type="str", required=False,
description=(
"Primär nur Fließtext (z. B. nach Extraktion: alle Text-Parts verkettet, keine Bilder)."
),
recommended=True,
picker_label=t("Nur Fließtext (gesamt)")),
PortField(name="context", type="str", required=False,
description="Optional: Eingabe-Kontext",
picker_label=t("Mitgegebener Kontext")),
PortField(name="imageDocumentsOnly", type="List[ActionDocument]", required=False,
description=(
"Nur Bildausgaben (ohne JSON-Handover), z. B. von context.extractContent."
),
picker_label=t("Nur Bilder (Liste)")),
PortField(name="responseData", type="Dict", required=False,
description="Optional: strukturierte Zusatzdaten",
picker_label=t("Strukturierte Zusatzdaten")),
]), ]),
"Transit": PortSchema(name="Transit", fields=[]), "Transit": PortSchema(name="Transit", fields=[]),
"UdmDocument": PortSchema(name="UdmDocument", fields=[ "UdmDocument": PortSchema(name="UdmDocument", carriesConnectionProvenance=True, fields=[
PortField(name="id", type="str", description="Dokument-ID"), PortField(name="id", type="str", description="Dokument-ID"),
PortField(name="sourceType", type="str", description="Quellformat (pdf, docx, …)"), PortField(name="sourceType", type="str", description="Quellformat (pdf, docx, …)"),
PortField(name="sourcePath", type="str", description="Quellpfad"), PortField(name="sourcePath", type="str", description="Quellpfad"),
@ -622,6 +679,7 @@ SYSTEM_VARIABLES: Dict[str, Dict[str, str]] = {
PRIMARY_TEXT_HANDOVER_REF_PATH: Dict[str, List[Any]] = { PRIMARY_TEXT_HANDOVER_REF_PATH: Dict[str, List[Any]] = {
"AiResult": ["response"], "AiResult": ["response"],
"ActionResult": ["response"],
"TextResult": ["text"], "TextResult": ["text"],
"ConsolidateResult": ["result"], "ConsolidateResult": ["result"],
} }

View file

@ -36,6 +36,31 @@ def _paths_for_port_schema(schema: PortSchema, producer_node_id: str) -> List[Di
return out return out
def _paths_for_data_pick_options(
options: List[Dict[str, Any]],
producer_node_id: str,
) -> List[Dict[str, Any]]:
"""Explicit per-port pick list from node definition (authoritative; no catalog expansion)."""
out: List[Dict[str, Any]] = []
for o in options:
if not isinstance(o, dict):
continue
path = o.get("path")
if not isinstance(path, list):
continue
label = o.get("pickerLabel")
out.append(
{
"producerNodeId": producer_node_id,
"path": path,
"type": o.get("type") or "Any",
"label": label if isinstance(label, str) else ".".join(str(p) for p in path),
"scopeOrigin": "data",
}
)
return out
def _paths_for_schema(schema_name: str, producer_node_id: str) -> List[Dict[str, Any]]: def _paths_for_schema(schema_name: str, producer_node_id: str) -> List[Dict[str, Any]]:
if not schema_name or schema_name == "Transit": if not schema_name or schema_name == "Transit":
return [] return []
@ -83,7 +108,16 @@ def compute_upstream_paths(graph: Dict[str, Any], target_node_id: str) -> List[D
if not ndef: if not ndef:
continue continue
out0 = (ndef.get("outputPorts") or {}).get(0, {}) out0 = (ndef.get("outputPorts") or {}).get(0, {})
derived = parse_graph_defined_output_schema(anode, out0 if isinstance(out0, dict) else {}) out0 = out0 if isinstance(out0, dict) else {}
dpo = out0.get("dataPickOptions")
if isinstance(dpo, list) and len(dpo) > 0:
plab = (anode.get("title") or "").strip() or aid
for entry in _paths_for_data_pick_options(dpo, aid):
entry["producerLabel"] = plab
paths.append(entry)
continue
derived = parse_graph_defined_output_schema(anode, out0)
if derived: if derived:
for entry in _paths_for_port_schema(derived, aid): for entry in _paths_for_port_schema(derived, aid):
entry["producerLabel"] = (anode.get("title") or "").strip() or aid entry["producerLabel"] = (anode.get("title") or "").strip() or aid

View file

@ -70,6 +70,14 @@ def _coerce_document_data_to_bytes(raw: Any) -> Optional[bytes]:
return None return None
def _image_documents_from_docs_list(docs_list: list) -> list:
"""All image/* ActionDocument dicts (generic — no assumptions about index 0)."""
return [
d for d in (docs_list or [])
if isinstance(d, dict) and str(d.get("mimeType") or "").strip().lower().startswith("image/")
]
_USER_CONNECTION_ID_RE = re.compile( _USER_CONNECTION_ID_RE = re.compile(
r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
re.IGNORECASE, re.IGNORECASE,
@ -206,6 +214,13 @@ def _buildConnectionRefDict(connRef: str, chatService, services) -> Optional[Dic
return {"id": conn_id, "authority": authority, "label": label or f"{authority}:{user}"} return {"id": conn_id, "authority": authority, "label": label or f"{authority}:{user}"}
def _schemaCarriesConnectionProvenance(outputSchema: str) -> bool:
    """Return True iff the catalog schema for ``outputSchema`` opts into connection provenance.

    Unknown schema names (no catalog entry) and schemas lacking the
    ``carriesConnectionProvenance`` attribute both yield False.
    """
    from modules.features.graphicalEditor.portTypes import PORT_TYPE_CATALOG

    entry = PORT_TYPE_CATALOG.get(outputSchema)
    if entry is None:
        return False
    return bool(getattr(entry, "carriesConnectionProvenance", False))
def _attachConnectionProvenance( def _attachConnectionProvenance(
out: Dict[str, Any], out: Dict[str, Any],
resolvedParams: Dict[str, Any], resolvedParams: Dict[str, Any],
@ -219,7 +234,7 @@ def _attachConnectionProvenance(
cref = resolvedParams.get("connectionReference") cref = resolvedParams.get("connectionReference")
if not cref: if not cref:
return return
if outputSchema not in ("FileList", "DocumentList", "EmailList", "TaskList", "EmailDraft", "UdmDocument"): if not _schemaCarriesConnectionProvenance(outputSchema):
return return
payload = _buildConnectionRefDict(str(cref), chatService, services) payload = _buildConnectionRefDict(str(cref), chatService, services)
if payload: if payload:
@ -235,8 +250,7 @@ def _resolveConnectionParam(params: Dict, chatService, services) -> None:
params["connectionReference"] = resolved params["connectionReference"] = resolved
def _applyEmailCheckFilter(params: Dict) -> None: def _mapper_emailCheckFilter(params: Dict, **_) -> None:
"""Build filter from discrete email params for email.checkEmail."""
built = _buildEmailFilter( built = _buildEmailFilter(
fromAddress=params.get("fromAddress"), fromAddress=params.get("fromAddress"),
subjectContains=params.get("subjectContains"), subjectContains=params.get("subjectContains"),
@ -248,8 +262,7 @@ def _applyEmailCheckFilter(params: Dict) -> None:
params.pop(k, None) params.pop(k, None)
def _applyEmailSearchQuery(params: Dict) -> None: def _mapper_emailSearchQuery(params: Dict, **_) -> None:
"""Build query from discrete email params for email.searchEmail."""
built = _buildSearchQuery( built = _buildSearchQuery(
query=params.get("query"), query=params.get("query"),
fromAddress=params.get("fromAddress"), fromAddress=params.get("fromAddress"),
@ -264,6 +277,56 @@ def _applyEmailSearchQuery(params: Dict) -> None:
params.pop(k, None) params.pop(k, None)
def _mapper_aiPromptLegacyAlias(params: Dict, **_) -> None:
"""Backwards-compatible alias: legacy ``prompt`` parameter is exposed as ``aiPrompt``."""
if "aiPrompt" not in params and "prompt" in params:
params["aiPrompt"] = params.pop("prompt")
def _mapper_emailDraftContextFromSubjectBody(params: Dict, **_) -> None:
"""Build ``context`` from discrete subject + body fields and drop them."""
subject = params.get("subject", "")
body = params.get("body", "")
if not (subject or body):
return
parts = []
if subject:
parts.append(f"Subject: {subject}")
if body:
parts.append(f"Body:\n{body}")
params["context"] = "\n\n".join(parts)
params.pop("subject", None)
params.pop("body", None)
def _mapper_clickupTaskUpdateMerge(params: Dict, **_) -> None:
    """Delegate to the ClickUp-specific merge of task-update entry params (in place)."""
    from modules.workflows.automation2.clickupTaskUpdateMerge import (
        merge_clickup_task_update_entries,
    )

    merge_clickup_task_update_entries(params)
# Registry of declarative param mappers. Node definitions reference these by
# name via their ``paramMappers`` list; _applyParamMappers dispatches on it.
_PARAM_MAPPERS: Dict[str, Any] = {
    "emailCheckFilter": _mapper_emailCheckFilter,
    "emailSearchQuery": _mapper_emailSearchQuery,
    "aiPromptLegacyAlias": _mapper_aiPromptLegacyAlias,
    "emailDraftContextFromSubjectBody": _mapper_emailDraftContextFromSubjectBody,
    "clickupTaskUpdateMerge": _mapper_clickupTaskUpdateMerge,
}
def _applyParamMappers(nodeDef: Dict[str, Any], resolvedParams: Dict[str, Any]) -> None:
    """Run every ``paramMappers`` entry declared on the node definition.

    Mappers are looked up by name in ``_PARAM_MAPPERS`` and mutate
    ``resolvedParams`` in place. Unknown names and mapper failures are only
    logged, so one bad mapper never aborts node execution.
    """
    for mapper_name in nodeDef.get("paramMappers") or []:
        mapper = _PARAM_MAPPERS.get(mapper_name)
        if mapper is None:
            logger.warning("Unknown paramMapper %r — node %s; skipping", mapper_name, nodeDef.get("id"))
            continue
        try:
            mapper(resolvedParams)
        except Exception as exc:
            logger.warning("paramMapper %r failed for node %s: %s", mapper_name, nodeDef.get("id"), exc)
def _getOutputSchemaName(nodeDef: Dict) -> str: def _getOutputSchemaName(nodeDef: Dict) -> str:
"""Get the output schema name from the node definition.""" """Get the output schema name from the node definition."""
outputPorts = nodeDef.get("outputPorts", {}) outputPorts = nodeDef.get("outputPorts", {})
@ -338,14 +401,8 @@ class ActionNodeExecutor:
chatService = getattr(self.services, "chat", None) chatService = getattr(self.services, "chat", None)
_resolveConnectionParam(resolvedParams, chatService, self.services) _resolveConnectionParam(resolvedParams, chatService, self.services)
# 4. Node-type-specific param transformations # 4. Apply declarative paramMappers from the node definition
if nodeType == "email.checkEmail": _applyParamMappers(nodeDef, resolvedParams)
_applyEmailCheckFilter(resolvedParams)
elif nodeType == "email.searchEmail":
_applyEmailSearchQuery(resolvedParams)
elif nodeType == "clickup.updateTask":
from modules.workflows.automation2.clickupTaskUpdateMerge import merge_clickup_task_update_entries
merge_clickup_task_update_entries(resolvedParams)
# 5. email.checkEmail pause for email wait # 5. email.checkEmail pause for email wait
if nodeType == "email.checkEmail": if nodeType == "email.checkEmail":
@ -362,26 +419,7 @@ class ActionNodeExecutor:
} }
raise PauseForEmailWaitError(runId=runId, nodeId=nodeId, waitConfig=waitConfig) raise PauseForEmailWaitError(runId=runId, nodeId=nodeId, waitConfig=waitConfig)
# 6. AI nodes: normalize legacy "prompt" -> "aiPrompt" # 6. Execute action
if nodeType == "ai.prompt":
if "aiPrompt" not in resolvedParams and "prompt" in resolvedParams:
resolvedParams["aiPrompt"] = resolvedParams.pop("prompt")
# 7. Build context for email.draftEmail from subject + body
if nodeType == "email.draftEmail":
subject = resolvedParams.get("subject", "")
body = resolvedParams.get("body", "")
if subject or body:
contextParts = []
if subject:
contextParts.append(f"Subject: {subject}")
if body:
contextParts.append(f"Body:\n{body}")
resolvedParams["context"] = "\n\n".join(contextParts)
resolvedParams.pop("subject", None)
resolvedParams.pop("body", None)
# 8. Execute action
logger.info("ActionNodeExecutor node %s calling %s.%s with %d params", nodeId, methodName, actionName, len(resolvedParams)) logger.info("ActionNodeExecutor node %s calling %s.%s with %d params", nodeId, methodName, actionName, len(resolvedParams))
try: try:
executor = ActionExecutor(self.services) executor = ActionExecutor(self.services)
@ -392,7 +430,7 @@ class ActionNodeExecutor:
logger.exception("ActionNodeExecutor node %s FAILED: %s", nodeId, e) logger.exception("ActionNodeExecutor node %s FAILED: %s", nodeId, e)
return _normalizeError(e, outputSchema) return _normalizeError(e, outputSchema)
# 9. Persist generated documents as files and build JSON-safe output # 7. Persist generated documents as files and build JSON-safe output
_raw_folder_id = resolvedParams.get("folderId") _raw_folder_id = resolvedParams.get("folderId")
persist_folder_id: Optional[str] = None persist_folder_id: Optional[str] = None
if _raw_folder_id is not None: if _raw_folder_id is not None:
@ -415,6 +453,18 @@ class ActionNodeExecutor:
rawData = getattr(d, "documentData", None) if hasattr(d, "documentData") else (dumped.get("documentData") if isinstance(dumped, dict) else None) rawData = getattr(d, "documentData", None) if hasattr(d, "documentData") else (dumped.get("documentData") if isinstance(dumped, dict) else None)
rawBytes = _coerce_document_data_to_bytes(rawData) rawBytes = _coerce_document_data_to_bytes(rawData)
# Extracted page images are workflow intermediates — keep bytes as base64 on the
# ActionDocument only; do not create rows in the user's file library (Meine Dateien).
if isinstance(dumped, dict) and rawBytes:
_meta = dumped.get("validationMetadata") if isinstance(dumped.get("validationMetadata"), dict) else {}
if (
_meta.get("actionType") == "context.extractContent"
and _meta.get("handoverRole") == "extractedMedia"
):
dumped["documentData"] = base64.b64encode(rawBytes).decode("ascii")
dumped["_hasBinaryData"] = True
docsList.append(dumped)
continue
if isinstance(dumped, dict) and rawBytes: if isinstance(dumped, dict) and rawBytes:
try: try:
from modules.interfaces.interfaceDbManagement import getInterface as _getMgmtInterface from modules.interfaces.interfaceDbManagement import getInterface as _getMgmtInterface
@ -452,11 +502,7 @@ class ActionNodeExecutor:
docsList.append(dumped) docsList.append(dumped)
# Clean DocumentList shape for document nodes (documents + count, no ActionResult/AiResult noise) # Clean DocumentList shape for document nodes (documents + count, no ActionResult/AiResult noise)
if outputSchema == "DocumentList" and nodeType in ( if outputSchema == "DocumentList":
"ai.generateDocument",
"ai.convertDocument",
"file.create",
):
if not result.success: if not result.success:
return _normalizeError( return _normalizeError(
RuntimeError(str(result.error or "document action failed")), RuntimeError(str(result.error or "document action failed")),
@ -470,7 +516,10 @@ class ActionNodeExecutor:
return normalizeToSchema(list_out, outputSchema) return normalizeToSchema(list_out, outputSchema)
extractedContext = "" extractedContext = ""
if result.documents: rd_early = getattr(result, "data", None)
if isinstance(rd_early, dict) and rd_early.get("response") is not None:
extractedContext = str(rd_early.get("response")).strip()
elif result.documents:
doc = result.documents[0] doc = result.documents[0]
raw = getattr(doc, "documentData", None) if hasattr(doc, "documentData") else (doc.get("documentData") if isinstance(doc, dict) else None) raw = getattr(doc, "documentData", None) if hasattr(doc, "documentData") else (doc.get("documentData") if isinstance(doc, dict) else None)
if isinstance(raw, bytes): if isinstance(raw, bytes):
@ -480,14 +529,6 @@ class ActionNodeExecutor:
extractedContext = "" extractedContext = ""
elif raw: elif raw:
extractedContext = str(raw).strip() extractedContext = str(raw).strip()
else:
# ai.process (and similar): text handover in ActionResult.data — no persisted document row
rd = getattr(result, "data", None)
if isinstance(rd, dict):
handover = rd.get("response")
if handover is not None:
extractedContext = str(handover).strip()
promptText = str(resolvedParams.get("aiPrompt") or resolvedParams.get("prompt") or "").strip() promptText = str(resolvedParams.get("aiPrompt") or resolvedParams.get("prompt") or "").strip()
resultData = getattr(result, "data", None) resultData = getattr(result, "data", None)
@ -505,7 +546,7 @@ class ActionNodeExecutor:
"data": dataField, "data": dataField,
} }
if nodeType.startswith("ai."): if outputSchema == "AiResult":
out["prompt"] = promptText out["prompt"] = promptText
out["response"] = extractedContext out["response"] = extractedContext
inputContext = resolvedParams.get("context") inputContext = resolvedParams.get("context")
@ -521,8 +562,24 @@ class ActionNodeExecutor:
out["responseData"] = parsed out["responseData"] = parsed
except (json.JSONDecodeError, TypeError): except (json.JSONDecodeError, TypeError):
pass pass
if outputSchema == "AiResult" and result.success:
out["imageDocumentsOnly"] = _image_documents_from_docs_list(docsList)
if nodeType.startswith("clickup.") and result.success and docsList: if outputSchema == "ActionResult":
# Unified handover: mirror AiResult primary paths for DataRefs / primaryTextRef
inp_ctx = resolvedParams.get("context")
ctx_str = ""
if inp_ctx is not None:
ctx_str = inp_ctx if isinstance(inp_ctx, str) else json.dumps(inp_ctx, ensure_ascii=False, default=str)
out.setdefault("prompt", "")
out.setdefault("context", ctx_str if ctx_str else "")
rsp = str(out.get("response") or "").strip()
if not rsp:
out["response"] = extractedContext or ""
if result.success:
out["imageDocumentsOnly"] = _image_documents_from_docs_list(docsList)
if outputSchema == "TaskResult" and result.success and docsList:
try: try:
d0 = docsList[0] if isinstance(docsList[0], dict) else {} d0 = docsList[0] if isinstance(docsList[0], dict) else {}
raw = d0.get("documentData") raw = d0.get("documentData")
@ -534,7 +591,7 @@ class ActionNodeExecutor:
except (json.JSONDecodeError, TypeError, ValueError): except (json.JSONDecodeError, TypeError, ValueError):
pass pass
if outputSchema == "ConsolidateResult" and nodeType == "ai.consolidate": if outputSchema == "ConsolidateResult":
data_dict = result.data if isinstance(getattr(result, "data", None), dict) else {} data_dict = result.data if isinstance(getattr(result, "data", None), dict) else {}
cr_out = { cr_out = {
"result": data_dict.get("result", ""), "result": data_dict.get("result", ""),

View file

@ -410,10 +410,14 @@ def resolveParameterReferences(value: Any, nodeOutputs: Dict[str, Any]) -> Any:
return re.sub(r"\{\{\s*([^}]+)\s*\}\}", repl, value) return re.sub(r"\{\{\s*([^}]+)\s*\}\}", repl, value)
if isinstance(value, list): if isinstance(value, list):
# contextBuilder: list where every item is a `{"type":"ref",...}` envelope. # contextBuilder: list where every item is a `{"type":"ref",...}` envelope.
# Resolve each ref and join the serialised parts into a single prompt string. # Resolve each part; a single ref preserves the resolved type (str, list, dict).
if value and all(isinstance(v, dict) and v.get("type") == "ref" for v in value): if value and all(isinstance(v, dict) and v.get("type") == "ref" for v in value):
from modules.workflows.methods.methodAi._common import serialize_context from modules.workflows.methods.methodAi._common import serialize_context
parts = [serialize_context(resolveParameterReferences(v, nodeOutputs)) for v in value]
resolved_parts = [resolveParameterReferences(v, nodeOutputs) for v in value]
if len(resolved_parts) == 1:
return resolved_parts[0]
parts = [serialize_context(p) for p in resolved_parts]
return "\n\n".join(p for p in parts if p) return "\n\n".join(p for p in parts if p)
return [resolveParameterReferences(v, nodeOutputs) for v in value] return [resolveParameterReferences(v, nodeOutputs) for v in value]
return value return value

View file

@ -7,6 +7,19 @@ import json
from typing import Any from typing import Any
def is_image_action_document_list(val: Any) -> bool:
    """Return True iff ``val`` is a non-empty list of image ActionDocument dicts.

    Every element must be a dict whose ``mimeType`` (case-insensitive,
    surrounding whitespace ignored) starts with ``image/``. An empty list,
    a non-list value, or any non-conforming element yields False.
    """
    def _is_image_doc(item: Any) -> bool:
        if not isinstance(item, dict):
            return False
        mime = str(item.get("mimeType") or "").strip().lower()
        return mime.startswith("image/")

    return bool(val) and isinstance(val, list) and all(_is_image_doc(item) for item in val)
def serialize_context(val: Any) -> str: def serialize_context(val: Any) -> str:
"""Convert any context value to a readable string for use in AI prompts. """Convert any context value to a readable string for use in AI prompts.

View file

@ -1,149 +1,358 @@
# Copyright (c) 2025 Patrick Motsch # Copyright (c) 2025 Patrick Motsch
# All rights reserved. # All rights reserved.
"""context.extractContent — extracts content without AI.
Returns a unified handover compatible with AiResult-style downstream wiring:
- ``documents[0]``: structured JSON (`context.extractContent.handover.v1`); image ``parts``
keep metadata but omit pixel data; each dropped image references
``handoverMediaDocumentName`` matching a sibling blob document.
- ``documents[1:]``: each extracted image as its own binary ``ActionDocument`` (like
``ai.process`` artefact outputs).
- ``ActionResult.data["response"]`` plus normalized executor field ``response``: concatenated
plain text from all text parts safe default for ``file.create`` / primaryTextRef."""
import base64 as _b64
import binascii as _binascii
import logging import logging
import re
import time import time
from typing import Dict, Any from typing import Any, Dict, List, Tuple
from modules.datamodels.datamodelChat import ActionResult, ActionDocument from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import ( from modules.datamodels.datamodelDocref import coerceDocumentReferenceList
DocumentReferenceList, from modules.datamodels.datamodelExtraction import ContentExtracted, ExtractionOptions
coerceDocumentReferenceList,
)
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Whitelist for file JSON keys: any char outside word chars / - . ( ) [ ] % @ +
# is replaced with "_" when slugifying display names.
_UNSAFE_FILE_KEY = re.compile(r"[^\w\-.\(\)\[\]%@+]")
# Schema/version tag stamped into the structured handover payload ("kind" field).
HANDOVER_KIND = "context.extractContent.handover.v1"
def _default_extraction_options() -> ExtractionOptions:
    """Default options: extract everything, keep every part, never merge.

    ``mergeStrategy=None`` deliberately preserves all parts so downstream
    consumers can select content from the structured JSON handover themselves.
    """
    return ExtractionOptions(
        prompt="Extract all content from the document",
        mergeStrategy=None,
        processDocumentsIndividually=True,
        outputFormat="parts",
        outputDetail="full",
    )
def _file_json_key(display_name: str, index: int, key_counts: Dict[str, int]) -> str:
    """Build a stable, unique JSON key for file ``index`` in the handover payload.

    The display name is slugified (path separators, spaces and unsafe chars
    become ``_``); ``key_counts`` tracks how often each base key was seen, so
    duplicates receive a ``__<n>`` suffix. Mutates ``key_counts`` in place.
    """
    fallback = f"document_{index + 1}"
    stem = (display_name or "").strip() or fallback
    for separator in ("/", "\\", " "):
        stem = stem.replace(separator, "_")
    slug = _UNSAFE_FILE_KEY.sub("_", stem).strip("_") or fallback
    base_key = f"file_{index + 1}_{slug}"
    seen = key_counts.get(base_key, 0)
    key_counts[base_key] = seen + 1
    return base_key if seen == 0 else f"{base_key}__{seen}"
def _serialize_parts(parts: Any) -> List[Dict[str, Any]]:
out: List[Dict[str, Any]] = []
for p in parts or []:
if hasattr(p, "model_dump"):
out.append(p.model_dump(mode="json"))
elif isinstance(p, dict):
out.append(dict(p))
return out
def _rebuild_by_type_group(parts_ser: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
by_type: Dict[str, List[Dict[str, Any]]] = {}
for entry in parts_ser:
if not isinstance(entry, dict):
continue
tg = (entry.get("typeGroup") or "").strip() or "_other"
by_type.setdefault(tg, []).append(entry)
return by_type
def _joined_text_from_handover_payload(payload: Dict[str, Any]) -> str:
"""Concatenate text parts across fileOrder for AiResult-compatible ``response``."""
files_section = payload.get("files") or {}
ordered = payload.get("fileOrder")
keys: List[str] = ordered if isinstance(ordered, list) and ordered else list(files_section.keys())
chunks: List[str] = []
for fk in keys:
bucket = files_section.get(fk)
if not isinstance(bucket, dict):
continue
for p in bucket.get("parts") or []:
if not isinstance(p, dict):
continue
if (p.get("typeGroup") or "").strip() != "text":
continue
raw = p.get("data")
if raw is None:
continue
s = str(raw).strip()
if s:
chunks.append(s)
return "\n\n".join(chunks)
def _mime_to_file_extension(mime: str) -> str:
m = (mime or "").split(";")[0].strip().lower()
mapping = {
"image/jpeg": "jpg",
"image/jpg": "jpg",
"image/png": "png",
"image/gif": "gif",
"image/webp": "webp",
"image/bmp": "bmp",
"image/tiff": "tiff",
}
return mapping.get(m, m.rsplit("/", 1)[-1] if "/" in m else "bin")
def _split_images_to_sidecar_documents(
    payload: Dict[str, Any],
    *,
    document_name_stem: str,
) -> Tuple[Dict[str, Any], List[ActionDocument]]:
    """
    Deep-copy handover JSON, clear image pixel data from ``parts``, attach
    ``handoverMediaDocumentName`` on each image part, emit binary ActionDocuments.

    Returns ``(stripped_bundle, media_docs)``. The input ``payload`` is never
    mutated (a deep copy is edited). Image parts whose base64 data fails to
    decode, or decodes to empty bytes, are kept inline and unchanged.
    """
    import copy
    # Work on a deep copy so the caller's payload keeps its inline image data.
    bundle = copy.deepcopy(payload)
    files_section = bundle.get("files") or {}
    ordered = bundle.get("fileOrder")
    key_order: List[str] = ordered if isinstance(ordered, list) and ordered else list(files_section.keys())
    media_docs: List[ActionDocument] = []
    kind = bundle.get("kind") or HANDOVER_KIND
    # Filename-safe stem shared by all sidecar documents of this run.
    stem = re.sub(r"[^\w\-]+", "_", document_name_stem).strip("_") or "extract"
    for fk in key_order:
        bucket = files_section.get(fk)
        if not isinstance(bucket, dict):
            continue
        parts = bucket.get("parts")
        if not isinstance(parts, list):
            continue
        new_parts: List[Dict[str, Any]] = []
        for p in parts:
            # Non-dict parts pass through untouched.
            if not isinstance(p, dict):
                new_parts.append(p)
                continue
            pcopy = dict(p)
            tg = (pcopy.get("typeGroup") or "").strip()
            mime = (pcopy.get("mimeType") or "").strip()
            raw_data = pcopy.get("data")
            # Only genuine image parts with non-empty data are externalised.
            if tg == "image" and mime.lower().startswith("image/") and raw_data:
                raw_s = raw_data.strip() if isinstance(raw_data, str) else ""
                try:
                    # strict base64 — reject malformed data instead of guessing
                    blob = _b64.b64decode(raw_s, validate=True) if raw_s else b""
                except (_binascii.Error, TypeError, ValueError) as e:
                    # Undecodable data: log and keep the part inline as-is.
                    logger.warning(
                        "extractContent: could not decode image part %s (keep inline): %s",
                        pcopy.get("id"),
                        e,
                    )
                    new_parts.append(pcopy)
                    continue
                if not blob:
                    new_parts.append(pcopy)
                    continue
                part_id = str(pcopy.get("id") or "part")
                # Full part id (UUID) — must not truncate or names collide / break linking
                safe_id = re.sub(r"[^\w\-.]+", "_", part_id).strip("_") or "media"
                if len(safe_id) > 200:
                    safe_id = safe_id[:200]
                ext = _mime_to_file_extension(mime)
                media_name = f"extract_media_{stem}_{safe_id}.{ext}"
                # Strip pixel data from the JSON part; link it to the sidecar doc.
                pcopy["data"] = ""
                pcopy["handoverMediaDocumentName"] = media_name
                media_docs.append(
                    ActionDocument(
                        documentName=media_name,
                        documentData=blob,
                        mimeType=mime,
                        validationMetadata={
                            "actionType": "context.extractContent",
                            "handoverRole": "extractedMedia",
                            "sourcePartId": part_id,
                            "handoverSchema": kind,
                            "containerFileKey": fk,
                        },
                    )
                )
                new_parts.append(pcopy)
            else:
                new_parts.append(pcopy)
        bucket["parts"] = new_parts
        # Regroup after stripping so byTypeGroup mirrors the rewritten parts.
        bucket["byTypeGroup"] = _rebuild_by_type_group(new_parts)
        files_section[fk] = bucket
    return bundle, media_docs
def _one_file_bucket(ec: ContentExtracted, source_file_name: str) -> Dict[str, Any]:
    """Build the per-file JSON bucket for one ContentExtracted result.

    Pydantic-style ``udm``/``summary`` objects are dumped to plain JSON
    dicts; a dict summary is shallow-copied and a missing one becomes ``{}``.
    """
    serialized_parts = _serialize_parts(ec.parts)

    udm = getattr(ec, "udm", None)
    if hasattr(udm, "model_dump"):
        udm = udm.model_dump(mode="json")

    summary = getattr(ec, "summary", None)
    if hasattr(summary, "model_dump"):
        summary = summary.model_dump(mode="json")
    elif isinstance(summary, dict):
        summary = dict(summary)
    elif summary is None:
        summary = {}

    return {
        "sourceFileName": source_file_name,
        "extractedId": getattr(ec, "id", ""),
        "summary": summary,
        "udm": udm,
        "parts": serialized_parts,
        "byTypeGroup": _rebuild_by_type_group(serialized_parts),
    }
def build_extract_content_handover(
    *,
    extracted_results: List[ContentExtracted],
    chat_file_names: List[str],
    operation_ref: str,
) -> Dict[str, Any]:
    """Assemble the versioned JSON handover for ``context.extractContent``.

    Each extraction result gets a unique ``file_<n>_<slug>`` key derived from
    the matching entry in ``chat_file_names`` (``""`` when that list is
    shorter); ``fileOrder`` records insertion order so downstream traversal
    is deterministic.
    """
    seen_keys: Dict[str, int] = {}
    file_buckets: Dict[str, Any] = {}
    file_order: List[str] = []
    for index, extracted in enumerate(extracted_results):
        display_name = str(chat_file_names[index]) if index < len(chat_file_names) else ""
        file_key = _file_json_key(display_name, index, seen_keys)
        file_buckets[file_key] = _one_file_bucket(extracted, display_name)
        file_order.append(file_key)
    return {
        "schemaVersion": 1,
        "kind": HANDOVER_KIND,
        "operationRef": operation_ref,
        "fileOrder": file_order,
        "files": file_buckets,
    }
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
operationId = None operation_id = None
try: try:
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" wf = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
operationId = f"context_extract_{workflowId}_{int(time.time())}" operation_id = f"context_extract_{wf}_{int(time.time())}"
documentListParam = parameters.get("documentList") document_list_param = parameters.get("documentList")
if not documentListParam: if not document_list_param:
return ActionResult.isFailure(error="documentList is required") return ActionResult.isFailure(error="documentList is required")
documentList = coerceDocumentReferenceList(documentListParam) dl = coerceDocumentReferenceList(document_list_param)
if not documentList.references: if not dl.references:
return ActionResult.isFailure( return ActionResult.isFailure(
error=f"documentList could not be parsed (type={type(documentListParam).__name__}); " error=(
f"expected DocumentReferenceList, list of strings/dicts, or " f"documentList could not be parsed (type={type(document_list_param).__name__}); "
f"a wrapper dict like {{'documents': [...]}}" "expected DocumentReferenceList, list of strings/dicts, or "
"a wrapper dict like {'documents': [...]}"
),
) )
# Start progress tracking parent_operation_id = parameters.get("parentOperationId")
parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart( self.services.chat.progressLogStart(
operationId, operation_id,
"Extracting content from documents", "Extracting content from documents",
"Content Extraction", "Content Extraction",
f"Documents: {len(documentList.references)}", f"Documents: {len(dl.references)}",
parentOperationId=parentOperationId parentOperationId=parent_operation_id,
) )
# Get ChatDocuments from documentList self.services.chat.progressLogUpdate(operation_id, 0.2, "Loading documents")
self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents") chat_documents = self.services.chat.getChatDocumentsFromDocumentList(dl)
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) if not chat_documents:
self.services.chat.progressLogFinish(operation_id, False)
if not chatDocuments:
self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="No documents found in documentList") return ActionResult.isFailure(error="No documents found in documentList")
logger.info(f"Extracting content from {len(chatDocuments)} documents") logger.info(f"Extracting JSON handover from {len(chat_documents)} documents")
# Prepare extraction options self.services.chat.progressLogUpdate(operation_id, 0.3, "Preparing extraction options")
self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
extractionOptionsParam = parameters.get("extractionOptions") eo_param = parameters.get("extractionOptions")
extraction_options: ExtractionOptions
# Convert dict to ExtractionOptions object if needed, or create defaults if isinstance(eo_param, dict) and eo_param:
if extractionOptionsParam: eo = dict(eo_param)
if isinstance(extractionOptionsParam, dict): eo.setdefault("prompt", "Extract all content from the document")
# Ensure required fields are present if "mergeStrategy" not in eo:
if "prompt" not in extractionOptionsParam: eo["mergeStrategy"] = None
extractionOptionsParam["prompt"] = "Extract all content from the document" try:
if "mergeStrategy" not in extractionOptionsParam: extraction_options = ExtractionOptions(**eo)
extractionOptionsParam["mergeStrategy"] = MergeStrategy( except Exception as e:
mergeType="concatenate", logger.warning(f"Invalid extractionOptions, using defaults: {e}")
groupBy="typeGroup", extraction_options = _default_extraction_options()
orderBy="id" elif isinstance(eo_param, ExtractionOptions):
) extraction_options = eo_param
# Convert dict to ExtractionOptions object
try:
extractionOptions = ExtractionOptions(**extractionOptionsParam)
except Exception as e:
logger.warning(f"Failed to create ExtractionOptions from dict: {str(e)}, using defaults")
extractionOptions = None
elif isinstance(extractionOptionsParam, ExtractionOptions):
extractionOptions = extractionOptionsParam
else:
# Invalid type, use defaults
logger.warning(f"Invalid extractionOptions type: {type(extractionOptionsParam)}, using defaults")
extractionOptions = None
else: else:
extractionOptions = None extraction_options = _default_extraction_options()
# If extractionOptions not provided, create defaults self.services.chat.progressLogUpdate(operation_id, 0.4, "Extracting …")
if not extractionOptions: self.services.chat.progressLogUpdate(operation_id, 0.5, f"Extracting {len(chat_documents)} document(s)")
# Default extraction options for pure content extraction (no AI processing) extracted_results = self.services.extraction.extractContent(chat_documents, extraction_options, operationId=operation_id)
extractionOptions = ExtractionOptions(
prompt="Extract all content from the document", file_names = [getattr(cd, "fileName", "") or "" for cd in chat_documents]
mergeStrategy=MergeStrategy(
mergeType="concatenate", payload = build_extract_content_handover(
groupBy="typeGroup", extracted_results=extracted_results,
orderBy="id" chat_file_names=file_names,
), operation_ref=operation_id,
processDocumentsIndividually=True )
)
self.services.chat.progressLogUpdate(operation_id, 0.9, "Building JSON")
# Call extraction service with hierarchical progress logging
self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating") stem = f"{wf}_{int(time.time())}"
self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents") stripped_payload, media_docs = _split_images_to_sidecar_documents(
# Pass operationId for hierarchical per-document progress logging payload,
extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId) document_name_stem=stem,
)
# Build ActionDocuments from ContentExtracted results joined_text = _joined_text_from_handover_payload(payload)
self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
actionDocuments = [] json_meta = {
# Map extracted results back to original documents by index (results are in same order) "actionType": "context.extractContent",
for i, extracted in enumerate(extractedResults): "documentCountInput": len(chat_documents),
# Get original document name if available "documentCountRoots": len(extracted_results),
originalDoc = chatDocuments[i] if i < len(chatDocuments) else None "handoverSchema": stripped_payload.get("kind"),
if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName: "handoverRole": "structuredHandover",
# Use original filename with "extracted_" prefix "mediaDocumentCount": len(media_docs),
baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName }
documentName = f"{baseName}_extracted_{extracted.id}.json"
else: json_doc = ActionDocument(
# Fallback to generic name with index documentName=f"extracted_content_{stem}.json",
documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json" documentData=stripped_payload,
mimeType="application/json",
# Store ContentExtracted object in ActionDocument.documentData validationMetadata=json_meta,
validationMetadata = { )
"actionType": "context.extractContent",
"documentIndex": i, handover_data = {
"extractedId": extracted.id, "response": joined_text,
"partCount": len(extracted.parts) if extracted.parts else 0, "contentType": "text",
"originalFileName": originalDoc.fileName if originalDoc and hasattr(originalDoc, 'fileName') else None "handoverKind": stripped_payload.get("kind"),
} "structuredDocumentIndex": 0,
actionDoc = ActionDocument( "mediaDocumentCount": len(media_docs),
documentName=documentName, }
documentData=extracted, # ContentExtracted object
mimeType="application/json", self.services.chat.progressLogFinish(operation_id, True)
validationMetadata=validationMetadata return ActionResult.isSuccess(documents=[json_doc] + media_docs, data=handover_data)
)
actionDocuments.append(actionDoc)
self.services.chat.progressLogFinish(operationId, True)
return ActionResult.isSuccess(documents=actionDocuments)
except Exception as e: except Exception as e:
logger.error(f"Error in content extraction: {str(e)}") logger.error(f"Error in content extraction: {str(e)}")
try: try:
if operationId: if operation_id:
self.services.chat.progressLogFinish(operationId, False) self.services.chat.progressLogFinish(operation_id, False)
except Exception: except Exception:
pass pass
return ActionResult.isFailure(error=str(e)) return ActionResult.isFailure(error=str(e))

View file

@ -1,240 +1,310 @@
# Copyright (c) 2025 Patrick Motsch # Copyright (c) 2025 Patrick Motsch
# All rights reserved. # All rights reserved.
import base64 as _b64
import logging import logging
import time import time
from typing import Dict, Any from typing import Any, Dict
from modules.datamodels.datamodelChat import ActionResult, ActionDocument from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import ( from modules.datamodels.datamodelDocref import coerceDocumentReferenceList
DocumentReferenceList,
coerceDocumentReferenceList,
)
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
from .extractContent import _one_file_bucket
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
HANDOVER_KIND = "context.extractContent.handover.v1"
async def _neutralize_one_content_extracted(
*,
svc,
content_extracted: ContentExtracted,
operation_id: str,
chat_doc_slot: int,
chat_documents_len: int,
) -> ContentExtracted:
"""Neutralize every part inside a ContentExtracted (copied semantics from legacy inline loop)."""
neutralized_parts = []
for part in content_extracted.parts:
if not isinstance(part, ContentPart):
if isinstance(part, dict):
try:
part = ContentPart(**part)
except Exception as e:
logger.warning(f"Could not parse ContentPart: {str(e)}")
neutralized_parts.append(part)
continue
else:
neutralized_parts.append(part)
continue
_type_group = getattr(part, "typeGroup", "") or ""
prog = 0.3 + (chat_doc_slot / max(1, chat_documents_len)) * 0.6
if _type_group == "image" and part.data:
try:
svc.services.chat.progressLogUpdate(
operation_id,
prog,
f"Checking image part {len(neutralized_parts) + 1}",
)
_img_bytes = _b64.b64decode(str(part.data))
_img_result = await svc.services.neutralization.processImageAsync(_img_bytes, f"part_{part.id}")
if _img_result.get("status") == "ok":
neutralized_parts.append(part)
else:
logger.warning("Fail-Safe: Image part %s blocked (PII), SKIPPING", part.id)
except Exception as _img_err:
logger.error(f"Fail-Safe: Image check failed for part {part.id}: {_img_err}, SKIPPING")
elif part.data:
try:
svc.services.chat.progressLogUpdate(
operation_id,
prog,
f"Neutralizing part {len(neutralized_parts) + 1}",
)
neut_res = await svc.services.neutralization.processTextAsync(part.data)
if neut_res and "neutralized_text" in neut_res:
neutral_data = neut_res["neutralized_text"]
neutralized_parts.append(
ContentPart(
id=part.id,
parentId=part.parentId,
label=part.label,
typeGroup=part.typeGroup,
mimeType=part.mimeType,
data=neutral_data,
metadata=part.metadata.copy() if part.metadata else {},
)
)
else:
logger.warning(
"Fail-Safe: Neutralization incomplete for part %s — SKIPPING (not passing original)",
part.id,
)
continue
except Exception as e:
logger.error(f"Fail-Safe: Error neutralizing part {part.id}: {str(e)}, SKIPPING")
continue
else:
neutralized_parts.append(part)
return ContentExtracted(
id=content_extracted.id,
parts=neutralized_parts,
summary=content_extracted.summary,
)
async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult: async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
operationId = None operation_id = None
try: try:
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" workflow_id = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
operationId = f"context_neutralize_{workflowId}_{int(time.time())}" operation_id = f"context_neutralize_{workflow_id}_{int(time.time())}"
neutralizationEnabled = False neutralization_enabled = False
try: try:
config = self.services.neutralization.getConfig() config = self.services.neutralization.getConfig()
neutralizationEnabled = config and config.enabled neutralization_enabled = config and config.enabled
except Exception as e: except Exception as e:
logger.debug(f"Could not check neutralization config: {str(e)}") logger.debug(f"Could not check neutralization config: {str(e)}")
if not neutralizationEnabled: if not neutralization_enabled:
logger.info("Neutralization is not enabled, returning documents unchanged") logger.info("Neutralization is not enabled, returning documents unchanged")
# Return original documents if neutralization is disabled document_list_param = parameters.get("documentList")
documentListParam = parameters.get("documentList") if not document_list_param:
if not documentListParam:
return ActionResult.isFailure(error="documentList is required") return ActionResult.isFailure(error="documentList is required")
documentList = coerceDocumentReferenceList(documentListParam) doc_list = coerceDocumentReferenceList(document_list_param)
if not documentList.references: if not doc_list.references:
return ActionResult.isFailure( return ActionResult.isFailure(error=f"documentList invalid (empty)")
error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
) chat_docs = self.services.chat.getChatDocumentsFromDocumentList(doc_list)
if not chat_docs:
# Get ChatDocuments from documentList
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
if not chatDocuments:
return ActionResult.isFailure(error="No documents found in documentList") return ActionResult.isFailure(error="No documents found in documentList")
# Return original documents as ActionDocuments action_documents = []
actionDocuments = [] for chat_doc in chat_docs:
for chatDoc in chatDocuments: if hasattr(chat_doc, "documentData") and chat_doc.documentData:
# Extract ContentExtracted from documentData if available action_documents.append(
if hasattr(chatDoc, 'documentData') and chatDoc.documentData: ActionDocument(
actionDoc = ActionDocument( documentName=getattr(chat_doc, "fileName", "unknown"),
documentName=getattr(chatDoc, 'fileName', 'unknown'), documentData=chat_doc.documentData,
documentData=chatDoc.documentData, mimeType=getattr(chat_doc, "mimeType", "application/json"),
mimeType=getattr(chatDoc, 'mimeType', 'application/json'), validationMetadata={
validationMetadata={ "actionType": "context.neutralizeData",
"actionType": "context.neutralizeData", "neutralized": False,
"neutralized": False, "reason": "Neutralization disabled",
"reason": "Neutralization disabled" },
} )
) )
actionDocuments.append(actionDoc) return ActionResult.isSuccess(documents=action_documents)
return ActionResult.isSuccess(documents=actionDocuments) document_list_param = parameters.get("documentList")
if not document_list_param:
documentListParam = parameters.get("documentList")
if not documentListParam:
return ActionResult.isFailure(error="documentList is required") return ActionResult.isFailure(error="documentList is required")
documentList = coerceDocumentReferenceList(documentListParam) doc_list = coerceDocumentReferenceList(document_list_param)
if not documentList.references: if not doc_list.references:
return ActionResult.isFailure( return ActionResult.isFailure(error=f"documentList invalid")
error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
) parent_operation_id = parameters.get("parentOperationId")
# Start progress tracking
parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart( self.services.chat.progressLogStart(
operationId, operation_id,
"Neutralizing data from documents", "Neutralizing data from documents",
"Data Neutralization", "Data Neutralization",
f"Documents: {len(documentList.references)}", f"Documents: {len(doc_list.references)}",
parentOperationId=parentOperationId parentOperationId=parent_operation_id,
) )
# Get ChatDocuments from documentList self.services.chat.progressLogUpdate(operation_id, 0.2, "Loading documents")
self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents") chat_documents = self.services.chat.getChatDocumentsFromDocumentList(doc_list)
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) if not chat_documents:
self.services.chat.progressLogFinish(operation_id, False)
if not chatDocuments:
self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="No documents found in documentList") return ActionResult.isFailure(error="No documents found in documentList")
logger.info(f"Neutralizing data from {len(chatDocuments)} documents") logger.info(f"Neutralizing data from {len(chat_documents)} document(s)")
self.services.chat.progressLogUpdate(operation_id, 0.3, "Processing documents")
# Process each document action_documents = []
self.services.chat.progressLogUpdate(operationId, 0.3, "Processing documents")
actionDocuments = [] for i, chat_doc in enumerate(chat_documents):
for i, chatDoc in enumerate(chatDocuments):
try: try:
# Extract ContentExtracted from documentData dd = getattr(chat_doc, "documentData", None)
if not hasattr(chatDoc, 'documentData') or not chatDoc.documentData: if not dd:
logger.warning(f"Document {i+1} has no documentData, skipping") logger.warning(f"Document {i + 1} has no documentData, skipping")
continue continue
documentData = chatDoc.documentData fn = str(getattr(chat_doc, "fileName", "") or "")
mime_guess = str(getattr(chat_doc, "mimeType", "") or "").lower()
# Check if it's a ContentExtracted object if (
if isinstance(documentData, ContentExtracted): mime_guess.startswith("image/")
contentExtracted = documentData and fn.startswith("extract_media_")
elif isinstance(documentData, dict): and not (isinstance(dd, dict) and dd.get("kind") == HANDOVER_KIND)
# Try to parse as ContentExtracted ):
action_documents.append(
ActionDocument(
documentName=fn or f"media_{i + 1}",
documentData=dd,
mimeType=mime_guess or "application/octet-stream",
validationMetadata={
"actionType": "context.neutralizeData",
"neutralized": False,
"reason": "extractContent_media_sidecar_pass_through",
},
)
)
continue
# --- Unified JSON envelope from context.extractContent (v1) ---
if isinstance(dd, dict) and dd.get("kind") == HANDOVER_KIND:
bundle = dict(dd)
files_section = dd.get("files") or {}
new_files = {}
for fk, bucket in files_section.items():
if not isinstance(bucket, dict):
continue
parts_raw = bucket.get("parts") or []
parsed_parts = []
for pd in parts_raw:
parsed_parts.append(ContentPart(**pd) if isinstance(pd, dict) else pd)
summary = bucket.get("summary") or {}
if hasattr(summary, "model_dump"):
summary = summary.model_dump(mode="json")
ce = ContentExtracted(
id=str(bucket.get("extractedId") or ""),
parts=parsed_parts,
summary=summary if isinstance(summary, dict) else {},
)
ce_out = await _neutralize_one_content_extracted(
svc=self,
content_extracted=ce,
operation_id=operation_id,
chat_doc_slot=i,
chat_documents_len=max(len(chat_documents), 1),
)
new_files[fk] = _one_file_bucket(ce_out, str(bucket.get("sourceFileName") or fk))
bundle["files"] = new_files
original_filename = getattr(chat_doc, "fileName", f"neutralized_bundle_{workflow_id}.json")
bn = original_filename.rsplit(".", 1)[0] if "." in original_filename else original_filename
action_documents.append(
ActionDocument(
documentName=f"{bn}_neutralized.json",
documentData=bundle,
mimeType="application/json",
validationMetadata={
"actionType": "context.neutralizeData",
"neutralized": True,
"handoverKind": HANDOVER_KIND,
"bundleFileCount": len(new_files),
},
)
)
continue
# --- Legacy ContentExtracted per persisted document ---
if isinstance(dd, ContentExtracted):
content_extracted = dd
elif isinstance(dd, dict):
try: try:
contentExtracted = ContentExtracted(**documentData) content_extracted = ContentExtracted(**dd)
except Exception as e: except Exception:
logger.warning(f"Document {i+1} documentData is not ContentExtracted: {str(e)}") logger.warning(f"Document {i + 1} documentData cannot be parsed as ContentExtracted dict")
continue continue
else: else:
logger.warning(f"Document {i+1} documentData is not ContentExtracted or dict") logger.warning(f"Document {i + 1} documentData is not supported")
continue continue
# Neutralize each ContentPart's data field neut_out = await _neutralize_one_content_extracted(
neutralizedParts = [] svc=self,
for part in contentExtracted.parts: content_extracted=content_extracted,
if not isinstance(part, ContentPart): operation_id=operation_id,
# Try to parse as ContentPart chat_doc_slot=i,
if isinstance(part, dict): chat_documents_len=max(len(chat_documents), 1),
try:
part = ContentPart(**part)
except Exception as e:
logger.warning(f"Could not parse ContentPart: {str(e)}")
neutralizedParts.append(part)
continue
else:
neutralizedParts.append(part)
continue
# Neutralize the data field based on typeGroup
_typeGroup = getattr(part, 'typeGroup', '') or ''
if _typeGroup == 'image' and part.data:
import base64 as _b64
try:
self.services.chat.progressLogUpdate(
operationId,
0.3 + (i / len(chatDocuments)) * 0.6,
f"Checking image part {len(neutralizedParts) + 1} of document {i+1}"
)
_imgBytes = _b64.b64decode(str(part.data))
_imgResult = await self.services.neutralization.processImageAsync(_imgBytes, f"part_{part.id}")
if _imgResult.get("status") == "ok":
neutralizedParts.append(part)
else:
logger.warning(f"Fail-Safe: Image part {part.id} blocked (PII detected), SKIPPING")
except Exception as _imgErr:
logger.error(f"Fail-Safe: Image check failed for part {part.id}: {_imgErr}, SKIPPING")
elif part.data:
try:
self.services.chat.progressLogUpdate(
operationId,
0.3 + (i / len(chatDocuments)) * 0.6,
f"Neutralizing part {len(neutralizedParts) + 1} of document {i+1}"
)
neutralizationResult = await self.services.neutralization.processTextAsync(part.data)
if neutralizationResult and 'neutralized_text' in neutralizationResult:
neutralizedData = neutralizationResult['neutralized_text']
neutralizedPart = ContentPart(
id=part.id,
parentId=part.parentId,
label=part.label,
typeGroup=part.typeGroup,
mimeType=part.mimeType,
data=neutralizedData,
metadata=part.metadata.copy() if part.metadata else {}
)
neutralizedParts.append(neutralizedPart)
else:
logger.warning(f"Fail-Safe: Neutralization incomplete for part {part.id}, SKIPPING (not passing original)")
continue
except Exception as e:
logger.error(f"Fail-Safe: Error neutralizing part {part.id}, SKIPPING document (not passing original): {str(e)}")
continue
else:
neutralizedParts.append(part)
# Create neutralized ContentExtracted object
neutralizedContentExtracted = ContentExtracted(
id=contentExtracted.id,
parts=neutralizedParts,
summary=contentExtracted.summary
) )
# Create ActionDocument original_file_name = getattr(chat_doc, "fileName", f"document_{i + 1}.json")
originalFileName = getattr(chatDoc, 'fileName', f"document_{i+1}.json") base_name = original_file_name.rsplit(".", 1)[0] if "." in original_file_name else original_file_name
baseName = originalFileName.rsplit('.', 1)[0] if '.' in originalFileName else originalFileName document_name = f"{base_name}_neutralized_{neut_out.id}.json"
documentName = f"{baseName}_neutralized_{contentExtracted.id}.json"
action_documents.append(
validationMetadata = { ActionDocument(
"actionType": "context.neutralizeData", documentName=document_name,
"documentIndex": i, documentData=neut_out,
"extractedId": contentExtracted.id, mimeType="application/json",
"partCount": len(neutralizedParts), validationMetadata={
"neutralized": True, "actionType": "context.neutralizeData",
"originalFileName": originalFileName "documentIndex": i,
} "extractedId": neut_out.id,
"partCount": len(neut_out.parts),
actionDoc = ActionDocument( "neutralized": True,
documentName=documentName, "originalFileName": original_file_name,
documentData=neutralizedContentExtracted, },
mimeType="application/json", )
validationMetadata=validationMetadata
) )
actionDocuments.append(actionDoc)
except Exception as e: except Exception as e:
logger.error(f"Error processing document {i+1}: {str(e)}") logger.error(f"Error processing document {i + 1}: {str(e)}")
# Continue with other documents
continue continue
if not actionDocuments: if not action_documents:
self.services.chat.progressLogFinish(operationId, False) self.services.chat.progressLogFinish(operation_id, False)
return ActionResult.isFailure(error="No valid ContentExtracted documents found to neutralize") return ActionResult.isFailure(error="No valid documents found to neutralize")
self.services.chat.progressLogFinish(operationId, True) self.services.chat.progressLogFinish(operation_id, True)
return ActionResult.isSuccess(documents=action_documents)
return ActionResult.isSuccess(documents=actionDocuments)
except Exception as e: except Exception as e:
logger.error(f"Error in data neutralization: {str(e)}") logger.error(f"Error in data neutralization: {str(e)}")
try: try:
if operationId: if operation_id:
self.services.chat.progressLogFinish(operationId, False) self.services.chat.progressLogFinish(operation_id, False)
except Exception: except Exception:
pass pass
return ActionResult.isFailure(error=str(e)) return ActionResult.isFailure(error=str(e))

View file

@ -52,7 +52,14 @@ class MethodContext(MethodBase):
), ),
"extractContent": WorkflowActionDefinition( "extractContent": WorkflowActionDefinition(
actionId="context.extractContent", actionId="context.extractContent",
description="Extract raw content parts from documents without AI processing. Returns ContentParts with different typeGroups (text, image, table, structure, container). Images are returned as base64 data, not as extracted text. Text content is extracted from text-based formats (PDF text layers, Word docs, etc.) but NOT from images (no OCR). Use this action to prepare documents for subsequent AI processing actions.", description=(
"Extract document content without AI. Unified handover: (1) `documents[0]` "
"JSON `context.extractContent.handover.v1` with text in `parts` and image placeholders "
"linking to sibling blobs via `handoverMediaDocumentName`; "
"(2) each extracted image as a separate binary document (`extract_media_*`); "
"(3) `data.response` / top-level `response` after normalization — concatenated plain text "
"for prompts and file.create. Pick `response`, a specific document, or deep JSON paths."
),
dynamicMode=True, dynamicMode=True,
outputType="UdmDocument", outputType="UdmDocument",
parameters={ parameters={
@ -61,15 +68,8 @@ class MethodContext(MethodBase):
type="DocumentList", type="DocumentList",
frontendType=FrontendType.DOCUMENT_REFERENCE, frontendType=FrontendType.DOCUMENT_REFERENCE,
required=True, required=True,
description="Document reference(s) to extract content from" description="Document reference(s) to extract content from",
), ),
"extractionOptions": WorkflowActionParameter(
name="extractionOptions",
type="Dict[str,Any]",
frontendType=FrontendType.JSON,
required=False,
description="Extraction options (if not provided, defaults are used). Note: This action does NOT use AI - it performs pure content extraction. Images are preserved as base64 data, not converted to text."
)
}, },
execute=extractContent.__get__(self, self.__class__) execute=extractContent.__get__(self, self.__class__)
), ),

View file

@ -1,18 +1,25 @@
# Copyright (c) 2025 Patrick Motsch # Copyright (c) 2025 Patrick Motsch
# All rights reserved. # All rights reserved.
from typing import Dict, Any, Optional from typing import Any, Dict, List, Optional
import base64 import base64
import binascii import binascii
import io
import json
import logging import logging
import re
from modules.datamodels.datamodelChat import ActionResult, ActionDocument from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
from modules.shared.i18nRegistry import normalizePrimaryLanguageTag from modules.shared.i18nRegistry import normalizePrimaryLanguageTag
from modules.workflows.automation2.executors.actionNodeExecutor import _coerce_document_data_to_bytes
from modules.workflows.methods.methodAi._common import is_image_action_document_list, serialize_context
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
_SAFE_FILENAME = re.compile(r'[^\w\-.\(\)\s\[\]%@+]')
def _persistDocumentsToUserFiles( def _persistDocumentsToUserFiles(
action_documents: list, action_documents: list,
@ -88,13 +95,155 @@ def _persistDocumentsToUserFiles(
logger.warning("file.create: failed to persist document %s: %s", dname, e) logger.warning("file.create: failed to persist document %s: %s", dname, e)
def _sanitize_output_stem(title: str) -> str:
t = (title or "").strip() or "Document"
stem = _SAFE_FILENAME.sub("_", t).strip("._")
return stem[:120] if stem else "Document"
def _get_management_interface(services) -> Optional[Any]:
mgmt = getattr(services, "interfaceDbComponent", None)
if mgmt:
return mgmt
try:
import modules.interfaces.interfaceDbManagement as iface
user = getattr(services, "user", None)
if not user:
return None
return iface.getInterface(
user,
mandateId=getattr(services, "mandateId", None) or "",
featureInstanceId=getattr(services, "featureInstanceId", None) or "",
)
except Exception as e:
logger.warning("file.create: could not get management interface: %s", e)
return None
def _load_image_bytes_from_action_doc(doc: dict, services) -> Optional[bytes]:
raw = doc.get("documentData")
blob = _coerce_document_data_to_bytes(raw)
if blob:
return blob
fid = doc.get("fileId")
if not fid and isinstance(doc.get("validationMetadata"), dict):
fid = (doc.get("validationMetadata") or {}).get("fileId")
if fid and str(fid).strip():
mgmt = _get_management_interface(services)
if mgmt and hasattr(mgmt, "getFileData"):
try:
return mgmt.getFileData(str(fid))
except Exception as e:
logger.warning("file.create: getFileData(%s) failed: %s", fid, e)
return None
def _images_list_to_pdf(image_bytes_list: List[bytes]) -> bytes:
"""One PDF page per image; embedded raster data via PyMuPDF."""
import fitz
pdf = fitz.open()
try:
for blob in image_bytes_list:
page = pdf.new_page()
page.insert_image(page.rect, stream=blob, keep_proportion=True)
return pdf.tobytes()
finally:
pdf.close()
def _images_list_to_docx(image_bytes_list: List[bytes]) -> bytes:
"""Images embedded in the document package (inline shapes), not hyperlinks."""
from docx import Document
from docx.shared import Inches
doc = Document()
for blob in image_bytes_list:
p = doc.add_paragraph()
run = p.add_run()
run.add_picture(io.BytesIO(blob), width=Inches(6.5))
doc.add_paragraph()
out = io.BytesIO()
doc.save(out)
return out.getvalue()
async def _create_merged_image_documents(
self,
parameters: Dict[str, Any],
image_docs: List[dict],
) -> ActionResult:
"""Build one PDF or DOCX containing all extracted images (``imageDocumentsOnly``)."""
output_format = (parameters.get("outputFormat") or "docx").strip().lower().lstrip(".")
title = (parameters.get("title") or "Document").strip()
stem = _sanitize_output_stem(title)
folder_id: Optional[str] = None
raw_folder = parameters.get("folderId")
if raw_folder is not None and str(raw_folder).strip():
folder_id = str(raw_folder).strip()
if output_format not in ("pdf", "docx"):
return ActionResult.isFailure(
error=(
f"Nur-Bilder-Kontext: „{output_format}“ wird nicht unterstützt. "
"Bitte Ausgabeformat „pdf“ oder „docx“ wählen."
)
)
blobs: List[bytes] = []
for d in image_docs:
b = _load_image_bytes_from_action_doc(d, self.services)
if not b:
name = d.get("documentName") or "?"
return ActionResult.isFailure(
error=f"Bilddaten fehlen oder sind nicht lesbar (Datei: {name})."
)
blobs.append(b)
if output_format == "pdf":
try:
combined = _images_list_to_pdf(blobs)
except Exception as e:
logger.warning("file.create: PDF merge failed: %s", e, exc_info=True)
return ActionResult.isFailure(error=f"PDF aus Bildern konnte nicht erzeugt werden: {e}")
out_name = f"{stem}.pdf"
mime = "application/pdf"
else:
combined = _images_list_to_docx(blobs)
out_name = f"{stem}.docx"
mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
if not combined:
return ActionResult.isFailure(error="Zusammenfügen der Bilder ergab leere Ausgabe")
doc_b64 = base64.b64encode(combined).decode("ascii")
action_documents = [
ActionDocument(
documentName=out_name,
documentData=doc_b64,
mimeType=mime,
validationMetadata={
"actionType": "file.create",
"outputFormat": output_format,
"source": "mergedImageDocumentsOnly",
},
)
]
_persistDocumentsToUserFiles(action_documents, self.services, folder_id=folder_id)
return ActionResult.isSuccess(documents=action_documents)
async def create(self, parameters: Dict[str, Any]) -> ActionResult: async def create(self, parameters: Dict[str, Any]) -> ActionResult:
""" """
Create a file from context (text/markdown from upstream AI node). Create a file from context (text/markdown from upstream AI node).
Uses GenerationService.renderReport to produce docx, pdf, txt, md, html, xlsx, etc. Uses GenerationService.renderReport to produce docx, pdf, txt, md, html, xlsx, etc.
""" """
from modules.workflows.methods.methodAi._common import serialize_context
raw_context = parameters.get("context", "") or parameters.get("text", "") or "" raw_context = parameters.get("context", "") or parameters.get("text", "") or ""
if isinstance(raw_context, list) and is_image_action_document_list(raw_context):
return await _create_merged_image_documents(self, parameters, raw_context)
context = serialize_context(raw_context) context = serialize_context(raw_context)
if not context: if not context:

View file

@ -0,0 +1,63 @@
# Unit tests: unified extractContent handover (text vs image sidecars).
import base64
from modules.workflows.methods.methodContext.actions import extractContent as ec
def test_joined_text_from_handover_orders_text_parts_only():
payload = {
"kind": ec.HANDOVER_KIND,
"fileOrder": ["f1"],
"files": {
"f1": {
"parts": [
{"typeGroup": "text", "data": " A\n", "id": "x"},
{"typeGroup": "container", "data": "", "id": "c"},
{"typeGroup": "text", "data": "B", "id": "y"},
]
}
},
}
assert ec._joined_text_from_handover_payload(payload) == "A\n\nB"
def test_split_images_moves_pixels_to_blob_docs():
raw = b"fake-binary-image"
b64 = base64.b64encode(raw).decode("ascii")
payload = {
"kind": ec.HANDOVER_KIND,
"schemaVersion": 1,
"fileOrder": ["f1"],
"files": {
"f1": {
"parts": [
{"typeGroup": "text", "data": "x", "id": "t1"},
{
"typeGroup": "image",
"mimeType": "image/png",
"data": b64,
"id": "p1-img",
"metadata": {},
},
]
}
},
}
stripped, blobs = ec._split_images_to_sidecar_documents(payload, document_name_stem="abc")
assert len(blobs) == 1
assert blobs[0].mimeType == "image/png"
assert blobs[0].documentData == raw
assert blobs[0].documentName.endswith(".png")
assert blobs[0].documentName.startswith("extract_media_")
meta = blobs[0].validationMetadata or {}
assert meta.get("handoverRole") == "extractedMedia"
img_parts = [
p
for p in stripped["files"]["f1"]["parts"]
if isinstance(p, dict) and (p.get("typeGroup") or "") == "image"
]
assert len(img_parts) == 1
assert img_parts[0]["data"] == ""
assert img_parts[0]["handoverMediaDocumentName"] == blobs[0].documentName
assert "image" in stripped["files"]["f1"]["byTypeGroup"]

View file

@ -21,8 +21,11 @@ def test_context_extractContent_node_shape():
assert node["meta"]["usesAi"] is False assert node["meta"]["usesAi"] is False
assert node["_method"] == "context" assert node["_method"] == "context"
assert node["_action"] == "extractContent" assert node["_action"] == "extractContent"
assert node["outputPorts"][0]["schema"] == "UdmDocument" assert node["outputPorts"][0]["schema"] == "ActionResult"
assert "DocumentList" in node["inputPorts"][0]["accepts"] assert "DocumentList" in node["inputPorts"][0]["accepts"]
assert "LoopItem" in node["inputPorts"][0]["accepts"]
names = [p["name"] for p in node["parameters"]]
assert names == ["documentList"]
def test_udm_port_types_registered(): def test_udm_port_types_registered():