node handover standardisiert, kein hardcoden mehr, inhalt extraktion node verbessert, output ports vereinheitlicht mit user im blick

This commit is contained in:
Ida 2026-05-06 12:50:49 +02:00
parent 6e3da0d0d8
commit 93aff13d26
25 changed files with 1579 additions and 465 deletions

View file

@ -3,6 +3,127 @@
from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import (
CONTEXT_BUILDER_PARAM_DESCRIPTION,
)
# Canonical DataPicker entries for ActionResult output ports — the same
# handover layout that ``context.extractContent`` exposes on its outputPorts.
ACTION_RESULT_DATA_PICK_OPTIONS = [
    dict(
        path=["documents", 0, "documentData"],
        pickerLabel=t("Gesamter Inhalt"),
        detail=t(
            "Strukturiertes Handover als JSON inklusive aller Textteile "
            "und Verweisen auf ausgelagerte Bilder."
        ),
        recommended=True,
        type="Any",
    ),
    dict(
        path=["response"],
        pickerLabel=t("Nur Text"),
        detail=t("Verketteter Klartext aus allen erkannten Textteilen."),
        recommended=True,
        type="str",
    ),
    dict(
        path=["imageDocumentsOnly"],
        pickerLabel=t("Nur Bilder"),
        detail=t("Nur die extrahierten Bilddokumente als Liste, ohne JSON-Handover."),
        recommended=False,
        type="List[ActionDocument]",
    ),
    dict(
        path=["documents"],
        pickerLabel=t("Alle Dateitypen"),
        detail=t("Alle Ausgabedokumente nacheinander: JSON-Handover und Bilder."),
        recommended=False,
        type="List[ActionDocument]",
    ),
]
# DataPicker entries for AiResult output ports (ai.process, webResearch, ...).
# NOTE(review): the last entry uses type "List[Document]" while
# "imageDocumentsOnly" uses "List[ActionDocument]" — confirm this is intended.
AI_RESULT_DATA_PICK_OPTIONS = [
    dict(
        path=["documents", 0, "documentData"],
        pickerLabel=t("Gesamter Inhalt"),
        detail=t(
            "Hauptausgabedatei oder strukturierter Inhalt von ``documents[0]`` "
            "(z. B. erzeugtes Dokument, JSON-Handover)."
        ),
        recommended=True,
        type="Any",
    ),
    dict(
        path=["response"],
        pickerLabel=t("Nur Text"),
        detail=t("Modell-Antwort als reiner Fließtext (ohne eingebettete Bildbytes)."),
        recommended=True,
        type="str",
    ),
    dict(
        path=["imageDocumentsOnly"],
        pickerLabel=t("Nur Bilder"),
        detail=t("Nur Bild-Dokumente aus ``documents`` (ohne erstes Nicht-Bild-Artefakt, falls gesetzt)."),
        recommended=False,
        type="List[ActionDocument]",
    ),
    dict(
        path=["documents"],
        pickerLabel=t("Alle Ausgabedateien"),
        detail=t("Alle Dokumente der KI-Antwort: erzeugte Dateien, Bilder, Anhänge."),
        recommended=False,
        type="List[Document]",
    ),
]
# DataPicker entries for DocumentList output ports.
DOCUMENT_LIST_DATA_PICK_OPTIONS = [
    dict(
        path=["documents"],
        pickerLabel=t("Alle Dokumente"),
        detail=t("Die vollständige Dokumentenliste."),
        recommended=True,
        type="List[Document]",
    ),
    dict(
        path=["documents", 0],
        pickerLabel=t("Erstes Dokument"),
        detail=t("Metadaten und Pfade des ersten Listeneintrags."),
        recommended=False,
        type="Document",
    ),
    dict(
        path=["count"],
        pickerLabel=t("Anzahl"),
        detail=t("Anzahl der Dokumente."),
        recommended=False,
        type="int",
    ),
]
# DataPicker entries for ConsolidateResult output ports.
CONSOLIDATE_RESULT_DATA_PICK_OPTIONS = [
    dict(
        path=["result"],
        pickerLabel=t("Konsolidiertes Ergebnis"),
        detail=t("Text oder Struktur nach Konsolidierung."),
        recommended=True,
        type="Any",
    ),
    dict(
        path=["mode"],
        pickerLabel=t("Modus"),
        detail=t("Verwendeter Konsolidierungsmodus."),
        recommended=False,
        type="str",
    ),
    dict(
        path=["count"],
        pickerLabel=t("Anzahl"),
        detail=t("Anzahl zusammengeführter Elemente."),
        recommended=False,
        type="int",
    ),
]
_AI_COMMON_PARAMS = [
{"name": "requireNeutralization", "type": "bool", "required": False,
"frontendType": "checkbox", "default": False,
@ -28,7 +149,7 @@ AI_NODES = [
"description": t("Dokumente aus vorherigen Schritten"), "default": "",
"graphInherit": {"port": 0, "kind": "documentListWire"}},
{"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
"description": t("Daten aus vorherigen Schritten"), "default": "",
"description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
"graphInherit": {"port": 0, "kind": "primaryTextRef"}},
{"name": "simpleMode", "type": "bool", "required": False, "frontendType": "checkbox",
"description": t("Einfacher Modus"), "default": True},
@ -38,7 +159,8 @@ AI_NODES = [
"inputPorts": {0: {"accepts": [
"FormPayload", "DocumentList", "AiResult", "TextResult", "Transit", "LoopItem", "ActionResult",
]}},
"outputPorts": {0: {"schema": "AiResult"}},
"outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}},
"paramMappers": ["aiPromptLegacyAlias"],
"meta": {"icon": "mdi-robot", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "process",
@ -52,7 +174,7 @@ AI_NODES = [
{"name": "prompt", "type": "str", "required": True, "frontendType": "textarea",
"description": t("Recherche-Anfrage")},
{"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
"description": t("Daten aus vorherigen Schritten"), "default": "",
"description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
"graphInherit": {"port": 0, "kind": "primaryTextRef"}},
{"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden",
"description": t("Dokumente aus vorherigen Schritten"), "default": "",
@ -63,7 +185,7 @@ AI_NODES = [
"inputPorts": {0: {"accepts": [
"FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult", "LoopItem", "TextResult",
]}},
"outputPorts": {0: {"schema": "AiResult"}},
"outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-magnify", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "webResearch",
@ -90,7 +212,7 @@ AI_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}},
"outputPorts": {0: {"schema": "AiResult"}},
"outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-document-outline", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "summarizeDocument",
@ -116,7 +238,7 @@ AI_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}},
"outputPorts": {0: {"schema": "AiResult"}},
"outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-translate", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "translateDocument",
@ -140,7 +262,7 @@ AI_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}},
"outputPorts": {0: {"schema": "DocumentList"}},
"outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-convert", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "convertDocument",
@ -165,7 +287,7 @@ AI_NODES = [
"description": t("Zielordner in Meine Dateien"),
"default": ""},
{"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
"description": t("Daten aus vorherigen Schritten"), "default": "",
"description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
"graphInherit": {"port": 0, "kind": "primaryTextRef"}},
{"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden",
"description": t("Dokumente aus vorherigen Schritten"), "default": "",
@ -176,7 +298,7 @@ AI_NODES = [
"inputPorts": {0: {"accepts": [
"FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult", "LoopItem", "TextResult",
]}},
"outputPorts": {0: {"schema": "DocumentList"}},
"outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-plus", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "generateDocument",
@ -196,7 +318,7 @@ AI_NODES = [
"description": t("Zielordner in Meine Dateien"),
"default": ""},
{"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
"description": t("Daten aus vorherigen Schritten"), "default": "",
"description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
"graphInherit": {"port": 0, "kind": "primaryTextRef"}},
{"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden",
"description": t("Dokumente aus vorherigen Schritten"), "default": "",
@ -207,7 +329,7 @@ AI_NODES = [
"inputPorts": {0: {"accepts": [
"FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult", "LoopItem", "TextResult",
]}},
"outputPorts": {0: {"schema": "AiResult"}},
"outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-code-tags", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "generateCode",
@ -227,7 +349,7 @@ AI_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}},
"outputPorts": {0: {"schema": "ConsolidateResult"}},
"outputPorts": {0: {"schema": "ConsolidateResult", "dataPickOptions": CONSOLIDATE_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-table-merge-cells", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "consolidate",

View file

@ -4,6 +4,63 @@
from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS
# DataPicker entries for TaskList output ports (ClickUp search/list nodes).
TASK_LIST_DATA_PICK_OPTIONS = [
    dict(
        path=["tasks"],
        pickerLabel=t("Alle Aufgaben"),
        detail=t("Vollständige Aufgabenliste."),
        recommended=True,
        type="List[TaskItem]",
    ),
    dict(
        path=["tasks", 0],
        pickerLabel=t("Erste Aufgabe"),
        detail=t("Erstes Listenelement."),
        recommended=False,
        type="TaskItem",
    ),
    dict(
        path=["count"],
        pickerLabel=t("Anzahl"),
        detail=t("Anzahl der Aufgaben."),
        recommended=False,
        type="int",
    ),
    dict(
        path=["listId"],
        pickerLabel=t("Listen-ID"),
        detail=t("ClickUp-Listen-Kontext, falls gesetzt."),
        recommended=False,
        type="str",
    ),
]
# DataPicker entries for TaskResult output ports (single-task ClickUp nodes).
TASK_RESULT_DATA_PICK_OPTIONS = [
    dict(
        path=["success"],
        pickerLabel=t("Erfolg"),
        detail=t("Ob der API-Aufruf erfolgreich war."),
        recommended=True,
        type="bool",
    ),
    dict(
        path=["taskId"],
        pickerLabel=t("Aufgaben-ID"),
        detail=t("ID der betroffenen Aufgabe."),
        recommended=True,
        type="str",
    ),
    dict(
        path=["task"],
        pickerLabel=t("Aufgabendaten"),
        detail=t("Vollständiges Task-Objekt (Dict)."),
        recommended=True,
        type="Dict",
    ),
]
CLICKUP_NODES = [
{
"id": "clickup.searchTasks",
@ -33,7 +90,7 @@ CLICKUP_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TaskList"}},
"outputPorts": {0: {"schema": "TaskList", "dataPickOptions": TASK_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-magnify", "color": "#7B68EE", "usesAi": False},
"_method": "clickup",
"_action": "searchTasks",
@ -58,7 +115,7 @@ CLICKUP_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TaskList"}},
"outputPorts": {0: {"schema": "TaskList", "dataPickOptions": TASK_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-format-list-bulleted", "color": "#7B68EE", "usesAi": False},
"_method": "clickup",
"_action": "listTasks",
@ -80,7 +137,7 @@ CLICKUP_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TaskResult"}},
"outputPorts": {0: {"schema": "TaskResult", "dataPickOptions": TASK_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-document-outline", "color": "#7B68EE", "usesAi": False},
"_method": "clickup",
"_action": "getTask",
@ -124,7 +181,7 @@ CLICKUP_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TaskResult"}},
"outputPorts": {0: {"schema": "TaskResult", "dataPickOptions": TASK_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-plus-circle-outline", "color": "#7B68EE", "usesAi": False},
"_method": "clickup",
"_action": "createTask",
@ -148,7 +205,8 @@ CLICKUP_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["TaskResult", "Transit"]}},
"outputPorts": {0: {"schema": "TaskResult"}},
"outputPorts": {0: {"schema": "TaskResult", "dataPickOptions": TASK_RESULT_DATA_PICK_OPTIONS}},
"paramMappers": ["clickupTaskUpdateMerge"],
"meta": {"icon": "mdi-pencil-outline", "color": "#7B68EE", "usesAi": False},
"_method": "clickup",
"_action": "updateTask",
@ -174,7 +232,7 @@ CLICKUP_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-attachment", "color": "#7B68EE", "usesAi": False},
"_method": "clickup",
"_action": "uploadAttachment",

View file

@ -8,21 +8,66 @@ CONTEXT_NODES = [
"id": "context.extractContent",
"category": "context",
"label": t("Inhalt extrahieren"),
"description": t("Dokumentstruktur extrahieren ohne KI (Seiten, Abschnitte, Bilder, Tabellen)"),
"description": t(
"Extrahiert Inhalt ohne KI. Ergebnis einheitlich wie KI-Schritte: `response` "
"(gesammelter Klartext), strukturierte JSON-Unterlage in `documents[0]`, "
"einzelne Bilder als eigene Dokumente `extract_media_*` (nur im Workflow, ohne Eintrag unter „Meine Dateien“) — "
"Auswahl im Daten-Picker wie bei `ai.process`."
),
"parameters": [
{"name": "documentList", "type": "str", "required": True, "frontendType": "hidden",
"description": t("Dokumentenliste (via Wire oder DataRef)"), "default": "",
"graphInherit": {"port": 0, "kind": "documentListWire"}},
{"name": "extractionOptions", "type": "object", "required": False, "frontendType": "json",
"description": t(
"Extraktions-Optionen (JSON), z.B. {\"includeImages\": true, \"includeTables\": true, "
"\"outputDetail\": \"full\"}"),
"default": {}},
],
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "UdmDocument"}},
"inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}},
"outputPorts": {
0: {
"schema": "ActionResult",
# Authoritative DataPicker paths (same idea as ``parameters`` for configuration).
# Frontend uses only this list — no schema expansion merge for this port.
"dataPickOptions": [
{
"path": ["documents", 0, "documentData"],
"pickerLabel": t("Gesamter Inhalt"),
"detail": t(
"Strukturiertes Handover als JSON inklusive aller Textteile "
"und Verweisen auf ausgelagerte Bilder."
),
"recommended": True,
"type": "Any",
},
{
"path": ["response"],
"pickerLabel": t("Nur Text"),
"detail": t(
"Verketteter Klartext aus allen erkannten Textteilen."
),
"recommended": True,
"type": "str",
},
{
"path": ["imageDocumentsOnly"],
"pickerLabel": t("Nur Bilder"),
"detail": t(
"Nur die extrahierten Bilddokumente als Liste, ohne JSON-Handover."
),
"recommended": False,
"type": "List[ActionDocument]",
},
{
"path": ["documents"],
"pickerLabel": t("Alle Dateitypen"),
"detail": t(
"Alle Ausgabedokumente nacheinander: JSON-Handover und Bilder."
),
"recommended": False,
"type": "List[ActionDocument]",
},
],
}
},
"meta": {"icon": "mdi-file-tree-outline", "color": "#00897B", "usesAi": False},
"_method": "context",
"_action": "extractContent",

View file

@ -0,0 +1,22 @@
# Copyright (c) 2025 Patrick Motsch
# Shared parameter copy for ``contextBuilder`` fields (upstream data pick).
from modules.shared.i18nRegistry import t
# Shared, user-facing description for every ``contextBuilder`` parameter.
# Documents (in German, for the UI) how upstream picks are resolved: plain-text
# via "response", handover paths for structured JSON/media, full server-side
# resolution via `resolveParameterReferences`, the automatic "payload" fallback
# for form-step outputs, and `{{NodeId.field...}}` placeholder substitution in
# free-text/template fields.
CONTEXT_BUILDER_PARAM_DESCRIPTION = t(
"Inhalt aus vorherigen Schritten wählen (DataRef / Daten-Picker): z. B. „response“ für Klartext, "
"Handover-Pfade für strukturiertes JSON oder Medienlisten. "
"Die Auflösung erfolgt vollständig serverseitig (`resolveParameterReferences`). "
"Formular-Schritte speichern Antworten unter „payload“ — fehlt ein gewählter Pfad am Root, "
"wird derselbe Pfad automatisch unter „payload“ nachgeschlagen (Kompatibilität mit älteren "
"und neuen Picker-Pfaden). "
"In Freitext-/Template-Feldern werden weiterhin Platzhalter `{{KnotenId.feld.b.z.}}` ersetzt "
"(gleiche Semantik inkl. optionalem Nachschlagen unter „payload“)."
)
# Short reference for node descriptions (optional to include): same resolution
# logic as DataRefs — there is no separate variable subsystem.
REF_AND_TEMPLATE_COMPATIBILITY_SUMMARY = t(
"Verweise: typisierte DataRefs im Parameter; Zeichenketten-Templates mit {{…}}; "
"Formular-Felder unter output.payload."
)

View file

@ -3,6 +3,25 @@
from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.ai import CONSOLIDATE_RESULT_DATA_PICK_OPTIONS
# DataPicker entries for AggregateResult output ports (data.aggregate).
AGGREGATE_RESULT_DATA_PICK_OPTIONS = [
    dict(
        path=["items"],
        pickerLabel=t("Gesammelte Elemente"),
        detail=t("Alle aus der Schleife gesammelten Werte."),
        recommended=True,
        type="List[Any]",
    ),
    dict(
        path=["count"],
        pickerLabel=t("Anzahl"),
        detail=t("Anzahl gesammelter Elemente."),
        recommended=False,
        type="int",
    ),
]
DATA_NODES = [
{
"id": "data.aggregate",
@ -17,7 +36,7 @@ DATA_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit", "AiResult", "LoopItem"]}},
"outputPorts": {0: {"schema": "AggregateResult"}},
"outputPorts": {0: {"schema": "AggregateResult", "dataPickOptions": AGGREGATE_RESULT_DATA_PICK_OPTIONS}},
"executor": "data",
"meta": {"icon": "mdi-playlist-plus", "color": "#607D8B", "usesAi": False},
},
@ -55,7 +74,7 @@ DATA_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}},
"outputPorts": {0: {"schema": "ConsolidateResult"}},
"outputPorts": {0: {"schema": "ConsolidateResult", "dataPickOptions": CONSOLIDATE_RESULT_DATA_PICK_OPTIONS}},
"executor": "data",
"meta": {"icon": "mdi-table-merge-cells", "color": "#607D8B", "usesAi": False},
},

View file

@ -3,6 +3,35 @@
from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import (
CONTEXT_BUILDER_PARAM_DESCRIPTION,
)
from modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS
# DataPicker entries for EmailList output ports (email.checkEmail / search).
EMAIL_LIST_DATA_PICK_OPTIONS = [
    dict(
        path=["emails"],
        pickerLabel=t("Alle E-Mails"),
        detail=t("Die vollständige E-Mail-Liste des Schritts."),
        recommended=True,
        type="List[EmailItem]",
    ),
    dict(
        path=["emails", 0],
        pickerLabel=t("Erste E-Mail"),
        detail=t("Das erste Element der Liste."),
        recommended=False,
        type="EmailItem",
    ),
    dict(
        path=["count"],
        pickerLabel=t("Anzahl"),
        detail=t("Anzahl gefundener E-Mails."),
        recommended=False,
        type="int",
    ),
]
EMAIL_NODES = [
{
"id": "email.checkEmail",
@ -23,7 +52,8 @@ EMAIL_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "EmailList"}},
"outputPorts": {0: {"schema": "EmailList", "dataPickOptions": EMAIL_LIST_DATA_PICK_OPTIONS}},
"paramMappers": ["emailCheckFilter"],
"meta": {"icon": "mdi-email-check", "color": "#1976D2", "usesAi": False},
"_method": "outlook",
"_action": "readEmails",
@ -47,7 +77,8 @@ EMAIL_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "EmailList"}},
"outputPorts": {0: {"schema": "EmailList", "dataPickOptions": EMAIL_LIST_DATA_PICK_OPTIONS}},
"paramMappers": ["emailSearchQuery"],
"meta": {"icon": "mdi-email-search", "color": "#1976D2", "usesAi": False},
"_method": "outlook",
"_action": "searchEmails",
@ -63,7 +94,7 @@ EMAIL_NODES = [
"frontendOptions": {"authority": "msft"},
"description": t("E-Mail-Konto")},
{"name": "context", "type": "Any", "required": False, "frontendType": "templateTextarea",
"description": t("Daten aus vorherigen Schritten (oder direkte Beschreibung)"), "default": "",
"description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
"graphInherit": {"port": 0, "kind": "primaryTextRef"}},
{"name": "to", "type": "str", "required": False, "frontendType": "text",
"description": t("Empfänger (komma-separiert, optional für Entwurf)"), "default": ""},
@ -80,7 +111,8 @@ EMAIL_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["EmailDraft", "AiResult", "Transit", "ConsolidateResult", "DocumentList"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"paramMappers": ["emailDraftContextFromSubjectBody"],
"meta": {"icon": "mdi-email-edit", "color": "#1976D2", "usesAi": False},
"_method": "outlook",
"_action": "composeAndDraftEmailWithContext",

View file

@ -3,12 +3,21 @@
from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import (
CONTEXT_BUILDER_PARAM_DESCRIPTION,
)
from modules.features.graphicalEditor.nodeDefinitions.ai import DOCUMENT_LIST_DATA_PICK_OPTIONS
FILE_NODES = [
{
"id": "file.create",
"category": "file",
"label": t("Datei erstellen"),
"description": t("Erstellt eine Datei aus Kontext (Text/Markdown von KI)."),
"description": t(
"Erstellt eine Datei aus Kontext. Nach „Inhalt extrahieren“: „response“ für reinen Text; "
"„Nur Bilder“ liefert alle extrahierten Bilder — Datei erstellen fasst sie zu einer PDF oder DOCX "
"(Ausgabeformat pdf oder docx wählen)."
),
"parameters": [
{"name": "outputFormat", "type": "str", "required": True, "frontendType": "select",
"frontendOptions": {"options": ["docx", "pdf", "txt", "html", "md"]},
@ -19,13 +28,13 @@ FILE_NODES = [
"description": t("Zielordner in Meine Dateien"),
"default": ""},
{"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
"description": t("Daten aus vorherigen Schritten"), "default": "",
"description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "",
"graphInherit": {"port": 0, "kind": "primaryTextRef"}},
],
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["AiResult", "TextResult", "Transit", "FormPayload", "LoopItem", "ActionResult"]}},
"outputPorts": {0: {"schema": "DocumentList"}},
"outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-plus-outline", "color": "#2196F3", "usesAi": False},
"_method": "file",
"_action": "create",

View file

@ -3,6 +3,61 @@
from modules.shared.i18nRegistry import t
# DataPicker entries for LoopItem output ports (flow loop node).
LOOP_ITEM_DATA_PICK_OPTIONS = [
    dict(
        path=["currentItem"],
        pickerLabel=t("Aktuelles Element"),
        detail=t("Das aktuelle Iterationselement."),
        recommended=True,
        type="Any",
    ),
    dict(
        path=["currentIndex"],
        pickerLabel=t("Aktueller Index"),
        detail=t("0-basierter Index der aktuellen Iteration."),
        recommended=False,
        type="int",
    ),
    dict(
        path=["items"],
        pickerLabel=t("Alle Elemente"),
        detail=t("Die vollständige Quellliste."),
        recommended=False,
        type="List[Any]",
    ),
    dict(
        path=["count"],
        pickerLabel=t("Gesamtanzahl"),
        detail=t("Anzahl der Elemente in der Schleife."),
        recommended=False,
        type="int",
    ),
]
# DataPicker entries for MergeResult output ports (flow merge node).
MERGE_RESULT_DATA_PICK_OPTIONS = [
    dict(
        path=["merged"],
        pickerLabel=t("Zusammengeführt"),
        detail=t("Zusammengeführtes Ergebnis (je nach Modus)."),
        recommended=True,
        type="Dict",
    ),
    dict(
        path=["first"],
        pickerLabel=t("Erster Zweig"),
        detail=t("Daten vom ersten verbundenen Eingang (Modus „first“)."),
        recommended=False,
        type="Any",
    ),
    dict(
        path=["inputs"],
        pickerLabel=t("Alle Eingänge"),
        detail=t("Dict der Eingabeobjekte nach Port-Index."),
        recommended=False,
        type="Dict[int,Any]",
    ),
]
# Ports, die typische Schritt-Ausgaben durchreichen (nicht nur leerer Transit).
_FLOW_INPUT_SCHEMAS = [
"Transit",
@ -119,7 +174,7 @@ FLOW_NODES = [
"Transit", "UdmDocument", "EmailList", "DocumentList", "FileList", "TaskList",
"ActionResult", "AiResult", "QueryResult", "FormPayload",
]}},
"outputPorts": {0: {"schema": "LoopItem"}},
"outputPorts": {0: {"schema": "LoopItem", "dataPickOptions": LOOP_ITEM_DATA_PICK_OPTIONS}},
"executor": "flow",
"meta": {"icon": "mdi-repeat", "color": "#FF9800", "usesAi": False},
},
@ -157,7 +212,7 @@ FLOW_NODES = [
0: {"accepts": list(_FLOW_INPUT_SCHEMAS)},
1: {"accepts": list(_FLOW_INPUT_SCHEMAS)},
},
"outputPorts": {0: {"schema": "MergeResult"}},
"outputPorts": {0: {"schema": "MergeResult", "dataPickOptions": MERGE_RESULT_DATA_PICK_OPTIONS}},
"executor": "flow",
"meta": {"icon": "mdi-call-merge", "color": "#FF9800", "usesAi": False},
},

View file

@ -3,6 +3,35 @@
from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.ai import DOCUMENT_LIST_DATA_PICK_OPTIONS
# DataPicker entries for BoolResult output ports (approval / yes-no inputs).
BOOL_RESULT_DATA_PICK_OPTIONS = [
    dict(
        path=["result"],
        pickerLabel=t("Ergebnis"),
        detail=t("Boolesches Ergebnis (z. B. Genehmigung ja/nein)."),
        recommended=True,
        type="bool",
    ),
    dict(
        path=["reason"],
        pickerLabel=t("Begründung"),
        detail=t("Optionale textuelle Begründung."),
        recommended=False,
        type="str",
    ),
]
# DataPicker entries for TextResult output ports (free-text user input).
TEXT_RESULT_DATA_PICK_OPTIONS = [
    dict(
        path=["text"],
        pickerLabel=t("Text"),
        detail=t("Vom Benutzer eingegebener oder gewählter Text."),
        recommended=True,
        type="str",
    ),
]
# Canonical form field types — single source of truth.
# portType maps to the PORT_TYPE_CATALOG primitive used by DataPicker / validateGraph.
FORM_FIELD_TYPES = [
@ -55,7 +84,7 @@ INPUT_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "BoolResult"}},
"outputPorts": {0: {"schema": "BoolResult", "dataPickOptions": BOOL_RESULT_DATA_PICK_OPTIONS}},
"executor": "input",
"meta": {"icon": "mdi-check-decagram", "color": "#4CAF50", "usesAi": False},
},
@ -78,7 +107,7 @@ INPUT_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "DocumentList"}},
"outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}},
"executor": "input",
"meta": {"icon": "mdi-upload", "color": "#2196F3", "usesAi": False},
},
@ -96,7 +125,7 @@ INPUT_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TextResult"}},
"outputPorts": {0: {"schema": "TextResult", "dataPickOptions": TEXT_RESULT_DATA_PICK_OPTIONS}},
"executor": "input",
"meta": {"icon": "mdi-comment-text", "color": "#FF9800", "usesAi": False},
},
@ -115,7 +144,7 @@ INPUT_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "BoolResult"}},
"outputPorts": {0: {"schema": "BoolResult", "dataPickOptions": BOOL_RESULT_DATA_PICK_OPTIONS}},
"executor": "input",
"meta": {"icon": "mdi-magnify-scan", "color": "#673AB7", "usesAi": False},
},
@ -133,7 +162,7 @@ INPUT_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TextResult"}},
"outputPorts": {0: {"schema": "TextResult", "dataPickOptions": TEXT_RESULT_DATA_PICK_OPTIONS}},
"executor": "input",
"meta": {"icon": "mdi-format-list-checks", "color": "#009688", "usesAi": False},
},
@ -153,7 +182,7 @@ INPUT_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "BoolResult"}},
"outputPorts": {0: {"schema": "BoolResult", "dataPickOptions": BOOL_RESULT_DATA_PICK_OPTIONS}},
"executor": "input",
"meta": {"icon": "mdi-checkbox-marked-circle", "color": "#8BC34A", "usesAi": False},
},

View file

@ -4,6 +4,8 @@
from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS
# Typed FeatureInstance binding (replaces legacy `string, hidden`).
# - type FeatureInstanceRef[redmine] is filtered by the DataPicker.
# - frontendType "featureInstance" is rendered by FeatureInstancePicker which
@ -31,7 +33,7 @@ REDMINE_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-ticket-outline", "color": "#4A6FA5", "usesAi": False},
"_method": "redmine",
"_action": "readTicket",
@ -59,7 +61,7 @@ REDMINE_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-format-list-bulleted", "color": "#4A6FA5", "usesAi": False},
"_method": "redmine",
"_action": "listTickets",
@ -91,7 +93,7 @@ REDMINE_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-ticket-plus-outline", "color": "#4A6FA5", "usesAi": False},
"_method": "redmine",
"_action": "createTicket",
@ -127,7 +129,7 @@ REDMINE_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-ticket-confirmation-outline", "color": "#4A6FA5", "usesAi": False},
"_method": "redmine",
"_action": "updateTicket",
@ -151,7 +153,7 @@ REDMINE_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-chart-bar", "color": "#4A6FA5", "usesAi": False},
"_method": "redmine",
"_action": "getStats",
@ -169,7 +171,7 @@ REDMINE_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-database-sync", "color": "#4A6FA5", "usesAi": False},
"_method": "redmine",
"_action": "runSync",

View file

@ -3,6 +3,35 @@
from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.ai import (
ACTION_RESULT_DATA_PICK_OPTIONS,
DOCUMENT_LIST_DATA_PICK_OPTIONS,
)
# DataPicker entries for FileList output ports (SharePoint find/list nodes).
FILE_LIST_DATA_PICK_OPTIONS = [
    dict(
        path=["files"],
        pickerLabel=t("Alle Dateien"),
        detail=t("Die vollständige Dateiliste."),
        recommended=True,
        type="List[FileItem]",
    ),
    dict(
        path=["files", 0],
        pickerLabel=t("Erste Datei"),
        detail=t("Das erste Listenelement."),
        recommended=False,
        type="FileItem",
    ),
    dict(
        path=["count"],
        pickerLabel=t("Anzahl"),
        detail=t("Anzahl der Dateien."),
        recommended=False,
        type="int",
    ),
]
SHAREPOINT_NODES = [
{
"id": "sharepoint.findFile",
@ -23,7 +52,7 @@ SHAREPOINT_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "FileList"}},
"outputPorts": {0: {"schema": "FileList", "dataPickOptions": FILE_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-search", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint",
"_action": "findDocumentPath",
@ -44,7 +73,7 @@ SHAREPOINT_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["FileList", "Transit", "LoopItem"]}},
"outputPorts": {0: {"schema": "DocumentList"}},
"outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-document", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint",
"_action": "readDocuments",
@ -67,7 +96,7 @@ SHAREPOINT_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-upload", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint",
"_action": "uploadFile",
@ -88,7 +117,7 @@ SHAREPOINT_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "FileList"}},
"outputPorts": {0: {"schema": "FileList", "dataPickOptions": FILE_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-folder-open", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint",
"_action": "listDocuments",
@ -109,7 +138,7 @@ SHAREPOINT_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["FileList", "Transit", "LoopItem"]}},
"outputPorts": {0: {"schema": "DocumentList"}},
"outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-download", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint",
"_action": "downloadFileByPath",
@ -133,7 +162,7 @@ SHAREPOINT_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-content-copy", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint",
"_action": "copyFile",

View file

@ -3,6 +3,8 @@
from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS
TRIGGER_NODES = [
{
"id": "trigger.manual",
@ -13,7 +15,7 @@ TRIGGER_NODES = [
"inputs": 0,
"outputs": 1,
"inputPorts": {},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"executor": "trigger",
"meta": {"icon": "mdi-play", "color": "#4CAF50", "usesAi": False},
},
@ -55,7 +57,7 @@ TRIGGER_NODES = [
"inputs": 0,
"outputs": 1,
"inputPorts": {},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"executor": "trigger",
"meta": {"icon": "mdi-clock", "color": "#2196F3", "usesAi": False},
},

View file

@ -3,6 +3,8 @@
from modules.shared.i18nRegistry import t
from modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS
# Typed FeatureInstance binding (replaces legacy `string, hidden`).
# - type uses the discriminator notation `FeatureInstanceRef[<code>]` so the
# DataPicker / RequiredAttributePicker can filter compatible upstream paths.
@ -35,7 +37,7 @@ TRUSTEE_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-database-refresh", "color": "#4CAF50", "usesAi": False},
"_method": "trustee",
"_action": "refreshAccountingData",
@ -62,7 +64,7 @@ TRUSTEE_NODES = [
# Runtime returns ActionResult.isSuccess(documents=[...]) — see
# actions/extractFromFiles.py. Declaring DocumentList here was adapter
# drift and broke the DataPicker for downstream nodes.
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-document-scan", "color": "#4CAF50", "usesAi": True},
"_method": "trustee",
"_action": "extractFromFiles",
@ -84,7 +86,7 @@ TRUSTEE_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["ActionResult", "DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-file-document-check", "color": "#4CAF50", "usesAi": False},
"_method": "trustee",
"_action": "processDocuments",
@ -103,7 +105,7 @@ TRUSTEE_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["ActionResult", "DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-calculator", "color": "#4CAF50", "usesAi": False},
"_method": "trustee",
"_action": "syncToAccounting",
@ -140,7 +142,7 @@ TRUSTEE_NODES = [
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit", "AiResult", "ConsolidateResult", "UdmDocument"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
"outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}},
"meta": {"icon": "mdi-database-search", "color": "#4CAF50", "usesAi": False},
"_method": "trustee",
"_action": "queryData",

View file

@ -82,6 +82,34 @@ def _localizeNode(node: Dict[str, Any], language: str) -> Dict[str, Any]:
pc["description"] = resolveText(pd, lang)
params.append(pc)
out["parameters"] = params
out_ports: Dict[Any, Dict[str, Any]] = {}
for idx, po in (node.get("outputPorts") or {}).items():
if not isinstance(po, dict):
continue
port_copy = dict(po)
opts = port_copy.get("dataPickOptions")
if isinstance(opts, list):
loc_opts: List[Dict[str, Any]] = []
for o in opts:
if not isinstance(o, dict):
continue
oc = dict(o)
pl = oc.get("pickerLabel")
if pl is not None:
oc["pickerLabel"] = resolveText(pl, lang)
dt = oc.get("detail")
if dt is not None:
oc["detail"] = resolveText(dt, lang)
loc_opts.append(oc)
port_copy["dataPickOptions"] = loc_opts
out_ports[idx] = port_copy
if isinstance(node.get("outputPorts"), dict):
out["outputPorts"] = out_ports
# Legacy node-level key no longer used — do not expose.
out.pop("outputPickHints", None)
return out
@ -112,7 +140,7 @@ def getNodeTypesForApi(
for name, schema in PORT_TYPE_CATALOG.items():
catalogSerialized[name] = {
"name": schema.name,
"fields": [f.model_dump() for f in schema.fields],
"fields": [f.model_dump(by_alias=True, exclude_none=True) for f in schema.fields],
}
return {

View file

@ -13,9 +13,9 @@ import time
import uuid
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field
from modules.shared.i18nRegistry import resolveText
from modules.shared.i18nRegistry import resolveText, t
logger = logging.getLogger(__name__)
@ -25,6 +25,8 @@ logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
class PortField(BaseModel):
model_config = ConfigDict(populate_by_name=True)
name: str
type: str # str, int, bool, List[str], List[Document], Dict[str,Any], ConnectionRef, …
description: str = ""
@ -36,11 +38,19 @@ class PortField(BaseModel):
discriminator: bool = False
# Surfaces this field at the top of the DataPicker list as the most common pick.
recommended: bool = False
# Human DataPicker title (camelCase JSON for frontend). Omit for technical paths-only.
picker_label: Optional[str] = Field(default=None, serialization_alias="pickerLabel")
# For List[T] fields: segment between parent and inner field (iteration / one list item).
picker_item_label: Optional[str] = Field(default=None, serialization_alias="pickerItemLabel")
class PortSchema(BaseModel):
name: str # e.g. "EmailDraft", "AiResult", "Transit"
fields: List[PortField]
# Declarative flag for the engine: when True, the executor attaches
# connection provenance ({id, authority, label}) onto the output. Replaces
# hard-coded schema lists in actionNodeExecutor._attachConnectionProvenance.
carriesConnectionProvenance: bool = False
class InputPortDef(BaseModel):
@ -153,7 +163,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
PortField(name="text", type="str", required=False, description="Textinhalt"),
PortField(name="children", type="List[Any]", required=False, description="Unterblöcke"),
]),
"DocumentList": PortSchema(name="DocumentList", fields=[
"DocumentList": PortSchema(name="DocumentList", carriesConnectionProvenance=True, fields=[
PortField(name="documents", type="List[Document]",
description="Dokumente aus vorherigen Schritten", recommended=True),
PortField(name="connection", type="ConnectionRef", required=False,
@ -163,7 +173,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
PortField(name="count", type="int", required=False,
description="Anzahl Dokumente"),
]),
"FileList": PortSchema(name="FileList", fields=[
"FileList": PortSchema(name="FileList", carriesConnectionProvenance=True, fields=[
PortField(name="files", type="List[FileItem]",
description="Dateiliste"),
PortField(name="connection", type="ConnectionRef", required=False,
@ -173,7 +183,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
PortField(name="count", type="int", required=False,
description="Anzahl Dateien"),
]),
"EmailDraft": PortSchema(name="EmailDraft", fields=[
"EmailDraft": PortSchema(name="EmailDraft", carriesConnectionProvenance=True, fields=[
PortField(name="subject", type="str",
description="Betreff"),
PortField(name="body", type="str",
@ -187,7 +197,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
PortField(name="connection", type="ConnectionRef", required=False,
description="Outlook-/Graph-Verbindung"),
]),
"EmailList": PortSchema(name="EmailList", fields=[
"EmailList": PortSchema(name="EmailList", carriesConnectionProvenance=True, fields=[
PortField(name="emails", type="List[EmailItem]",
description="E-Mails"),
PortField(name="connection", type="ConnectionRef", required=False,
@ -195,7 +205,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
PortField(name="count", type="int", required=False,
description="Anzahl"),
]),
"TaskList": PortSchema(name="TaskList", fields=[
"TaskList": PortSchema(name="TaskList", carriesConnectionProvenance=True, fields=[
PortField(name="tasks", type="List[TaskItem]",
description="Aufgaben"),
PortField(name="connection", type="ConnectionRef", required=False,
@ -219,15 +229,29 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
]),
"AiResult": PortSchema(name="AiResult", fields=[
PortField(name="prompt", type="str",
description="Prompt"),
description="Prompt",
picker_label=t("Eingabe (Prompt des Schritts)"),
),
PortField(name="response", type="str",
description="Antworttext", recommended=True),
description=(
"Antworttext (Modell-Fließtext o. ä.; Bilder liegen in documents, nicht hier)."
),
recommended=True,
picker_label=t("Ausgabetext (Modell)"),
),
PortField(name="responseData", type="Dict", required=False,
description="Strukturierte Antwort (nur bei JSON-Ausgabe)"),
description="Strukturierte Antwort (nur bei JSON-Ausgabe)",
picker_label=t("Strukturierte Antwortdaten")),
PortField(name="context", type="str",
description="Kontext"),
description="Kontext",
picker_label=t("Eingabe-Kontext")),
PortField(name="documents", type="List[Document]",
description="Dokumente"),
description=(
"Erzeugte oder mitgegebene Dateien (z. B. Bilder); documentData = Nutzlast pro Eintrag."
),
picker_label=t("Alle Ausgabe-Dateien (Liste)"),
picker_item_label=t("je Datei"),
),
]),
"BoolResult": PortSchema(name="BoolResult", fields=[
PortField(name="result", type="bool",
@ -237,7 +261,8 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
]),
"TextResult": PortSchema(name="TextResult", fields=[
PortField(name="text", type="str",
description="Text"),
description="Text",
picker_label=t("Text (Schrittausgabe)")),
]),
"LoopItem": PortSchema(name="LoopItem", fields=[
PortField(name="currentItem", type="Any",
@ -265,11 +290,15 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
]),
"ActionDocument": PortSchema(name="ActionDocument", fields=[
PortField(name="documentName", type="str",
description="Dokumentname"),
description="Dokumentname",
picker_label=t("Dateiname")),
PortField(name="documentData", type="Any",
description="Inhalt / Rohdaten (z.B. JSON-String, Bytes)"),
description="Inhalt / Rohdaten (z.B. JSON-String, Bytes)",
picker_label=t("Dateiinhalt (JSON, Text oder Bild)"),
recommended=True),
PortField(name="mimeType", type="str",
description="MIME-Typ"),
description="MIME-Typ",
picker_label=t("Dateityp (MIME)")),
PortField(name="fileId", type="str", required=False,
description="Persistierte FileItem.id (vom Engine ergänzt)"),
PortField(name="fileName", type="str", required=False,
@ -285,12 +314,40 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
# Without it in the catalog the DataPicker cannot offer downstream
# bindings like `processDocuments → documents → *` for syncToAccounting.
PortField(name="documents", type="List[ActionDocument]", required=False,
description="Erzeugte Dokumente (immer befüllt für Trustee/AI/Email/...)"),
description=(
"Dokumentliste: Index 0 oft JSON-Handover oder Hauptdatei; Einträge mit "
"MIME image/* oder Namen extract_media_* sind ausgelagerte Bilder (documentData = Binär)."
),
picker_label=t("Alle Ausgabe-Dokumente"),
picker_item_label=t("je Dokument"),
),
PortField(name="data", type="Dict", required=False,
description="Ergebnisdaten"),
description="Ergebnisdaten",
picker_label=t("Technische Detaildaten (data)")),
# Mirror AiResult primary text fields so DataPicker / primaryTextRef behave the same
PortField(name="prompt", type="str", required=False,
description="Optional: auslösender Prompt / Schrittname",
picker_label=t("Auslöser / Prompt (falls vorhanden)")),
PortField(name="response", type="str", required=False,
description=(
"Primär nur Fließtext (z. B. nach Extraktion: alle Text-Parts verkettet, keine Bilder)."
),
recommended=True,
picker_label=t("Nur Fließtext (gesamt)")),
PortField(name="context", type="str", required=False,
description="Optional: Eingabe-Kontext",
picker_label=t("Mitgegebener Kontext")),
PortField(name="imageDocumentsOnly", type="List[ActionDocument]", required=False,
description=(
"Nur Bildausgaben (ohne JSON-Handover), z. B. von context.extractContent."
),
picker_label=t("Nur Bilder (Liste)")),
PortField(name="responseData", type="Dict", required=False,
description="Optional: strukturierte Zusatzdaten",
picker_label=t("Strukturierte Zusatzdaten")),
]),
"Transit": PortSchema(name="Transit", fields=[]),
"UdmDocument": PortSchema(name="UdmDocument", fields=[
"UdmDocument": PortSchema(name="UdmDocument", carriesConnectionProvenance=True, fields=[
PortField(name="id", type="str", description="Dokument-ID"),
PortField(name="sourceType", type="str", description="Quellformat (pdf, docx, …)"),
PortField(name="sourcePath", type="str", description="Quellpfad"),
@ -622,6 +679,7 @@ SYSTEM_VARIABLES: Dict[str, Dict[str, str]] = {
PRIMARY_TEXT_HANDOVER_REF_PATH: Dict[str, List[Any]] = {
"AiResult": ["response"],
"ActionResult": ["response"],
"TextResult": ["text"],
"ConsolidateResult": ["result"],
}

View file

@ -36,6 +36,31 @@ def _paths_for_port_schema(schema: PortSchema, producer_node_id: str) -> List[Di
return out
def _paths_for_data_pick_options(
options: List[Dict[str, Any]],
producer_node_id: str,
) -> List[Dict[str, Any]]:
"""Explicit per-port pick list from node definition (authoritative; no catalog expansion)."""
out: List[Dict[str, Any]] = []
for o in options:
if not isinstance(o, dict):
continue
path = o.get("path")
if not isinstance(path, list):
continue
label = o.get("pickerLabel")
out.append(
{
"producerNodeId": producer_node_id,
"path": path,
"type": o.get("type") or "Any",
"label": label if isinstance(label, str) else ".".join(str(p) for p in path),
"scopeOrigin": "data",
}
)
return out
def _paths_for_schema(schema_name: str, producer_node_id: str) -> List[Dict[str, Any]]:
if not schema_name or schema_name == "Transit":
return []
@ -83,7 +108,16 @@ def compute_upstream_paths(graph: Dict[str, Any], target_node_id: str) -> List[D
if not ndef:
continue
out0 = (ndef.get("outputPorts") or {}).get(0, {})
derived = parse_graph_defined_output_schema(anode, out0 if isinstance(out0, dict) else {})
out0 = out0 if isinstance(out0, dict) else {}
dpo = out0.get("dataPickOptions")
if isinstance(dpo, list) and len(dpo) > 0:
plab = (anode.get("title") or "").strip() or aid
for entry in _paths_for_data_pick_options(dpo, aid):
entry["producerLabel"] = plab
paths.append(entry)
continue
derived = parse_graph_defined_output_schema(anode, out0)
if derived:
for entry in _paths_for_port_schema(derived, aid):
entry["producerLabel"] = (anode.get("title") or "").strip() or aid

View file

@ -70,6 +70,14 @@ def _coerce_document_data_to_bytes(raw: Any) -> Optional[bytes]:
return None
def _image_documents_from_docs_list(docs_list: list) -> list:
"""All image/* ActionDocument dicts (generic — no assumptions about index 0)."""
return [
d for d in (docs_list or [])
if isinstance(d, dict) and str(d.get("mimeType") or "").strip().lower().startswith("image/")
]
_USER_CONNECTION_ID_RE = re.compile(
r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
re.IGNORECASE,
@ -206,6 +214,13 @@ def _buildConnectionRefDict(connRef: str, chatService, services) -> Optional[Dic
return {"id": conn_id, "authority": authority, "label": label or f"{authority}:{user}"}
def _schemaCarriesConnectionProvenance(outputSchema: str) -> bool:
    """True iff the port schema declares ``carriesConnectionProvenance`` in the catalog."""
    # Imported lazily so this executor module does not hard-depend on the
    # graphical-editor package at import time.
    from modules.features.graphicalEditor.portTypes import PORT_TYPE_CATALOG

    entry = PORT_TYPE_CATALOG.get(outputSchema)
    if entry is None:
        return False
    return bool(getattr(entry, "carriesConnectionProvenance", False))
def _attachConnectionProvenance(
out: Dict[str, Any],
resolvedParams: Dict[str, Any],
@ -219,7 +234,7 @@ def _attachConnectionProvenance(
cref = resolvedParams.get("connectionReference")
if not cref:
return
if outputSchema not in ("FileList", "DocumentList", "EmailList", "TaskList", "EmailDraft", "UdmDocument"):
if not _schemaCarriesConnectionProvenance(outputSchema):
return
payload = _buildConnectionRefDict(str(cref), chatService, services)
if payload:
@ -235,8 +250,7 @@ def _resolveConnectionParam(params: Dict, chatService, services) -> None:
params["connectionReference"] = resolved
def _applyEmailCheckFilter(params: Dict) -> None:
"""Build filter from discrete email params for email.checkEmail."""
def _mapper_emailCheckFilter(params: Dict, **_) -> None:
built = _buildEmailFilter(
fromAddress=params.get("fromAddress"),
subjectContains=params.get("subjectContains"),
@ -248,8 +262,7 @@ def _applyEmailCheckFilter(params: Dict) -> None:
params.pop(k, None)
def _applyEmailSearchQuery(params: Dict) -> None:
"""Build query from discrete email params for email.searchEmail."""
def _mapper_emailSearchQuery(params: Dict, **_) -> None:
built = _buildSearchQuery(
query=params.get("query"),
fromAddress=params.get("fromAddress"),
@ -264,6 +277,56 @@ def _applyEmailSearchQuery(params: Dict) -> None:
params.pop(k, None)
def _mapper_aiPromptLegacyAlias(params: Dict, **_) -> None:
"""Backwards-compatible alias: legacy ``prompt`` parameter is exposed as ``aiPrompt``."""
if "aiPrompt" not in params and "prompt" in params:
params["aiPrompt"] = params.pop("prompt")
def _mapper_emailDraftContextFromSubjectBody(params: Dict, **_) -> None:
"""Build ``context`` from discrete subject + body fields and drop them."""
subject = params.get("subject", "")
body = params.get("body", "")
if not (subject or body):
return
parts = []
if subject:
parts.append(f"Subject: {subject}")
if body:
parts.append(f"Body:\n{body}")
params["context"] = "\n\n".join(parts)
params.pop("subject", None)
params.pop("body", None)
def _mapper_clickupTaskUpdateMerge(params: Dict, **_) -> None:
    """Declarative param mapper for ClickUp task updates.

    Delegates to ``merge_clickup_task_update_entries``, which — like the other
    mappers in this registry — presumably mutates ``params`` in place (returns
    None; confirm against modules.workflows.automation2.clickupTaskUpdateMerge).
    The import is kept local to avoid a module-level workflows dependency.
    """
    from modules.workflows.automation2.clickupTaskUpdateMerge import merge_clickup_task_update_entries
    merge_clickup_task_update_entries(params)
# Name → callable registry for declarative ``paramMappers`` entries in node
# definitions. ``_applyParamMappers`` resolves names against this dict, which
# replaces per-node-type branching in the executor.
_PARAM_MAPPERS: Dict[str, Any] = {
    "emailCheckFilter": _mapper_emailCheckFilter,
    "emailSearchQuery": _mapper_emailSearchQuery,
    "aiPromptLegacyAlias": _mapper_aiPromptLegacyAlias,
    "emailDraftContextFromSubjectBody": _mapper_emailDraftContextFromSubjectBody,
    "clickupTaskUpdateMerge": _mapper_clickupTaskUpdateMerge,
}
def _applyParamMappers(nodeDef: Dict[str, Any], resolvedParams: Dict[str, Any]) -> None:
    """Run declared ``paramMappers`` from the node definition (no node-id branching).

    Unknown mapper names and mapper exceptions are logged as warnings and
    skipped, so a bad declaration never aborts node execution.
    """
    for mapper_name in nodeDef.get("paramMappers") or []:
        mapper = _PARAM_MAPPERS.get(mapper_name)
        if not mapper:
            logger.warning("Unknown paramMapper %r — node %s; skipping", mapper_name, nodeDef.get("id"))
            continue
        try:
            mapper(resolvedParams)
        except Exception as exc:
            logger.warning("paramMapper %r failed for node %s: %s", mapper_name, nodeDef.get("id"), exc)
def _getOutputSchemaName(nodeDef: Dict) -> str:
"""Get the output schema name from the node definition."""
outputPorts = nodeDef.get("outputPorts", {})
@ -338,14 +401,8 @@ class ActionNodeExecutor:
chatService = getattr(self.services, "chat", None)
_resolveConnectionParam(resolvedParams, chatService, self.services)
# 4. Node-type-specific param transformations
if nodeType == "email.checkEmail":
_applyEmailCheckFilter(resolvedParams)
elif nodeType == "email.searchEmail":
_applyEmailSearchQuery(resolvedParams)
elif nodeType == "clickup.updateTask":
from modules.workflows.automation2.clickupTaskUpdateMerge import merge_clickup_task_update_entries
merge_clickup_task_update_entries(resolvedParams)
# 4. Apply declarative paramMappers from the node definition
_applyParamMappers(nodeDef, resolvedParams)
# 5. email.checkEmail pause for email wait
if nodeType == "email.checkEmail":
@ -362,26 +419,7 @@ class ActionNodeExecutor:
}
raise PauseForEmailWaitError(runId=runId, nodeId=nodeId, waitConfig=waitConfig)
# 6. AI nodes: normalize legacy "prompt" -> "aiPrompt"
if nodeType == "ai.prompt":
if "aiPrompt" not in resolvedParams and "prompt" in resolvedParams:
resolvedParams["aiPrompt"] = resolvedParams.pop("prompt")
# 7. Build context for email.draftEmail from subject + body
if nodeType == "email.draftEmail":
subject = resolvedParams.get("subject", "")
body = resolvedParams.get("body", "")
if subject or body:
contextParts = []
if subject:
contextParts.append(f"Subject: {subject}")
if body:
contextParts.append(f"Body:\n{body}")
resolvedParams["context"] = "\n\n".join(contextParts)
resolvedParams.pop("subject", None)
resolvedParams.pop("body", None)
# 8. Execute action
# 6. Execute action
logger.info("ActionNodeExecutor node %s calling %s.%s with %d params", nodeId, methodName, actionName, len(resolvedParams))
try:
executor = ActionExecutor(self.services)
@ -392,7 +430,7 @@ class ActionNodeExecutor:
logger.exception("ActionNodeExecutor node %s FAILED: %s", nodeId, e)
return _normalizeError(e, outputSchema)
# 9. Persist generated documents as files and build JSON-safe output
# 7. Persist generated documents as files and build JSON-safe output
_raw_folder_id = resolvedParams.get("folderId")
persist_folder_id: Optional[str] = None
if _raw_folder_id is not None:
@ -415,6 +453,18 @@ class ActionNodeExecutor:
rawData = getattr(d, "documentData", None) if hasattr(d, "documentData") else (dumped.get("documentData") if isinstance(dumped, dict) else None)
rawBytes = _coerce_document_data_to_bytes(rawData)
# Extracted page images are workflow intermediates — keep bytes as base64 on the
# ActionDocument only; do not create rows in the user's file library (Meine Dateien).
if isinstance(dumped, dict) and rawBytes:
_meta = dumped.get("validationMetadata") if isinstance(dumped.get("validationMetadata"), dict) else {}
if (
_meta.get("actionType") == "context.extractContent"
and _meta.get("handoverRole") == "extractedMedia"
):
dumped["documentData"] = base64.b64encode(rawBytes).decode("ascii")
dumped["_hasBinaryData"] = True
docsList.append(dumped)
continue
if isinstance(dumped, dict) and rawBytes:
try:
from modules.interfaces.interfaceDbManagement import getInterface as _getMgmtInterface
@ -452,11 +502,7 @@ class ActionNodeExecutor:
docsList.append(dumped)
# Clean DocumentList shape for document nodes (documents + count, no ActionResult/AiResult noise)
if outputSchema == "DocumentList" and nodeType in (
"ai.generateDocument",
"ai.convertDocument",
"file.create",
):
if outputSchema == "DocumentList":
if not result.success:
return _normalizeError(
RuntimeError(str(result.error or "document action failed")),
@ -470,7 +516,10 @@ class ActionNodeExecutor:
return normalizeToSchema(list_out, outputSchema)
extractedContext = ""
if result.documents:
rd_early = getattr(result, "data", None)
if isinstance(rd_early, dict) and rd_early.get("response") is not None:
extractedContext = str(rd_early.get("response")).strip()
elif result.documents:
doc = result.documents[0]
raw = getattr(doc, "documentData", None) if hasattr(doc, "documentData") else (doc.get("documentData") if isinstance(doc, dict) else None)
if isinstance(raw, bytes):
@ -480,14 +529,6 @@ class ActionNodeExecutor:
extractedContext = ""
elif raw:
extractedContext = str(raw).strip()
else:
# ai.process (and similar): text handover in ActionResult.data — no persisted document row
rd = getattr(result, "data", None)
if isinstance(rd, dict):
handover = rd.get("response")
if handover is not None:
extractedContext = str(handover).strip()
promptText = str(resolvedParams.get("aiPrompt") or resolvedParams.get("prompt") or "").strip()
resultData = getattr(result, "data", None)
@ -505,7 +546,7 @@ class ActionNodeExecutor:
"data": dataField,
}
if nodeType.startswith("ai."):
if outputSchema == "AiResult":
out["prompt"] = promptText
out["response"] = extractedContext
inputContext = resolvedParams.get("context")
@ -521,8 +562,24 @@ class ActionNodeExecutor:
out["responseData"] = parsed
except (json.JSONDecodeError, TypeError):
pass
if outputSchema == "AiResult" and result.success:
out["imageDocumentsOnly"] = _image_documents_from_docs_list(docsList)
if nodeType.startswith("clickup.") and result.success and docsList:
if outputSchema == "ActionResult":
# Unified handover: mirror AiResult primary paths for DataRefs / primaryTextRef
inp_ctx = resolvedParams.get("context")
ctx_str = ""
if inp_ctx is not None:
ctx_str = inp_ctx if isinstance(inp_ctx, str) else json.dumps(inp_ctx, ensure_ascii=False, default=str)
out.setdefault("prompt", "")
out.setdefault("context", ctx_str if ctx_str else "")
rsp = str(out.get("response") or "").strip()
if not rsp:
out["response"] = extractedContext or ""
if result.success:
out["imageDocumentsOnly"] = _image_documents_from_docs_list(docsList)
if outputSchema == "TaskResult" and result.success and docsList:
try:
d0 = docsList[0] if isinstance(docsList[0], dict) else {}
raw = d0.get("documentData")
@ -534,7 +591,7 @@ class ActionNodeExecutor:
except (json.JSONDecodeError, TypeError, ValueError):
pass
if outputSchema == "ConsolidateResult" and nodeType == "ai.consolidate":
if outputSchema == "ConsolidateResult":
data_dict = result.data if isinstance(getattr(result, "data", None), dict) else {}
cr_out = {
"result": data_dict.get("result", ""),

View file

@ -410,10 +410,14 @@ def resolveParameterReferences(value: Any, nodeOutputs: Dict[str, Any]) -> Any:
return re.sub(r"\{\{\s*([^}]+)\s*\}\}", repl, value)
if isinstance(value, list):
# contextBuilder: list where every item is a `{"type":"ref",...}` envelope.
# Resolve each ref and join the serialised parts into a single prompt string.
# Resolve each part; a single ref preserves the resolved type (str, list, dict).
if value and all(isinstance(v, dict) and v.get("type") == "ref" for v in value):
from modules.workflows.methods.methodAi._common import serialize_context
parts = [serialize_context(resolveParameterReferences(v, nodeOutputs)) for v in value]
resolved_parts = [resolveParameterReferences(v, nodeOutputs) for v in value]
if len(resolved_parts) == 1:
return resolved_parts[0]
parts = [serialize_context(p) for p in resolved_parts]
return "\n\n".join(p for p in parts if p)
return [resolveParameterReferences(v, nodeOutputs) for v in value]
return value

View file

@ -7,6 +7,19 @@ import json
from typing import Any
def is_image_action_document_list(val: Any) -> bool:
    """True if ``val`` is a non-empty list of ActionDocument-shaped dicts (mimeType image/*)."""
    if not isinstance(val, list) or not val:
        return False
    return all(
        isinstance(entry, dict)
        and str(entry.get("mimeType") or "").strip().lower().startswith("image/")
        for entry in val
    )
def serialize_context(val: Any) -> str:
"""Convert any context value to a readable string for use in AI prompts.

View file

@ -1,149 +1,358 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""context.extractContent — extracts content without AI.
Returns a unified handover compatible with AiResult-style downstream wiring:
- ``documents[0]``: structured JSON (`context.extractContent.handover.v1`); image ``parts``
keep metadata but omit pixel data; each dropped image references
``handoverMediaDocumentName`` matching a sibling blob document.
- ``documents[1:]``: each extracted image as its own binary ``ActionDocument`` (like
``ai.process`` artefact outputs).
- ``ActionResult.data["response"]`` plus normalized executor field ``response``: concatenated
  plain text from all text parts — a safe default for ``file.create`` / primaryTextRef."""
import base64 as _b64
import binascii as _binascii
import logging
import re
import time
from typing import Dict, Any
from typing import Any, Dict, List, Tuple
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import (
DocumentReferenceList,
coerceDocumentReferenceList,
)
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
from modules.datamodels.datamodelDocref import coerceDocumentReferenceList
from modules.datamodels.datamodelExtraction import ContentExtracted, ExtractionOptions
logger = logging.getLogger(__name__)
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
operationId = None
try:
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
operationId = f"context_extract_{workflowId}_{int(time.time())}"
_UNSAFE_FILE_KEY = re.compile(r"[^\w\-.\(\)\[\]%@+]")
documentListParam = parameters.get("documentList")
if not documentListParam:
return ActionResult.isFailure(error="documentList is required")
HANDOVER_KIND = "context.extractContent.handover.v1"
documentList = coerceDocumentReferenceList(documentListParam)
if not documentList.references:
return ActionResult.isFailure(
error=f"documentList could not be parsed (type={type(documentListParam).__name__}); "
f"expected DocumentReferenceList, list of strings/dicts, or "
f"a wrapper dict like {{'documents': [...]}}"
)
# Start progress tracking
parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart(
operationId,
"Extracting content from documents",
"Content Extraction",
f"Documents: {len(documentList.references)}",
parentOperationId=parentOperationId
)
# Get ChatDocuments from documentList
self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
if not chatDocuments:
self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="No documents found in documentList")
logger.info(f"Extracting content from {len(chatDocuments)} documents")
# Prepare extraction options
self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
extractionOptionsParam = parameters.get("extractionOptions")
# Convert dict to ExtractionOptions object if needed, or create defaults
if extractionOptionsParam:
if isinstance(extractionOptionsParam, dict):
# Ensure required fields are present
if "prompt" not in extractionOptionsParam:
extractionOptionsParam["prompt"] = "Extract all content from the document"
if "mergeStrategy" not in extractionOptionsParam:
extractionOptionsParam["mergeStrategy"] = MergeStrategy(
mergeType="concatenate",
groupBy="typeGroup",
orderBy="id"
)
# Convert dict to ExtractionOptions object
try:
extractionOptions = ExtractionOptions(**extractionOptionsParam)
except Exception as e:
logger.warning(f"Failed to create ExtractionOptions from dict: {str(e)}, using defaults")
extractionOptions = None
elif isinstance(extractionOptionsParam, ExtractionOptions):
extractionOptions = extractionOptionsParam
else:
# Invalid type, use defaults
logger.warning(f"Invalid extractionOptions type: {type(extractionOptionsParam)}, using defaults")
extractionOptions = None
else:
extractionOptions = None
# If extractionOptions not provided, create defaults
if not extractionOptions:
# Default extraction options for pure content extraction (no AI processing)
extractionOptions = ExtractionOptions(
def _default_extraction_options() -> ExtractionOptions:
"""No merge — keep all parts for downstream JSON selection."""
return ExtractionOptions(
prompt="Extract all content from the document",
mergeStrategy=MergeStrategy(
mergeType="concatenate",
groupBy="typeGroup",
orderBy="id"
),
processDocumentsIndividually=True
mergeStrategy=None,
processDocumentsIndividually=True,
outputFormat="parts",
outputDetail="full",
)
# Call extraction service with hierarchical progress logging
self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents")
# Pass operationId for hierarchical per-document progress logging
extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
# Build ActionDocuments from ContentExtracted results
self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
actionDocuments = []
# Map extracted results back to original documents by index (results are in same order)
for i, extracted in enumerate(extractedResults):
# Get original document name if available
originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName:
# Use original filename with "extracted_" prefix
baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName
documentName = f"{baseName}_extracted_{extracted.id}.json"
else:
# Fallback to generic name with index
documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"
def _file_json_key(display_name: str, index: int, key_counts: Dict[str, int]) -> str:
    """Derive a stable, unique JSON key for the file at position *index*.

    Slugifies *display_name* (falling back to a positional placeholder),
    prefixes the 1-based position, and disambiguates repeated keys by
    appending a ``__N`` suffix tracked in *key_counts* (mutated in place).
    """
    fallback = f"document_{index + 1}"
    raw = (display_name or "").strip() or fallback
    cleaned = raw.replace("/", "_").replace("\\", "_").replace(" ", "_")
    cleaned = _UNSAFE_FILE_KEY.sub("_", cleaned).strip("_") or fallback
    candidate = f"file_{index + 1}_{cleaned}"
    seen_before = key_counts.get(candidate, 0)
    key_counts[candidate] = seen_before + 1
    if seen_before:
        return f"{candidate}__{seen_before}"
    return candidate
# Store ContentExtracted object in ActionDocument.documentData
def _serialize_parts(parts: Any) -> List[Dict[str, Any]]:
out: List[Dict[str, Any]] = []
for p in parts or []:
if hasattr(p, "model_dump"):
out.append(p.model_dump(mode="json"))
elif isinstance(p, dict):
out.append(dict(p))
return out
def _rebuild_by_type_group(parts_ser: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
by_type: Dict[str, List[Dict[str, Any]]] = {}
for entry in parts_ser:
if not isinstance(entry, dict):
continue
tg = (entry.get("typeGroup") or "").strip() or "_other"
by_type.setdefault(tg, []).append(entry)
return by_type
def _joined_text_from_handover_payload(payload: Dict[str, Any]) -> str:
"""Concatenate text parts across fileOrder for AiResult-compatible ``response``."""
files_section = payload.get("files") or {}
ordered = payload.get("fileOrder")
keys: List[str] = ordered if isinstance(ordered, list) and ordered else list(files_section.keys())
chunks: List[str] = []
for fk in keys:
bucket = files_section.get(fk)
if not isinstance(bucket, dict):
continue
for p in bucket.get("parts") or []:
if not isinstance(p, dict):
continue
if (p.get("typeGroup") or "").strip() != "text":
continue
raw = p.get("data")
if raw is None:
continue
s = str(raw).strip()
if s:
chunks.append(s)
return "\n\n".join(chunks)
def _mime_to_file_extension(mime: str) -> str:
m = (mime or "").split(";")[0].strip().lower()
mapping = {
"image/jpeg": "jpg",
"image/jpg": "jpg",
"image/png": "png",
"image/gif": "gif",
"image/webp": "webp",
"image/bmp": "bmp",
"image/tiff": "tiff",
}
return mapping.get(m, m.rsplit("/", 1)[-1] if "/" in m else "bin")
def _split_images_to_sidecar_documents(
    payload: Dict[str, Any],
    *,
    document_name_stem: str,
) -> Tuple[Dict[str, Any], List[ActionDocument]]:
    """
    Deep-copy handover JSON, clear image pixel data from ``parts``, attach
    ``handoverMediaDocumentName`` on each image part, emit binary ActionDocuments.

    Fix: removed interleaved merge residue from a previous version (stray
    references to undefined ``i``/``extracted``/``originalDoc``/``actionDoc``
    and a duplicated return) that made the function syntactically invalid.

    Returns:
        Tuple of (stripped payload copy, list of binary image ActionDocuments).
        Image parts whose base64 data cannot be decoded are kept inline.
    """
    import copy

    bundle = copy.deepcopy(payload)
    files_section = bundle.get("files") or {}
    ordered = bundle.get("fileOrder")
    key_order: List[str] = ordered if isinstance(ordered, list) and ordered else list(files_section.keys())
    media_docs: List[ActionDocument] = []
    kind = bundle.get("kind") or HANDOVER_KIND
    stem = re.sub(r"[^\w\-]+", "_", document_name_stem).strip("_") or "extract"
    for fk in key_order:
        bucket = files_section.get(fk)
        if not isinstance(bucket, dict):
            continue
        parts = bucket.get("parts")
        if not isinstance(parts, list):
            continue
        new_parts: List[Dict[str, Any]] = []
        for p in parts:
            if not isinstance(p, dict):
                new_parts.append(p)
                continue
            pcopy = dict(p)
            tg = (pcopy.get("typeGroup") or "").strip()
            mime = (pcopy.get("mimeType") or "").strip()
            raw_data = pcopy.get("data")
            if tg == "image" and mime.lower().startswith("image/") and raw_data:
                raw_s = raw_data.strip() if isinstance(raw_data, str) else ""
                try:
                    blob = _b64.b64decode(raw_s, validate=True) if raw_s else b""
                except (_binascii.Error, TypeError, ValueError) as e:
                    # Undecodable pixels: keep the part inline rather than lose data.
                    logger.warning(
                        "extractContent: could not decode image part %s (keep inline): %s",
                        pcopy.get("id"),
                        e,
                    )
                    new_parts.append(pcopy)
                    continue
                if not blob:
                    new_parts.append(pcopy)
                    continue
                part_id = str(pcopy.get("id") or "part")
                # Full part id (UUID) — must not truncate or names collide / break linking
                safe_id = re.sub(r"[^\w\-.]+", "_", part_id).strip("_") or "media"
                if len(safe_id) > 200:
                    safe_id = safe_id[:200]
                ext = _mime_to_file_extension(mime)
                media_name = f"extract_media_{stem}_{safe_id}.{ext}"
                # Replace inline pixels with a link to the sidecar blob document.
                pcopy["data"] = ""
                pcopy["handoverMediaDocumentName"] = media_name
                media_docs.append(
                    ActionDocument(
                        documentName=media_name,
                        documentData=blob,
                        mimeType=mime,
                        validationMetadata={
                            "actionType": "context.extractContent",
                            "handoverRole": "extractedMedia",
                            "sourcePartId": part_id,
                            "handoverSchema": kind,
                            "containerFileKey": fk,
                        },
                    )
                )
                new_parts.append(pcopy)
            else:
                new_parts.append(pcopy)
        bucket["parts"] = new_parts
        bucket["byTypeGroup"] = _rebuild_by_type_group(new_parts)
        files_section[fk] = bucket
    return bundle, media_docs
def _one_file_bucket(ec: ContentExtracted, source_file_name: str) -> Dict[str, Any]:
    """Serialize one ContentExtracted into its per-file bucket for the handover JSON."""

    def _dump(value: Any) -> Any:
        # Pydantic models become JSON-ready dicts; everything else passes through.
        return value.model_dump(mode="json") if hasattr(value, "model_dump") else value

    serialized_parts = _serialize_parts(ec.parts)
    udm = _dump(getattr(ec, "udm", None))
    summary = _dump(getattr(ec, "summary", None))
    if summary is None:
        summary = {}
    elif isinstance(summary, dict):
        summary = dict(summary)
    return {
        "sourceFileName": source_file_name,
        "extractedId": getattr(ec, "id", ""),
        "summary": summary,
        "udm": udm,
        "parts": serialized_parts,
        "byTypeGroup": _rebuild_by_type_group(serialized_parts),
    }
def build_extract_content_handover(
    *,
    extracted_results: List[ContentExtracted],
    chat_file_names: List[str],
    operation_ref: str,
) -> Dict[str, Any]:
    """Assemble the unified handover JSON (schema v1) from per-file extraction results.

    ``fileOrder`` preserves the input ordering so downstream consumers can
    reproduce the original document sequence; file keys are uniquified via
    ``_file_json_key``.
    """
    key_counts: Dict[str, int] = {}
    files: Dict[str, Any] = {}
    order: List[str] = []
    for position, extracted in enumerate(extracted_results):
        source_name = chat_file_names[position] if position < len(chat_file_names) else ""
        file_key = _file_json_key(str(source_name), position, key_counts)
        files[file_key] = _one_file_bucket(extracted, str(source_name))
        order.append(file_key)
    return {
        "schemaVersion": 1,
        "kind": HANDOVER_KIND,
        "operationRef": operation_ref,
        "fileOrder": order,
        "files": files,
    }
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """Extract document content without AI and return the unified handover.

    Output contract:
      * ``documents[0]`` — JSON handover (``context.extractContent.handover.v1``)
        with image pixel data moved out to sibling binary documents.
      * ``documents[1:]`` — one binary ActionDocument per extracted image
        (``extract_media_*``), linked via ``handoverMediaDocumentName``.
      * ``data.response`` — concatenated plain text of all text parts.

    Fix: the exception handler previously referenced the undefined name
    ``operationId`` (residue of the pre-snake_case version); the resulting
    NameError was swallowed by the nested ``except Exception`` and skipped
    the real ``progressLogFinish`` call. Only ``operation_id`` is used now.
    """
    operation_id = None
    try:
        wf = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operation_id = f"context_extract_{wf}_{int(time.time())}"
        document_list_param = parameters.get("documentList")
        if not document_list_param:
            return ActionResult.isFailure(error="documentList is required")
        dl = coerceDocumentReferenceList(document_list_param)
        if not dl.references:
            return ActionResult.isFailure(
                error=(
                    f"documentList could not be parsed (type={type(document_list_param).__name__}); "
                    "expected DocumentReferenceList, list of strings/dicts, or "
                    "a wrapper dict like {'documents': [...]}"
                ),
            )
        parent_operation_id = parameters.get("parentOperationId")
        self.services.chat.progressLogStart(
            operation_id,
            "Extracting content from documents",
            "Content Extraction",
            f"Documents: {len(dl.references)}",
            parentOperationId=parent_operation_id,
        )
        self.services.chat.progressLogUpdate(operation_id, 0.2, "Loading documents")
        chat_documents = self.services.chat.getChatDocumentsFromDocumentList(dl)
        if not chat_documents:
            self.services.chat.progressLogFinish(operation_id, False)
            return ActionResult.isFailure(error="No documents found in documentList")
        logger.info(f"Extracting JSON handover from {len(chat_documents)} documents")
        self.services.chat.progressLogUpdate(operation_id, 0.3, "Preparing extraction options")
        eo_param = parameters.get("extractionOptions")
        extraction_options: ExtractionOptions
        if isinstance(eo_param, dict) and eo_param:
            eo = dict(eo_param)
            # Missing prompt/mergeStrategy fall back to pure-extraction defaults.
            eo.setdefault("prompt", "Extract all content from the document")
            if "mergeStrategy" not in eo:
                eo["mergeStrategy"] = None
            try:
                extraction_options = ExtractionOptions(**eo)
            except Exception as e:
                logger.warning(f"Invalid extractionOptions, using defaults: {e}")
                extraction_options = _default_extraction_options()
        elif isinstance(eo_param, ExtractionOptions):
            extraction_options = eo_param
        else:
            extraction_options = _default_extraction_options()
        self.services.chat.progressLogUpdate(operation_id, 0.4, "Extracting …")
        self.services.chat.progressLogUpdate(operation_id, 0.5, f"Extracting {len(chat_documents)} document(s)")
        extracted_results = self.services.extraction.extractContent(chat_documents, extraction_options, operationId=operation_id)
        file_names = [getattr(cd, "fileName", "") or "" for cd in chat_documents]
        payload = build_extract_content_handover(
            extracted_results=extracted_results,
            chat_file_names=file_names,
            operation_ref=operation_id,
        )
        self.services.chat.progressLogUpdate(operation_id, 0.9, "Building JSON")
        stem = f"{wf}_{int(time.time())}"
        # Move image pixels into sidecar blob documents; the JSON keeps links only.
        stripped_payload, media_docs = _split_images_to_sidecar_documents(
            payload,
            document_name_stem=stem,
        )
        # Join text from the UN-stripped payload (images never carry text anyway).
        joined_text = _joined_text_from_handover_payload(payload)
        json_meta = {
            "actionType": "context.extractContent",
            "documentCountInput": len(chat_documents),
            "documentCountRoots": len(extracted_results),
            "handoverSchema": stripped_payload.get("kind"),
            "handoverRole": "structuredHandover",
            "mediaDocumentCount": len(media_docs),
        }
        json_doc = ActionDocument(
            documentName=f"extracted_content_{stem}.json",
            documentData=stripped_payload,
            mimeType="application/json",
            validationMetadata=json_meta,
        )
        handover_data = {
            "response": joined_text,
            "contentType": "text",
            "handoverKind": stripped_payload.get("kind"),
            "structuredDocumentIndex": 0,
            "mediaDocumentCount": len(media_docs),
        }
        self.services.chat.progressLogFinish(operation_id, True)
        return ActionResult.isSuccess(documents=[json_doc] + media_docs, data=handover_data)
    except Exception as e:
        logger.error(f"Error in content extraction: {str(e)}")
        try:
            if operation_id:
                self.services.chat.progressLogFinish(operation_id, False)
        except Exception:
            pass
        return ActionResult.isFailure(error=str(e))

View file

@ -1,239 +1,309 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import base64 as _b64
import logging
import time
from typing import Dict, Any
from typing import Any, Dict
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import (
DocumentReferenceList,
coerceDocumentReferenceList,
)
from modules.datamodels.datamodelDocref import coerceDocumentReferenceList
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
from .extractContent import _one_file_bucket
logger = logging.getLogger(__name__)
async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
operationId = None
try:
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
operationId = f"context_neutralize_{workflowId}_{int(time.time())}"
HANDOVER_KIND = "context.extractContent.handover.v1"
neutralizationEnabled = False
try:
config = self.services.neutralization.getConfig()
neutralizationEnabled = config and config.enabled
except Exception as e:
logger.debug(f"Could not check neutralization config: {str(e)}")
if not neutralizationEnabled:
logger.info("Neutralization is not enabled, returning documents unchanged")
# Return original documents if neutralization is disabled
documentListParam = parameters.get("documentList")
if not documentListParam:
return ActionResult.isFailure(error="documentList is required")
documentList = coerceDocumentReferenceList(documentListParam)
if not documentList.references:
return ActionResult.isFailure(
error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
)
# Get ChatDocuments from documentList
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
if not chatDocuments:
return ActionResult.isFailure(error="No documents found in documentList")
# Return original documents as ActionDocuments
actionDocuments = []
for chatDoc in chatDocuments:
# Extract ContentExtracted from documentData if available
if hasattr(chatDoc, 'documentData') and chatDoc.documentData:
actionDoc = ActionDocument(
documentName=getattr(chatDoc, 'fileName', 'unknown'),
documentData=chatDoc.documentData,
mimeType=getattr(chatDoc, 'mimeType', 'application/json'),
validationMetadata={
"actionType": "context.neutralizeData",
"neutralized": False,
"reason": "Neutralization disabled"
}
)
actionDocuments.append(actionDoc)
return ActionResult.isSuccess(documents=actionDocuments)
documentListParam = parameters.get("documentList")
if not documentListParam:
return ActionResult.isFailure(error="documentList is required")
documentList = coerceDocumentReferenceList(documentListParam)
if not documentList.references:
return ActionResult.isFailure(
error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
)
# Start progress tracking
parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart(
operationId,
"Neutralizing data from documents",
"Data Neutralization",
f"Documents: {len(documentList.references)}",
parentOperationId=parentOperationId
)
# Get ChatDocuments from documentList
self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
if not chatDocuments:
self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="No documents found in documentList")
logger.info(f"Neutralizing data from {len(chatDocuments)} documents")
# Process each document
self.services.chat.progressLogUpdate(operationId, 0.3, "Processing documents")
actionDocuments = []
for i, chatDoc in enumerate(chatDocuments):
try:
# Extract ContentExtracted from documentData
if not hasattr(chatDoc, 'documentData') or not chatDoc.documentData:
logger.warning(f"Document {i+1} has no documentData, skipping")
continue
documentData = chatDoc.documentData
# Check if it's a ContentExtracted object
if isinstance(documentData, ContentExtracted):
contentExtracted = documentData
elif isinstance(documentData, dict):
# Try to parse as ContentExtracted
try:
contentExtracted = ContentExtracted(**documentData)
except Exception as e:
logger.warning(f"Document {i+1} documentData is not ContentExtracted: {str(e)}")
continue
else:
logger.warning(f"Document {i+1} documentData is not ContentExtracted or dict")
continue
# Neutralize each ContentPart's data field
neutralizedParts = []
for part in contentExtracted.parts:
async def _neutralize_one_content_extracted(
    *,
    svc,
    content_extracted: ContentExtracted,
    operation_id: str,
    chat_doc_slot: int,
    chat_documents_len: int,
) -> ContentExtracted:
    """Neutralize every part inside a ContentExtracted (copied semantics from legacy inline loop).

    Fail-safe policy: parts whose neutralization fails or is incomplete are
    DROPPED, never passed through unmodified. Image parts are PII-checked
    (kept verbatim on "ok", dropped otherwise); text parts are replaced with
    their neutralized text; empty parts pass through unchanged.

    Fix: removed interleaved merge residue from the pre-refactor inline loop
    (duplicated appends and progress calls referencing the undefined names
    ``operationId``/``i``/``neutralizedParts``) that corrupted this function.
    """
    neutralized_parts = []
    for part in content_extracted.parts:
        if not isinstance(part, ContentPart):
            if isinstance(part, dict):
                try:
                    part = ContentPart(**part)
                except Exception as e:
                    # Unparseable dicts are passed through untouched.
                    logger.warning(f"Could not parse ContentPart: {str(e)}")
                    neutralized_parts.append(part)
                    continue
            else:
                neutralized_parts.append(part)
                continue
        _type_group = getattr(part, "typeGroup", "") or ""
        # Per-document progress slice within the 0.3–0.9 band.
        prog = 0.3 + (chat_doc_slot / max(1, chat_documents_len)) * 0.6
        if _type_group == "image" and part.data:
            try:
                svc.services.chat.progressLogUpdate(
                    operation_id,
                    prog,
                    f"Checking image part {len(neutralized_parts) + 1}",
                )
                _img_bytes = _b64.b64decode(str(part.data))
                _img_result = await svc.services.neutralization.processImageAsync(_img_bytes, f"part_{part.id}")
                if _img_result.get("status") == "ok":
                    neutralized_parts.append(part)
                else:
                    logger.warning("Fail-Safe: Image part %s blocked (PII), SKIPPING", part.id)
            except Exception as _img_err:
                logger.error(f"Fail-Safe: Image check failed for part {part.id}: {_img_err}, SKIPPING")
        elif part.data:
            try:
                svc.services.chat.progressLogUpdate(
                    operation_id,
                    prog,
                    f"Neutralizing part {len(neutralized_parts) + 1}",
                )
                neut_res = await svc.services.neutralization.processTextAsync(part.data)
                if neut_res and "neutralized_text" in neut_res:
                    neutral_data = neut_res["neutralized_text"]
                    neutralized_parts.append(
                        ContentPart(
                            id=part.id,
                            parentId=part.parentId,
                            label=part.label,
                            typeGroup=part.typeGroup,
                            mimeType=part.mimeType,
                            data=neutral_data,
                            metadata=part.metadata.copy() if part.metadata else {},
                        )
                    )
                else:
                    logger.warning(
                        "Fail-Safe: Neutralization incomplete for part %s — SKIPPING (not passing original)",
                        part.id,
                    )
                    continue
            except Exception as e:
                logger.error(f"Fail-Safe: Error neutralizing part {part.id}: {str(e)}, SKIPPING")
                continue
        else:
            # No data — nothing to neutralize, keep as-is.
            neutralized_parts.append(part)
    return ContentExtracted(
        id=content_extracted.id,
        parts=neutralized_parts,
        summary=content_extracted.summary,
    )
async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
    """Neutralize PII in extracted documents (handover bundles or legacy ContentExtracted).

    Handles three input shapes per document:
      * ``extract_media_*`` image sidecars — passed through untouched
        (their pixels are PII-checked when embedded as parts, not here);
      * the unified handover JSON (``context.extractContent.handover.v1``) —
        each file bucket is neutralized and the bundle rebuilt;
      * legacy per-document ContentExtracted objects/dicts.

    When neutralization is disabled by config, all documents are returned
    unchanged with ``neutralized: False`` metadata.

    Fixes: removed merge residue referencing undefined names
    (``contentExtracted``, ``neutralizedParts``, ``actionDoc``,
    ``operationId``), dropped pointless f-strings without placeholders, and
    restored the missing failure return at the end of the outer handler.
    """
    operation_id = None
    try:
        workflow_id = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operation_id = f"context_neutralize_{workflow_id}_{int(time.time())}"
        neutralization_enabled = False
        try:
            config = self.services.neutralization.getConfig()
            neutralization_enabled = config and config.enabled
        except Exception as e:
            logger.debug(f"Could not check neutralization config: {str(e)}")
        if not neutralization_enabled:
            logger.info("Neutralization is not enabled, returning documents unchanged")
            document_list_param = parameters.get("documentList")
            if not document_list_param:
                return ActionResult.isFailure(error="documentList is required")
            doc_list = coerceDocumentReferenceList(document_list_param)
            if not doc_list.references:
                return ActionResult.isFailure(error="documentList invalid (empty)")
            chat_docs = self.services.chat.getChatDocumentsFromDocumentList(doc_list)
            if not chat_docs:
                return ActionResult.isFailure(error="No documents found in documentList")
            action_documents = []
            for chat_doc in chat_docs:
                if hasattr(chat_doc, "documentData") and chat_doc.documentData:
                    action_documents.append(
                        ActionDocument(
                            documentName=getattr(chat_doc, "fileName", "unknown"),
                            documentData=chat_doc.documentData,
                            mimeType=getattr(chat_doc, "mimeType", "application/json"),
                            validationMetadata={
                                "actionType": "context.neutralizeData",
                                "neutralized": False,
                                "reason": "Neutralization disabled",
                            },
                        )
                    )
            return ActionResult.isSuccess(documents=action_documents)
        document_list_param = parameters.get("documentList")
        if not document_list_param:
            return ActionResult.isFailure(error="documentList is required")
        doc_list = coerceDocumentReferenceList(document_list_param)
        if not doc_list.references:
            return ActionResult.isFailure(error="documentList invalid")
        parent_operation_id = parameters.get("parentOperationId")
        self.services.chat.progressLogStart(
            operation_id,
            "Neutralizing data from documents",
            "Data Neutralization",
            f"Documents: {len(doc_list.references)}",
            parentOperationId=parent_operation_id,
        )
        self.services.chat.progressLogUpdate(operation_id, 0.2, "Loading documents")
        chat_documents = self.services.chat.getChatDocumentsFromDocumentList(doc_list)
        if not chat_documents:
            self.services.chat.progressLogFinish(operation_id, False)
            return ActionResult.isFailure(error="No documents found in documentList")
        logger.info(f"Neutralizing data from {len(chat_documents)} document(s)")
        self.services.chat.progressLogUpdate(operation_id, 0.3, "Processing documents")
        action_documents = []
        for i, chat_doc in enumerate(chat_documents):
            try:
                dd = getattr(chat_doc, "documentData", None)
                if not dd:
                    logger.warning(f"Document {i + 1} has no documentData, skipping")
                    continue
                fn = str(getattr(chat_doc, "fileName", "") or "")
                mime_guess = str(getattr(chat_doc, "mimeType", "") or "").lower()
                # Image sidecars emitted by extractContent are passed through.
                if (
                    mime_guess.startswith("image/")
                    and fn.startswith("extract_media_")
                    and not (isinstance(dd, dict) and dd.get("kind") == HANDOVER_KIND)
                ):
                    action_documents.append(
                        ActionDocument(
                            documentName=fn or f"media_{i + 1}",
                            documentData=dd,
                            mimeType=mime_guess or "application/octet-stream",
                            validationMetadata={
                                "actionType": "context.neutralizeData",
                                "neutralized": False,
                                "reason": "extractContent_media_sidecar_pass_through",
                            },
                        )
                    )
                    continue
                # --- Unified JSON envelope from context.extractContent (v1) ---
                if isinstance(dd, dict) and dd.get("kind") == HANDOVER_KIND:
                    bundle = dict(dd)
                    files_section = dd.get("files") or {}
                    new_files = {}
                    for fk, bucket in files_section.items():
                        if not isinstance(bucket, dict):
                            continue
                        parts_raw = bucket.get("parts") or []
                        parsed_parts = []
                        for pd in parts_raw:
                            parsed_parts.append(ContentPart(**pd) if isinstance(pd, dict) else pd)
                        summary = bucket.get("summary") or {}
                        if hasattr(summary, "model_dump"):
                            summary = summary.model_dump(mode="json")
                        ce = ContentExtracted(
                            id=str(bucket.get("extractedId") or ""),
                            parts=parsed_parts,
                            summary=summary if isinstance(summary, dict) else {},
                        )
                        ce_out = await _neutralize_one_content_extracted(
                            svc=self,
                            content_extracted=ce,
                            operation_id=operation_id,
                            chat_doc_slot=i,
                            chat_documents_len=max(len(chat_documents), 1),
                        )
                        new_files[fk] = _one_file_bucket(ce_out, str(bucket.get("sourceFileName") or fk))
                    bundle["files"] = new_files
                    original_filename = getattr(chat_doc, "fileName", f"neutralized_bundle_{workflow_id}.json")
                    bn = original_filename.rsplit(".", 1)[0] if "." in original_filename else original_filename
                    action_documents.append(
                        ActionDocument(
                            documentName=f"{bn}_neutralized.json",
                            documentData=bundle,
                            mimeType="application/json",
                            validationMetadata={
                                "actionType": "context.neutralizeData",
                                "neutralized": True,
                                "handoverKind": HANDOVER_KIND,
                                "bundleFileCount": len(new_files),
                            },
                        )
                    )
                    continue
                # --- Legacy ContentExtracted per persisted document ---
                if isinstance(dd, ContentExtracted):
                    content_extracted = dd
                elif isinstance(dd, dict):
                    try:
                        content_extracted = ContentExtracted(**dd)
                    except Exception:
                        logger.warning(f"Document {i + 1} documentData cannot be parsed as ContentExtracted dict")
                        continue
                else:
                    logger.warning(f"Document {i + 1} documentData is not supported")
                    continue
                neut_out = await _neutralize_one_content_extracted(
                    svc=self,
                    content_extracted=content_extracted,
                    operation_id=operation_id,
                    chat_doc_slot=i,
                    chat_documents_len=max(len(chat_documents), 1),
                )
                original_file_name = getattr(chat_doc, "fileName", f"document_{i + 1}.json")
                base_name = original_file_name.rsplit(".", 1)[0] if "." in original_file_name else original_file_name
                document_name = f"{base_name}_neutralized_{neut_out.id}.json"
                action_documents.append(
                    ActionDocument(
                        documentName=document_name,
                        documentData=neut_out,
                        mimeType="application/json",
                        validationMetadata={
                            "actionType": "context.neutralizeData",
                            "documentIndex": i,
                            "extractedId": neut_out.id,
                            "partCount": len(neut_out.parts),
                            "neutralized": True,
                            "originalFileName": original_file_name,
                        },
                    )
                )
            except Exception as e:
                logger.error(f"Error processing document {i + 1}: {str(e)}")
                # Continue with the remaining documents.
                continue
        if not action_documents:
            self.services.chat.progressLogFinish(operation_id, False)
            return ActionResult.isFailure(error="No valid documents found to neutralize")
        self.services.chat.progressLogFinish(operation_id, True)
        return ActionResult.isSuccess(documents=action_documents)
    except Exception as e:
        logger.error(f"Error in data neutralization: {str(e)}")
        try:
            if operation_id:
                self.services.chat.progressLogFinish(operation_id, False)
        except Exception:
            pass
        return ActionResult.isFailure(error=str(e))

View file

@ -52,7 +52,14 @@ class MethodContext(MethodBase):
),
"extractContent": WorkflowActionDefinition(
actionId="context.extractContent",
description="Extract raw content parts from documents without AI processing. Returns ContentParts with different typeGroups (text, image, table, structure, container). Images are returned as base64 data, not as extracted text. Text content is extracted from text-based formats (PDF text layers, Word docs, etc.) but NOT from images (no OCR). Use this action to prepare documents for subsequent AI processing actions.",
description=(
"Extract document content without AI. Unified handover: (1) `documents[0]` "
"JSON `context.extractContent.handover.v1` with text in `parts` and image placeholders "
"linking to sibling blobs via `handoverMediaDocumentName`; "
"(2) each extracted image as a separate binary document (`extract_media_*`); "
"(3) `data.response` / top-level `response` after normalization — concatenated plain text "
"for prompts and file.create. Pick `response`, a specific document, or deep JSON paths."
),
dynamicMode=True,
outputType="UdmDocument",
parameters={
@ -61,15 +68,8 @@ class MethodContext(MethodBase):
type="DocumentList",
frontendType=FrontendType.DOCUMENT_REFERENCE,
required=True,
description="Document reference(s) to extract content from"
description="Document reference(s) to extract content from",
),
"extractionOptions": WorkflowActionParameter(
name="extractionOptions",
type="Dict[str,Any]",
frontendType=FrontendType.JSON,
required=False,
description="Extraction options (if not provided, defaults are used). Note: This action does NOT use AI - it performs pure content extraction. Images are preserved as base64 data, not converted to text."
)
},
execute=extractContent.__get__(self, self.__class__)
),

View file

@ -1,18 +1,25 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
from typing import Dict, Any, Optional
from typing import Any, Dict, List, Optional
import base64
import binascii
import io
import json
import logging
import re
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
from modules.shared.i18nRegistry import normalizePrimaryLanguageTag
from modules.workflows.automation2.executors.actionNodeExecutor import _coerce_document_data_to_bytes
from modules.workflows.methods.methodAi._common import is_image_action_document_list, serialize_context
logger = logging.getLogger(__name__)
_SAFE_FILENAME = re.compile(r'[^\w\-.\(\)\s\[\]%@+]')
def _persistDocumentsToUserFiles(
action_documents: list,
@ -88,13 +95,155 @@ def _persistDocumentsToUserFiles(
logger.warning("file.create: failed to persist document %s: %s", dname, e)
def _sanitize_output_stem(title: str) -> str:
    """Turn a user-supplied title into a filesystem-safe filename stem.

    Unsafe characters become underscores, leading/trailing dots and
    underscores are trimmed, and the result is capped at 120 characters;
    empty input falls back to ``"Document"``.
    """
    cleaned = (title or "").strip() or "Document"
    stem = _SAFE_FILENAME.sub("_", cleaned).strip("._")
    if not stem:
        return "Document"
    return stem[:120]
def _get_management_interface(services) -> Optional[Any]:
    """Return the DB management interface, preferring the one already on *services*.

    Falls back to constructing one from the services' user/mandate/feature
    context; returns None (with a warning) when neither path is possible.
    """
    direct = getattr(services, "interfaceDbComponent", None)
    if direct:
        return direct
    try:
        import modules.interfaces.interfaceDbManagement as iface

        user = getattr(services, "user", None)
        if not user:
            return None
        return iface.getInterface(
            user,
            mandateId=getattr(services, "mandateId", None) or "",
            featureInstanceId=getattr(services, "featureInstanceId", None) or "",
        )
    except Exception as e:
        logger.warning("file.create: could not get management interface: %s", e)
        return None
def _load_image_bytes_from_action_doc(doc: dict, services) -> Optional[bytes]:
    """Resolve image bytes for a serialized ActionDocument.

    Prefers inline ``documentData``; otherwise looks up ``fileId`` (top-level
    or inside ``validationMetadata``) via the management interface. Returns
    None when no bytes can be obtained.
    """
    inline = _coerce_document_data_to_bytes(doc.get("documentData"))
    if inline:
        return inline
    file_id = doc.get("fileId")
    if not file_id and isinstance(doc.get("validationMetadata"), dict):
        file_id = (doc.get("validationMetadata") or {}).get("fileId")
    if file_id and str(file_id).strip():
        mgmt = _get_management_interface(services)
        if mgmt and hasattr(mgmt, "getFileData"):
            try:
                return mgmt.getFileData(str(file_id))
            except Exception as e:
                logger.warning("file.create: getFileData(%s) failed: %s", file_id, e)
    return None
def _images_list_to_pdf(image_bytes_list: List[bytes]) -> bytes:
    """One PDF page per image; embedded raster data via PyMuPDF."""
    import fitz

    pdf = fitz.open()
    try:
        for image_blob in image_bytes_list:
            new_page = pdf.new_page()
            # Fill the page while keeping the image's aspect ratio.
            new_page.insert_image(new_page.rect, stream=image_blob, keep_proportion=True)
        return pdf.tobytes()
    finally:
        pdf.close()
def _images_list_to_docx(image_bytes_list: List[bytes]) -> bytes:
    """Images embedded in the document package (inline shapes), not hyperlinks."""
    from docx import Document
    from docx.shared import Inches

    document = Document()
    for image_blob in image_bytes_list:
        paragraph = document.add_paragraph()
        picture_run = paragraph.add_run()
        # 6.5" fits the default letter page inside standard margins.
        picture_run.add_picture(io.BytesIO(image_blob), width=Inches(6.5))
        document.add_paragraph()
    buffer = io.BytesIO()
    document.save(buffer)
    return buffer.getvalue()
async def _create_merged_image_documents(
    self,
    parameters: Dict[str, Any],
    image_docs: List[dict],
) -> ActionResult:
    """Build one PDF or DOCX containing all extracted images (``imageDocumentsOnly``).

    Parameters:
        parameters: the regular ``file.create`` parameters (``outputFormat``,
            ``title``, optional ``folderId``).
        image_docs: serialized image ActionDocuments from an upstream node.

    Returns a user-facing (German) failure for unsupported formats or
    unreadable images; on success the merged document is persisted to the
    user's files and returned as the single output document.

    Fix: the DOCX branch now mirrors the PDF branch's error handling —
    previously a python-docx failure propagated as an unhandled exception
    instead of a clean ActionResult failure.
    """
    output_format = (parameters.get("outputFormat") or "docx").strip().lower().lstrip(".")
    title = (parameters.get("title") or "Document").strip()
    stem = _sanitize_output_stem(title)
    folder_id: Optional[str] = None
    raw_folder = parameters.get("folderId")
    if raw_folder is not None and str(raw_folder).strip():
        folder_id = str(raw_folder).strip()
    if output_format not in ("pdf", "docx"):
        return ActionResult.isFailure(
            error=(
                f"Nur-Bilder-Kontext: „{output_format}“ wird nicht unterstützt. "
                "Bitte Ausgabeformat „pdf“ oder „docx“ wählen."
            )
        )
    blobs: List[bytes] = []
    for d in image_docs:
        b = _load_image_bytes_from_action_doc(d, self.services)
        if not b:
            name = d.get("documentName") or "?"
            return ActionResult.isFailure(
                error=f"Bilddaten fehlen oder sind nicht lesbar (Datei: {name})."
            )
        blobs.append(b)
    if output_format == "pdf":
        try:
            combined = _images_list_to_pdf(blobs)
        except Exception as e:
            logger.warning("file.create: PDF merge failed: %s", e, exc_info=True)
            return ActionResult.isFailure(error=f"PDF aus Bildern konnte nicht erzeugt werden: {e}")
        out_name = f"{stem}.pdf"
        mime = "application/pdf"
    else:
        try:
            combined = _images_list_to_docx(blobs)
        except Exception as e:
            logger.warning("file.create: DOCX merge failed: %s", e, exc_info=True)
            return ActionResult.isFailure(error=f"DOCX aus Bildern konnte nicht erzeugt werden: {e}")
        out_name = f"{stem}.docx"
        mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    if not combined:
        return ActionResult.isFailure(error="Zusammenfügen der Bilder ergab leere Ausgabe")
    doc_b64 = base64.b64encode(combined).decode("ascii")
    action_documents = [
        ActionDocument(
            documentName=out_name,
            documentData=doc_b64,
            mimeType=mime,
            validationMetadata={
                "actionType": "file.create",
                "outputFormat": output_format,
                "source": "mergedImageDocumentsOnly",
            },
        )
    ]
    _persistDocumentsToUserFiles(action_documents, self.services, folder_id=folder_id)
    return ActionResult.isSuccess(documents=action_documents)
async def create(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Create a file from context (text/markdown from upstream AI node).
Uses GenerationService.renderReport to produce docx, pdf, txt, md, html, xlsx, etc.
"""
from modules.workflows.methods.methodAi._common import serialize_context
raw_context = parameters.get("context", "") or parameters.get("text", "") or ""
if isinstance(raw_context, list) and is_image_action_document_list(raw_context):
return await _create_merged_image_documents(self, parameters, raw_context)
context = serialize_context(raw_context)
if not context:

View file

@ -0,0 +1,63 @@
# Unit tests: unified extractContent handover (text vs image sidecars).
import base64
from modules.workflows.methods.methodContext.actions import extractContent as ec
def test_joined_text_from_handover_orders_text_parts_only():
    """Only ``typeGroup == "text"`` parts contribute to the joined plain text."""
    parts = [
        {"typeGroup": "text", "data": " A\n", "id": "x"},
        {"typeGroup": "container", "data": "", "id": "c"},
        {"typeGroup": "text", "data": "B", "id": "y"},
    ]
    handover = {
        "kind": ec.HANDOVER_KIND,
        "fileOrder": ["f1"],
        "files": {"f1": {"parts": parts}},
    }

    # Non-text parts are skipped; text parts are joined in file order.
    assert ec._joined_text_from_handover_payload(handover) == "A\n\nB"
def test_split_images_moves_pixels_to_blob_docs():
    """Image bytes move into sidecar blob documents; the handover keeps a name reference."""
    raw = b"fake-binary-image"
    encoded = base64.b64encode(raw).decode("ascii")

    image_part = {
        "typeGroup": "image",
        "mimeType": "image/png",
        "data": encoded,
        "id": "p1-img",
        "metadata": {},
    }
    payload = {
        "kind": ec.HANDOVER_KIND,
        "schemaVersion": 1,
        "fileOrder": ["f1"],
        "files": {
            "f1": {
                "parts": [
                    {"typeGroup": "text", "data": "x", "id": "t1"},
                    image_part,
                ]
            }
        },
    }

    stripped, blobs = ec._split_images_to_sidecar_documents(payload, document_name_stem="abc")

    # Exactly one sidecar document carrying the decoded pixel bytes.
    assert len(blobs) == 1
    sidecar = blobs[0]
    assert sidecar.mimeType == "image/png"
    assert sidecar.documentData == raw
    assert sidecar.documentName.startswith("extract_media_")
    assert sidecar.documentName.endswith(".png")
    assert (sidecar.validationMetadata or {}).get("handoverRole") == "extractedMedia"

    # The stripped handover still lists the image part, but emptied and
    # linked to the sidecar by document name.
    remaining_images = [
        part
        for part in stripped["files"]["f1"]["parts"]
        if isinstance(part, dict) and (part.get("typeGroup") or "") == "image"
    ]
    assert len(remaining_images) == 1
    assert remaining_images[0]["data"] == ""
    assert remaining_images[0]["handoverMediaDocumentName"] == sidecar.documentName
    assert "image" in stripped["files"]["f1"]["byTypeGroup"]

View file

@ -21,8 +21,11 @@ def test_context_extractContent_node_shape():
assert node["meta"]["usesAi"] is False
assert node["_method"] == "context"
assert node["_action"] == "extractContent"
assert node["outputPorts"][0]["schema"] == "UdmDocument"
assert node["outputPorts"][0]["schema"] == "ActionResult"
assert "DocumentList" in node["inputPorts"][0]["accepts"]
assert "LoopItem" in node["inputPorts"][0]["accepts"]
names = [p["name"] for p in node["parameters"]]
assert names == ["documentList"]
def test_udm_port_types_registered():