# Source: gateway/modules/workflows/methods/methodContext/methodContext.py (266 lines, 13 KiB, Python)

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import logging
from modules.workflows.methods.methodBase import MethodBase
from modules.datamodels.datamodelWorkflowActions import WorkflowActionDefinition, WorkflowActionParameter
from modules.shared.frontendTypes import FrontendType
# Import helpers
from .helpers.documentIndex import DocumentIndexHelper
from .helpers.formatting import FormattingHelper
# Import actions
from .actions.getDocumentIndex import getDocumentIndex
from .actions.extractContent import extractContent
from .actions.neutralizeData import neutralizeData
from .actions.triggerPreprocessingServer import triggerPreprocessingServer
from .actions.setContext import setContext
from .actions.mergeContext import mergeContext
from .actions.filterContext import filterContext
from .actions.transformContext import transformContext
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class MethodContext(MethodBase):
    """Context and workflow information methods.

    Declares eight actions under the ``context.*`` actionId namespace:
    getDocumentIndex, extractContent, neutralizeData,
    triggerPreprocessingServer, setContext, mergeContext, filterContext and
    transformContext. The action implementations are the functions imported
    from ``.actions``; ``__init__`` binds them to the instance.
    """

    def __init__(self, services):
        """Create helpers, declare the action definitions and expose the actions.

        Args:
            services: Forwarded unchanged to ``MethodBase.__init__``.
        """
        super().__init__(services)
        self.name = "context"
        self.description = "Context and workflow information methods"

        # Helper modules; each one receives this method instance.
        self.documentIndex = DocumentIndexHelper(self)
        self.formatting = FormattingHelper(self)

        # Bind every imported action function to this instance once. The
        # bound callables are used as the ``execute`` entry of the action
        # definitions and, further down, as instance attributes.
        owner = self.__class__
        boundActions = {
            "getDocumentIndex": getDocumentIndex.__get__(self, owner),
            "extractContent": extractContent.__get__(self, owner),
            "neutralizeData": neutralizeData.__get__(self, owner),
            "triggerPreprocessingServer": triggerPreprocessingServer.__get__(self, owner),
            "setContext": setContext.__get__(self, owner),
            "mergeContext": mergeContext.__get__(self, owner),
            "filterContext": filterContext.__get__(self, owner),
            "transformContext": transformContext.__get__(self, owner),
        }

        # RBAC integration: every action definition carries an actionId.
        getDocumentIndexAction = WorkflowActionDefinition(
            actionId="context.getDocumentIndex",
            description="Generate a comprehensive index of all documents available in the current workflow",
            dynamicMode=True,
            outputType="DocumentList",
            parameters={
                "resultType": WorkflowActionParameter(
                    name="resultType",
                    type="str",
                    frontendType=FrontendType.SELECT,
                    frontendOptions=["json", "txt", "md"],
                    required=False,
                    default="json",
                    description="Output format",
                ),
            },
            execute=boundActions["getDocumentIndex"],
        )

        extractContentAction = WorkflowActionDefinition(
            actionId="context.extractContent",
            description=(
                "Extract document content without AI. Unified handover: (1) `documents[0]` "
                "JSON `context.extractContent.handover.v1` with text in `parts` and image placeholders "
                "linking to sibling blobs via `handoverMediaDocumentName`; "
                "(2) each extracted image as a separate binary document (`extract_media_*`); "
                "(3) `data.response` / top-level `response` after normalization — concatenated plain text "
                "for prompts and file.create. Pick `response`, a specific document, or deep JSON paths."
            ),
            dynamicMode=True,
            outputType="UdmDocument",
            parameters={
                "documentList": WorkflowActionParameter(
                    name="documentList",
                    type="DocumentList",
                    frontendType=FrontendType.DOCUMENT_REFERENCE,
                    required=True,
                    description="Document reference(s) to extract content from",
                ),
            },
            execute=boundActions["extractContent"],
        )

        neutralizeDataAction = WorkflowActionDefinition(
            actionId="context.neutralizeData",
            description="Neutralize extracted data from ContentExtracted documents (for use after extractContent)",
            outputType="DocumentList",
            parameters={
                "documentList": WorkflowActionParameter(
                    name="documentList",
                    type="DocumentList",
                    frontendType=FrontendType.DOCUMENT_REFERENCE,
                    required=True,
                    description="Document reference(s) containing ContentExtracted objects to neutralize",
                ),
            },
            execute=boundActions["neutralizeData"],
        )

        triggerPreprocessingServerAction = WorkflowActionDefinition(
            actionId="context.triggerPreprocessingServer",
            description="Trigger preprocessing server at customer tenant to update database with configuration",
            outputType="ActionResult",
            parameters={
                "endpoint": WorkflowActionParameter(
                    name="endpoint",
                    type="str",
                    frontendType=FrontendType.TEXT,
                    required=True,
                    description="The full URL endpoint for the preprocessing server API",
                ),
                "configJson": WorkflowActionParameter(
                    name="configJson",
                    type="str",
                    frontendType=FrontendType.JSON,
                    required=True,
                    description="Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string",
                ),
                "authSecretConfigKey": WorkflowActionParameter(
                    name="authSecretConfigKey",
                    type="str",
                    frontendType=FrontendType.TEXT,
                    required=True,
                    description="The APP_CONFIG key name to retrieve the authorization secret from",
                ),
            },
            execute=boundActions["triggerPreprocessingServer"],
        )

        setContextAction = WorkflowActionDefinition(
            actionId="context.setContext",
            description=(
                "Set workflow context: list of assignments with target key, then upstream picker, "
                "fixed literal, or human task per row."
            ),
            outputType="Transit",
            parameters={
                "scope": WorkflowActionParameter(
                    name="scope",
                    type="str",
                    frontendType=FrontendType.SELECT,
                    frontendOptions=["local", "global", "session"],
                    required=False,
                    default="local",
                    description="Storage scope for keys written by this node",
                ),
                "assignments": WorkflowActionParameter(
                    name="assignments",
                    type="list",
                    frontendType=FrontendType.CONTEXT_ASSIGNMENTS,
                    required=True,
                    default=[],
                    description=(
                        "List of rows: contextKey, valueSource (pickUpstream | literal | humanTask), "
                        "upstreamRef, literal, sourcePath, mode, valueType, task fields."
                    ),
                ),
            },
            execute=boundActions["setContext"],
        )

        mergeContextAction = WorkflowActionDefinition(
            actionId="context.mergeContext",
            description=(
                "Merge data arriving from multiple parallel branches into a single "
                "MergeResult. Strategies: shallow, deep, firstWins, lastWins, "
                "errorOnConflict. The execution engine waits for all connected "
                "predecessors before invoking this action (waitsForAllPredecessors=True)."
            ),
            outputType="MergeResult",
            parameters={
                "strategy": WorkflowActionParameter(
                    name="strategy",
                    type="str",
                    frontendType=FrontendType.SELECT,
                    frontendOptions=["shallow", "deep", "firstWins", "lastWins", "errorOnConflict"],
                    required=False,
                    default="deep",
                    description="Conflict resolution strategy for keys present in several branches",
                ),
                "waitFor": WorkflowActionParameter(
                    name="waitFor",
                    type="int",
                    frontendType=FrontendType.NUMBER,
                    required=False,
                    default=0,
                    description="Number of branches to consume (0 = all). Used together with timeoutMs.",
                ),
                "timeoutMs": WorkflowActionParameter(
                    name="timeoutMs",
                    type="int",
                    frontendType=FrontendType.NUMBER,
                    required=False,
                    default=30000,
                    description="Maximum wait time in milliseconds before continuing with available inputs",
                ),
            },
            execute=boundActions["mergeContext"],
        )

        filterContextAction = WorkflowActionDefinition(
            actionId="context.filterContext",
            description=(
                "Allow- or block-list keys/paths from the upstream payload. "
                "Supports glob patterns (user.*, *.id) and dotted paths (address.city). "
                "Missing-key behaviour is configurable (skip, nullFill, error)."
            ),
            outputType="Transit",
            parameters={
                "mode": WorkflowActionParameter(
                    name="mode",
                    type="str",
                    frontendType=FrontendType.SELECT,
                    frontendOptions=["allow", "block"],
                    required=False,
                    default="allow",
                    description="allow = only these keys pass; block = these keys are removed",
                ),
                "keys": WorkflowActionParameter(
                    name="keys",
                    type="list",
                    frontendType=FrontendType.JSON,
                    required=True,
                    default=[],
                    description="Key paths or glob patterns",
                ),
                "missingKeyBehavior": WorkflowActionParameter(
                    name="missingKeyBehavior",
                    type="str",
                    frontendType=FrontendType.SELECT,
                    frontendOptions=["skip", "nullFill", "error"],
                    required=False,
                    default="skip",
                    description="What to do when an allowed key is missing in the input",
                ),
                "preserveMeta": WorkflowActionParameter(
                    name="preserveMeta",
                    type="bool",
                    frontendType=FrontendType.CHECKBOX,
                    required=False,
                    default=True,
                    description="Always pass through internal meta fields (_success, _error, _transit)",
                ),
            },
            execute=boundActions["filterContext"],
        )

        transformContextAction = WorkflowActionDefinition(
            actionId="context.transformContext",
            description=(
                "Transform the upstream payload via a list of {sourceField, outputField, "
                "operation, type, expression} mappings. Operations: rename, cast, nest, "
                "flatten, compute. compute uses {{...}} templates; nesting is implicit "
                "via dotted outputField paths."
            ),
            outputType="Transit",
            parameters={
                "mappings": WorkflowActionParameter(
                    name="mappings",
                    type="list",
                    frontendType=FrontendType.MAPPING_TABLE,
                    required=True,
                    default=[],
                    description="List of mapping entries",
                ),
                "passthroughUnmapped": WorkflowActionParameter(
                    name="passthroughUnmapped",
                    type="bool",
                    frontendType=FrontendType.CHECKBOX,
                    required=False,
                    default=False,
                    description="Forward fields of the upstream payload that no mapping consumed",
                ),
                "flattenDepth": WorkflowActionParameter(
                    name="flattenDepth",
                    type="int",
                    frontendType=FrontendType.NUMBER,
                    required=False,
                    default=1,
                    description="Depth for flatten operation (1 = one level, -1 = full)",
                ),
            },
            execute=boundActions["transformContext"],
        )

        # Action registry, keyed by the short action name.
        self._actions = {
            "getDocumentIndex": getDocumentIndexAction,
            "extractContent": extractContentAction,
            "neutralizeData": neutralizeDataAction,
            "triggerPreprocessingServer": triggerPreprocessingServerAction,
            "setContext": setContextAction,
            "mergeContext": mergeContextAction,
            "filterContext": filterContextAction,
            "transformContext": transformContextAction,
        }

        # Validate actions after definition (presumably implemented by
        # MethodBase; it is not defined in this class).
        self._validateActions()

        # Register actions as methods (optional, for direct access), in the
        # same order as they were declared above.
        for actionName, boundAction in boundActions.items():
            setattr(self, actionName, boundAction)