platform-core/modules/workflows/automation2/executionEngine.py
ValueOn AG bc7c6fe27c
Some checks failed
Deploy Plattform-Core (Int) / test (push) Failing after 13s
Deploy Plattform-Core (Int) / deploy (push) Has been skipped
elimination of technical issues (imports)
2026-06-06 00:32:45 +02:00

1602 lines
74 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# Main execution engine for automation2 graphs.
import asyncio
import json
import logging
import time
import uuid
from datetime import datetime, timezone
from typing import Dict, Any, List, Set, Optional
from modules.workflows.automation2.graphUtils import (
parseGraph,
buildConnectionMap,
validateGraph,
topoSort,
getInputSources,
getLoopBodyNodeIds,
getLoopDoneNodeIds,
getLoopPrimaryInputSource,
)
from modules.workflows.automation2.executors import (
TriggerExecutor,
FlowExecutor,
ActionNodeExecutor,
InputExecutor,
DataExecutor,
PauseForHumanTaskError,
PauseForEmailWaitError,
)
from modules.features.graphicalEditor.portTypes import normalizeToSchema, wrapTransit, unwrapTransit
from modules.features.graphicalEditor.nodeDefinitions import STATIC_NODE_TYPES
from modules.shared.serviceExceptions import SubscriptionInactiveException as _SubscriptionInactiveException, BillingContextError as _BillingContextError
from modules.workflows.automation2.graphicalEditorRunFileLogger import (
GraphicalEditorRunFileLogger,
graphical_editor_run_file_logging_enabled,
merge_run_context_with_ge_log_prefix,
)
from modules.workflows.automation2.runEnvelope import normalize_run_envelope
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Lazy lookup cache used by _getNodeDef(): node type id -> static node definition.
# Starts empty and is filled from STATIC_NODE_TYPES on the first lookup.
_NODE_DEF_BY_ID: Dict[str, dict] = {}
# Registry of currently executing runs: runId -> context dict.
# Used by requestRunStop() to set context["_stopped"] = True.
_activeRunContexts: Dict[str, Dict[str, Any]] = {}
def requestRunStop(runId: str) -> bool:
    """Flag an active run so that it stops at the next node boundary.

    The run is looked up in the module-level registry of active run contexts
    and ``_stopped`` is set on its context dict.

    Returns:
        True when the run was registered and has been flagged, False when no
        active run with that id is known.
    """
    activeContext = _activeRunContexts.get(runId)
    if activeContext is None:
        logger.warning("requestRunStop: runId=%s not found in active runs", runId)
        return False
    activeContext["_stopped"] = True
    logger.info("requestRunStop: flagged runId=%s for stop", runId)
    return True
def getActiveRunIds() -> list:
    """Return the ids of all runs currently registered as executing (as a new list)."""
    return list(_activeRunContexts)
def _getNodeDef(nodeType: str) -> Optional[dict]:
    """Return the static node definition whose ``id`` equals *nodeType*, or None.

    The lookup table is built from ``STATIC_NODE_TYPES`` while the module-level
    cache is still empty and is reused on later calls.
    """
    if len(_NODE_DEF_BY_ID) == 0:
        _NODE_DEF_BY_ID.update(
            (definition["id"], definition) for definition in STATIC_NODE_TYPES
        )
    return _NODE_DEF_BY_ID.get(nodeType)
def _outputSchemaForNode(nodeType: str) -> Optional[str]:
    """Return the schema name declared on output port 0 of *nodeType*, or None.

    The port is looked up under the key ``0`` and, failing that, ``"0"``.
    A plain string ``schema`` is returned as is. A ``schema`` dict with
    ``kind == "fromGraph"`` resolves to the port's ``fromGraphResultSchema``
    override, falling back to ``"FormPayload"``.
    """
    definition = _getNodeDef(nodeType)
    if not definition:
        return None
    outputPorts = definition.get("outputPorts")
    if not isinstance(outputPorts, dict):
        return None
    firstPort = outputPorts.get(0) or outputPorts.get("0")
    if not isinstance(firstPort, dict):
        return None
    schemaSpec = firstPort.get("schema")
    if isinstance(schemaSpec, str):
        return schemaSpec
    if isinstance(schemaSpec, dict) and schemaSpec.get("kind") == "fromGraph":
        # Read override from the port definition — ``FormPayload`` is the
        # fallback for true form nodes; dynamic context nodes (e.g.
        # context.transformContext) declare ``fromGraphResultSchema`` to
        # avoid wrong normalization.
        return firstPort.get("fromGraphResultSchema") or "FormPayload"
    return None
def _isBarrierNode(nodeType: str) -> bool:
    """Tell whether *nodeType* must wait for all connected predecessors.

    Backwards compatible: ``flow.merge`` is always a barrier. Any other node may
    declare ``waitsForAllPredecessors: True`` in its STATIC_NODE_TYPES entry;
    the first entry whose ``id`` matches decides.

    Note: ``context.mergeContext`` is NOT a barrier — it receives its list of
    inputs via the ``dataSource`` DataRef parameter (typically ``loop.bodyResults``)
    and executes once its single upstream edge is satisfied.
    """
    if nodeType == "flow.merge":
        return True
    definition = next(
        (candidate for candidate in STATIC_NODE_TYPES if candidate.get("id") == nodeType),
        None,
    )
    if definition is None:
        return False
    return bool(definition.get("waitsForAllPredecessors"))
def _allMergePredecessorsReady(
    nodeId: str,
    connectionMap: Dict[str, List],
    nodeOutputs: Dict[str, Any],
) -> bool:
    """Barrier check: True when every predecessor of *nodeId* has an entry in *nodeOutputs*.

    Only key presence is tested, so a predecessor recorded with a ``None``
    output counts as ready. A node without incoming connections is ready.
    """
    incoming = connectionMap.get(nodeId, [])
    return all(sourceId in nodeOutputs for sourceId, _, _ in incoming)
def _normalizeResult(result: Any, nodeType: str) -> Any:
    """Normalize a dict result against the node type's declared output schema.

    The result is returned unchanged when the node type has no schema, when the
    schema is ``"Transit"``, when the result is not a dict, or when
    ``normalizeToSchema`` raises (the failure is logged as a warning).
    """
    schema = _outputSchemaForNode(nodeType)
    if not schema or schema == "Transit" or not isinstance(result, dict):
        return result
    try:
        normalized = normalizeToSchema(result, schema)
    except Exception as e:
        logger.warning(f"_normalizeResult failed for nodeType={nodeType}, schema={schema}: {e}")
        return result
    return normalized
def _getNodeTypeIds(services: Any = None) -> Set[str]:
    """Collect the ``id`` of every entry in STATIC_NODE_TYPES.

    ``services`` is accepted but not used by this function.
    """
    knownIds: Set[str] = set()
    for definition in STATIC_NODE_TYPES:
        knownIds.add(definition["id"])
    return knownIds
def _is_node_on_active_path(
    nodeId: str,
    connectionMap: Dict[str, List],
    nodeOutputs: Dict[str, Any],
) -> bool:
    """Return True if this node receives input only from active branches.

    Every incoming connection ``(source, source_output, _)`` of *nodeId* is
    checked against the routing metadata of the source's output:

    * a source without an output (missing or ``None``) makes the node inactive;
    * non-dict outputs carry no routing metadata and never deactivate the node;
    * ``branch`` (if set) must equal the connection's source output;
    * otherwise a non-empty ``matches`` list must contain the source output;
    * otherwise ``match`` (if set) must be non-negative and equal the source output.

    Transit envelopes: routing metadata is in out["_meta"] (branch/match).
    Legacy format: branch/match directly on out.
    A Transit envelope whose ``_meta`` is missing, ``None`` or not a dict is
    treated as carrying no routing metadata instead of raising.
    """
    for src, source_output, _ in connectionMap.get(nodeId, []):
        out = nodeOutputs.get(src)
        if out is None:
            return False
        if not isinstance(out, dict):
            continue
        if out.get("_transit"):
            meta = out.get("_meta")
            if not isinstance(meta, dict):
                # e.g. ``"_meta": None`` — nothing to route on.
                meta = {}
        else:
            meta = out
        branch = meta.get("branch")
        if branch is not None:
            if source_output != branch:
                return False
            continue
        matches = meta.get("matches")
        if isinstance(matches, list) and matches:
            if source_output not in matches:
                return False
            continue
        match = meta.get("match")
        if match is not None:
            # A negative match index means "no output matched".
            if match < 0 or source_output != match:
                return False
    return True
def _getExecutor(
    nodeType: str,
    services: Any,
    automation2_interface: Optional[Any] = None,
) -> Any:
    """Pick the executor for *nodeType* from the prefix before its first dot.

    ``trigger.``, ``flow.`` and ``data.`` map to their dedicated executors; the
    action families (ai, email, sharepoint, clickup, file, trustee, context)
    share ``ActionNodeExecutor``; ``input.`` nodes get an ``InputExecutor`` only
    when an automation2 interface is supplied. Anything else yields None.
    """
    family, separator, _ = nodeType.partition(".")
    if not separator:
        # No "<family>." prefix at all — no executor can match.
        return None
    if family == "trigger":
        return TriggerExecutor()
    if family == "flow":
        return FlowExecutor()
    if family == "data":
        return DataExecutor()
    actionFamilies = ("ai", "email", "sharepoint", "clickup", "file", "trustee", "context")
    if family in actionFamilies:
        return ActionNodeExecutor(services)
    if family == "input" and automation2_interface:
        return InputExecutor(automation2_interface)
    return None
# Metadata of step logs that were created but not yet updated:
# stepId -> {"runId", "nodeId", "nodeType"}. Written by _createStepLog() and
# popped by _updateStepLog() to build the follow-up step event.
_stepMeta: Dict[str, Dict[str, str]] = {}
def _stripBinaryValues(obj: Any, depth: int = 0) -> Any:
    """Recursively replace binary values with None to keep data JSON-safe for DB storage.

    ``bytes``, ``bytearray`` and ``memoryview`` values become ``None``. Dicts
    are copied key by key; lists and tuples are copied into new lists. All
    other values are returned as they are.

    Args:
        obj: Arbitrary value to clean.
        depth: Current recursion depth (callers normally leave the default).

    Returns:
        A cleaned copy of *obj*. Anything nested deeper than 12 levels is
        returned untouched, so it may still contain binary values.
    """
    if depth > 12:
        return obj
    if isinstance(obj, (bytes, bytearray, memoryview)):
        return None
    if isinstance(obj, dict):
        return {key: _stripBinaryValues(value, depth + 1) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [_stripBinaryValues(item, depth + 1) for item in obj]
    return obj
def _serializableOutputs(nodeOutputs: Dict[str, Any]) -> Dict[str, Any]:
    """Build a JSON-safe copy of *nodeOutputs*.

    The ``_context`` entry is left out and binary values are replaced via
    ``_stripBinaryValues``; the input dict itself is not modified.
    """
    withoutContext = dict(nodeOutputs)
    withoutContext.pop("_context", None)
    return _stripBinaryValues(withoutContext)
def _merge_node_parameters_into_snap(
    snap: Optional[Dict[str, Any]],
    *,
    node_id: Optional[str],
    context: Optional[Dict[str, Any]],
) -> Dict[str, Any]:
    """Return a shallow copy of *snap* with the node's graph parameters attached.

    The parameters are read from ``context['graphNodesById'][node_id]`` and
    stored (as a copy) under ``nodeParameters`` — no leading underscore, so the
    field survives consumers that hide ``_*`` keys. When the node id, the
    context, the lookup map or the node entry is missing or not a dict, the
    plain copy of *snap* is returned without ``nodeParameters``.
    """
    merged: Dict[str, Any] = dict(snap or {})
    graphNodes = None
    if node_id and isinstance(context, dict):
        graphNodes = context.get("graphNodesById")
    graphNode = graphNodes.get(node_id) if isinstance(graphNodes, dict) else None
    if isinstance(graphNode, dict):
        merged["nodeParameters"] = dict(graphNode.get("parameters") or {})
    return merged
def _emitStepEvent(runId: str, stepData: Dict[str, Any]) -> None:
    """Emit a step-log SSE event to any listening client for this run.

    The event is scheduled on the currently running asyncio loop as a
    fire-and-forget task on the queue ``run-trace-<runId>``. Nothing happens
    when that queue does not exist or when no event loop is running in the
    calling thread. Any other failure is logged as a warning and swallowed —
    tracing must never break the run.
    """
    try:
        from modules.serviceCenter.core.serviceStreaming.eventManager import get_event_manager
        em = get_event_manager()
        queueId = f"run-trace-{runId}"
        if not em.has_queue(queueId):
            return
        try:
            # get_event_loop() is deprecated outside a running loop; ask for
            # the running loop explicitly instead.
            asyncio.get_running_loop()
        except RuntimeError:
            # Called from synchronous code without a loop: nobody could run the task.
            return
        asyncio.ensure_future(em.emit_event(queueId, "step", stepData, event_category="tracing"))
    except Exception as e:
        logger.warning(f"_emitStepEvent failed for runId={runId}: {e}")
def _createStepLog(iface, runId: str, nodeId: str, nodeType: str, status: str = "running", inputSnapshot: Dict = None) -> Optional[str]:
    """Create an AutoStepLog record and announce it as a step event.

    Returns:
        The new step log id, or None when no interface / run id is given or
        when creating the record fails (the failure is logged at debug level).
    """
    if not (iface and runId):
        return None
    try:
        from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import AutoStepLog
        stepId = str(uuid.uuid4())
        startedAt = time.time()
        identity = {"runId": runId, "nodeId": nodeId, "nodeType": nodeType}
        record = {
            "id": stepId,
            **identity,
            "status": status,
            # Binary payloads are dropped so the snapshot stays JSON-safe.
            "inputSnapshot": _stripBinaryValues(inputSnapshot) if inputSnapshot else {},
            "startedAt": startedAt,
        }
        iface.db.recordCreate(AutoStepLog, record)
        # Remembered so the later update can emit its event without a DB read.
        _stepMeta[stepId] = dict(identity)
        _emitStepEvent(runId, {"id": stepId, **identity, "status": status, "startedAt": startedAt})
        return stepId
    except Exception as e:
        logger.debug("Could not create AutoStepLog: %s", e)
        return None
def _updateStepLog(iface, stepId: str, status: str, output: Dict = None, error: str = None,
                   durationMs: int = None, tokensUsed: int = 0, retryCount: int = 0) -> None:
    """Update an AutoStepLog entry with results and emit the matching step event.

    ``status`` and ``completedAt`` are always written; ``output`` (binary values
    stripped), ``error`` and ``durationMs`` only when given; ``tokensUsed`` and
    ``retryCount`` only when non-zero. Does nothing without an interface or
    step id. Failures are logged at debug level and never raised.

    The cached ``_stepMeta`` entry for the step is always released, even when
    the update fails, so failed updates cannot accumulate stale entries.
    """
    if not iface or not stepId:
        return
    try:
        from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import AutoStepLog
        completedAt = time.time()
        updates: Dict[str, Any] = {
            "status": status,
            "completedAt": completedAt,
        }
        if output is not None:
            updates["output"] = _stripBinaryValues(output)
        if error is not None:
            updates["error"] = error
        if durationMs is not None:
            updates["durationMs"] = durationMs
        if tokensUsed:
            updates["tokensUsed"] = tokensUsed
        if retryCount:
            updates["retryCount"] = retryCount
        iface.db.recordModify(AutoStepLog, stepId, updates)
        meta = _stepMeta.pop(stepId, None)
        if meta:
            _emitStepEvent(meta["runId"], {
                "id": stepId, "runId": meta["runId"], "nodeId": meta["nodeId"],
                "nodeType": meta["nodeType"], "status": status,
                "completedAt": completedAt, "durationMs": durationMs,
                "error": error, "tokensUsed": tokensUsed, "retryCount": retryCount,
            })
    except Exception as e:
        logger.debug("Could not update AutoStepLog %s: %s", stepId, e)
    finally:
        # No-op after a successful update (already popped above); on failure it
        # prevents the module-level cache from growing without bound.
        _stepMeta.pop(stepId, None)
def _ge_iso_timestamp() -> str:
    """Current UTC time for NDJSON logs as ``YYYY-MM-DDTHH:MM:SS.mmmZ`` (millisecond precision)."""
    nowUtc = datetime.now(timezone.utc)
    # isoformat() renders the UTC offset as "+00:00"; the log format uses "Z".
    return nowUtc.isoformat(timespec="milliseconds").replace("+00:00", "Z")
async def _ge_log_node_finished(
    file_logger: Optional[GraphicalEditorRunFileLogger],
    *,
    run_id: Optional[str],
    node_outputs: Dict[str, Any],
    run_envelope: Optional[Dict[str, Any]],
    node_id: str,
    node_type: str,
    status: str,
    input_snap: Optional[Dict[str, Any]],
    output: Any = None,
    error: Optional[str] = None,
    duration_ms: Optional[int] = None,
    retry_count: Optional[int] = None,
    skip_reason: Optional[str] = None,
    loop_index: Optional[int] = None,
    loop_node_id: Optional[str] = None,
    loop_item: Optional[Any] = None,
    exec_context: Optional[Dict[str, Any]] = None,
) -> None:
    """Write one node-execution line and one workflow-context snapshot line (NDJSON).

    Does nothing when no file logger or run id is given. The execution line
    carries the input snapshot (with the node's graph parameters merged in) and
    only those optional fields that were supplied. The context line carries the
    serializable node outputs and the run envelope as they are after this node.
    Both lines share the same timestamp; binary values are stripped throughout.
    """
    if file_logger is None or not run_id:
        return
    stamp = _ge_iso_timestamp()
    inputWithParams = _merge_node_parameters_into_snap(input_snap, node_id=node_id, context=exec_context)

    executionLine: Dict[str, Any] = {
        "timestamp": stamp,
        "runId": run_id,
        "nodeId": node_id,
        "nodeType": node_type,
        "status": status,
        "input": _stripBinaryValues(inputWithParams),
    }
    if skip_reason:
        executionLine["skipReason"] = skip_reason
    plainOptionals = (
        ("durationMs", duration_ms),
        ("retryCount", retry_count),
        ("loopIndex", loop_index),
        ("loopNodeId", loop_node_id),
    )
    for fieldName, fieldValue in plainOptionals:
        if fieldValue is not None:
            executionLine[fieldName] = fieldValue
    if loop_item is not None:
        executionLine["loopItem"] = _stripBinaryValues(loop_item)
    if error is not None:
        executionLine["error"] = error
    if output is not None:
        cleanedOutput = _stripBinaryValues(output)
        # Non-dict outputs are wrapped so the "output" field is always an object.
        executionLine["output"] = cleanedOutput if isinstance(output, dict) else {"value": cleanedOutput}
    await file_logger.append_node_execution_line(executionLine)

    contextLine: Dict[str, Any] = {
        "timestamp": stamp,
        "runId": run_id,
        "afterNodeId": node_id,
        "afterNodeType": node_type,
        "afterStatus": status,
        "nodeOutputsSnapshot": _serializableOutputs(node_outputs),
        "runEnvelope": _stripBinaryValues(dict(run_envelope or {})),
    }
    for fieldName, fieldValue in (("loopIndex", loop_index), ("loopNodeId", loop_node_id)):
        if fieldValue is not None:
            contextLine[fieldName] = fieldValue
    await file_logger.append_context_snapshot_line(contextLine)
async def _executeWithRetry(executor, node, context, maxRetries: int = 0, retryDelaySeconds: float = 1.0):
    """Execute a node with the optional retry policy from its parameters.

    ``parameters.retryMaxAttempts`` and ``parameters.retryDelaySeconds``
    override *maxRetries* and *retryDelaySeconds*. Values that are ``None`` or
    not numeric fall back to the function arguments, and negative values are
    clamped to 0, so the node is always executed at least once. The delay
    doubles after every failed attempt, capped at 60 seconds.

    Returns:
        ``(result, attempt)`` where *attempt* is the number of failed attempts
        before the successful one (0 when the first try succeeded).

    Raises:
        PauseForHumanTaskError, PauseForEmailWaitError,
        SubscriptionInactiveException, BillingContextError: re-raised
            immediately, never retried.
        Exception: the error of the last attempt once all retries are used up.
    """
    params = node.get("parameters") or {}

    def _numericParam(key: str, fallback, cast):
        """Read a non-negative number from the node parameters, else *fallback*."""
        raw = params.get(key)
        if raw is None:
            raw = fallback
        try:
            return max(cast(0), cast(raw))
        except (TypeError, ValueError):
            return max(cast(0), cast(fallback))

    retries = _numericParam("retryMaxAttempts", maxRetries, int)
    delay = _numericParam("retryDelaySeconds", retryDelaySeconds, float)
    attempt = 0
    while True:
        try:
            result = await executor.execute(node, context)
            return result, attempt
        except (PauseForHumanTaskError, PauseForEmailWaitError, _SubscriptionInactiveException, _BillingContextError):
            # Pauses and billing problems are control flow, not transient failures.
            raise
        except Exception as e:
            attempt += 1
            if attempt > retries:
                raise
            logger.warning(
                "Node %s failed (attempt %d/%d), retrying in %.1fs: %s",
                node.get("id"), attempt, retries + 1, delay, e,
            )
            await asyncio.sleep(delay)
            delay = min(delay * 2, 60)
def _substituteFeatureInstancePlaceholders(
    graph: Dict[str, Any],
    targetFeatureInstanceId: str,
) -> Dict[str, Any]:
    """Replace ``{{featureInstanceId}}`` placeholders in the serialised graph.

    Works on the full JSON representation so that placeholders inside nested
    parameter dicts, prompt strings, etc. are all caught. Already-resolved
    concrete UUIDs (pre-baked by ``_copyTemplateWorkflows``) are left untouched
    because the placeholder literal ``{{featureInstanceId}}`` will not match.

    The replacement value is JSON-escaped before it is spliced into the
    serialised text, so an id containing quotes or backslashes cannot corrupt
    the JSON.

    Returns:
        The original *graph* object when it contains no placeholder, otherwise
        a new graph parsed from the substituted JSON.
    """
    placeholder = "{{featureInstanceId}}"
    raw = json.dumps(graph)
    if placeholder not in raw:
        return graph
    # json.dumps() yields a quoted JSON string; drop the surrounding quotes to
    # get the escaped text that is safe to embed inside existing JSON strings.
    escapedTarget = json.dumps(str(targetFeatureInstanceId))[1:-1]
    replaced = raw.replace(placeholder, escapedTarget)
    logger.debug(
        "_substituteFeatureInstancePlaceholders: resolved %d occurrence(s) -> %s",
        raw.count(placeholder),
        targetFeatureInstanceId,
    )
    return json.loads(replaced)
async def _run_post_loop_done_nodes(
    *,
    loop_node_id: str,
    body_ids: Set[str],
    items: List[Any],
    ordered: List[Dict],
    connectionMap: Dict[str, List],
    nodeOutputs: Dict[str, Any],
    context: Dict[str, Any],
    services: Any,
    automation2_interface: Optional[Any],
    runId: Optional[str],
    processed_in_loop: Set[str],
    ge_file_logger: Optional[GraphicalEditorRunFileLogger] = None,
) -> Optional[Dict[str, Any]]:
    """After all loop iterations: merge upstream into loop output and run the Done (output 1) branch once.

    Steps:
      1. The loop node's output slot is replaced by a Transit envelope built
         from the primary upstream output plus ``items`` / ``count`` and, when
         present on the previous loop state, ``bodyResults``.
      2. Every node reachable from the loop's Done output that is not part of
         the loop body is executed once, in the order given by *ordered*.
         Nodes on an inactive branch are logged as ``skipped``; nodes without
         an executor get a ``None`` output.
      3. On success the Done-only node ids are added to *processed_in_loop*.

    Each executed node is recorded in the step log and, if *ge_file_logger* is
    set, in the run's NDJSON file log. The duration is measured once per
    outcome so both logs report the same value.

    Returns:
        Always None.

    Raises:
        RuntimeError: the loop's primary upstream output is not a dict.
        PauseForHumanTaskError, PauseForEmailWaitError: a Done node paused the
            run (its step is logged as ``completed`` before re-raising).
        Exception: any failure of a Done node, re-raised after being logged as
            ``failed``. In all raising cases *processed_in_loop* is not updated.
    """

    def _elapsedMs(startedAt: float) -> int:
        """Milliseconds elapsed since *startedAt* (a ``time.time()`` value)."""
        return int((time.time() - startedAt) * 1000)

    def _inputSnapshot(targetId: str, seed: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Collect the available predecessor outputs of *targetId* plus its node parameters."""
        snap: Dict[str, Any] = dict(seed or {})
        for sourceId, _, _ in connectionMap.get(targetId, []):
            if sourceId in nodeOutputs:
                snap[sourceId] = nodeOutputs[sourceId]
        return _merge_node_parameters_into_snap(snap, node_id=targetId, context=context)

    async def _fileLog(targetId: str, targetType: str, status: str,
                       snap: Dict[str, Any], **extra: Any) -> None:
        """Write the NDJSON log lines for one finished Done-branch node."""
        await _ge_log_node_finished(
            ge_file_logger,
            run_id=runId,
            node_outputs=nodeOutputs,
            run_envelope=context.get("runEnvelope"),
            exec_context=context,
            node_id=targetId,
            node_type=targetType,
            status=status,
            input_snap=snap,
            **extra,
        )

    # --- 1. Rebuild the loop node's final output -------------------------
    primaryInput = getLoopPrimaryInputSource(loop_node_id, connectionMap, body_ids)
    upstreamOutput = nodeOutputs.get(primaryInput[0]) if primaryInput else None
    if isinstance(upstreamOutput, dict) and upstreamOutput.get("_transit"):
        basePayload = unwrapTransit(upstreamOutput)
    else:
        basePayload = upstreamOutput
    # ``bodyResults`` lives on the plain iteration-state dict; after resume / edge
    # cases the loop slot may still be wrapped in Transit — unwrap before read.
    previousLoopState = nodeOutputs.get(loop_node_id)
    if isinstance(previousLoopState, dict) and previousLoopState.get("_transit"):
        previousLoopState = unwrapTransit(previousLoopState)
    bodyResults = previousLoopState.get("bodyResults") if isinstance(previousLoopState, dict) else None
    if not isinstance(basePayload, dict):
        raise RuntimeError(
            f"flow.loop {loop_node_id}: primary upstream output must be a dict (JSON handover / node output); "
            f"got {type(basePayload).__name__}"
        )
    mergedLoopOutput = {**basePayload, "items": items, "count": len(items)}
    if bodyResults is not None:
        mergedLoopOutput["bodyResults"] = bodyResults
    nodeOutputs[loop_node_id] = wrapTransit(mergedLoopOutput, {"loopCompleted": True, "loopNodeId": loop_node_id})

    # --- 2. Run the Done branch once ---------------------------------------
    doneOnlyIds = getLoopDoneNodeIds(loop_node_id, connectionMap) - body_ids
    for doneNode in (n for n in ordered if n.get("id") in doneOnlyIds):
        doneId = doneNode.get("id")
        if not doneId or context.get("_stopped"):
            break
        doneType = doneNode.get("type", "")

        if not _is_node_on_active_path(doneId, connectionMap, nodeOutputs):
            skipSnap = _inputSnapshot(doneId, {"_skipReason": "inactive_branch"})
            skippedStepId = _createStepLog(automation2_interface, runId, doneId, doneType,
                                           status="skipped", inputSnapshot=skipSnap)
            if skippedStepId:
                _updateStepLog(automation2_interface, skippedStepId, "skipped")
            await _fileLog(
                doneId, doneType, "skipped", skipSnap,
                skip_reason=str(skipSnap.get("_skipReason") or "inactive_branch"),
            )
            continue

        executor = _getExecutor(doneType, services, automation2_interface)
        if not executor:
            nodeOutputs[doneId] = None
            continue

        startedAt = time.time()
        inputSnap = _inputSnapshot(doneId)
        stepId = _createStepLog(automation2_interface, runId, doneId, doneType, "running", inputSnap)
        try:
            result, retryCount = await _executeWithRetry(executor, doneNode, context)
            result = _normalizeResult(result, doneType)
            nodeOutputs[doneId] = result
            durationMs = _elapsedMs(startedAt)
            tokensUsed = result.get("tokensUsed", 0) if isinstance(result, dict) else 0
            _updateStepLog(automation2_interface, stepId, "completed",
                           output=result if isinstance(result, dict) else {"value": result},
                           durationMs=durationMs, tokensUsed=tokensUsed, retryCount=retryCount)
            await _fileLog(doneId, doneType, "completed", inputSnap,
                           output=result, duration_ms=durationMs, retry_count=retryCount)
        except (PauseForHumanTaskError, PauseForEmailWaitError):
            # A pause is not a failure: the step itself finished, the run waits.
            pausedMs = _elapsedMs(startedAt)
            _updateStepLog(automation2_interface, stepId, "completed", durationMs=pausedMs)
            await _fileLog(doneId, doneType, "completed", inputSnap, duration_ms=pausedMs)
            raise
        except (_SubscriptionInactiveException, _BillingContextError):
            failedMs = _elapsedMs(startedAt)
            _updateStepLog(automation2_interface, stepId, "failed",
                           error="Subscription/Billing error", durationMs=failedMs)
            await _fileLog(doneId, doneType, "failed", inputSnap,
                           error="Subscription/Billing error", duration_ms=failedMs)
            raise
        except Exception as nodeError:
            failedMs = _elapsedMs(startedAt)
            _updateStepLog(automation2_interface, stepId, "failed",
                           error=str(nodeError), durationMs=failedMs)
            await _fileLog(doneId, doneType, "failed", inputSnap,
                           error=str(nodeError), duration_ms=failedMs)
            raise

    # --- 3. Mark the Done branch as handled for the main execution loop ----
    processed_in_loop.update(doneOnlyIds)
    return None
async def executeGraph(
graph: Dict[str, Any],
services: Any,
workflowId: str = None,
instanceId: str = None,
userId: str = None,
mandateId: str = None,
automation2_interface: Optional[Any] = None,
initialNodeOutputs: Optional[Dict[str, Any]] = None,
startAfterNodeId: Optional[str] = None,
runId: Optional[str] = None,
run_envelope: Optional[Dict[str, Any]] = None,
label: Optional[str] = None,
targetFeatureInstanceId: Optional[str] = None,
) -> Dict[str, Any]:
"""
Execute automation2 graph. Returns { success, nodeOutputs, error?, stopped? }.
When an input node is reached and automation2_interface is provided, creates a task,
pauses the run, and returns { success: False, paused: True, taskId, runId }.
For resume: pass initialNodeOutputs (with result for the human node) and startAfterNodeId.
For fresh runs: pass run_envelope (unified start payload for the start node); normalized with userId into context.runEnvelope.
targetFeatureInstanceId: resolves {{featureInstanceId}} placeholders in the graph JSON before execution.
"""
logger.info(
"executeGraph start: instanceId=%s workflowId=%s userId=%s mandateId=%s resume=%s targetInstance=%s",
instanceId,
workflowId,
userId,
mandateId,
startAfterNodeId is not None,
targetFeatureInstanceId,
)
from modules.workflows.processing.shared.methodDiscovery import discoverMethods
discoverMethods(services)
from modules.workflows.automation2.pickNotPushMigration import (
materializeConnectionRefs,
materializePrimaryTextHandover,
materializeRecommendedDataPickRef,
normalizeFileCreatePresentationRefs,
)
from modules.workflows.automation2.featureInstanceRefMigration import (
materializeFeatureInstanceRefs,
)
if targetFeatureInstanceId:
graph = _substituteFeatureInstancePlaceholders(graph, targetFeatureInstanceId)
# Phase-5 Schicht-4: typed-ref envelopes are materialized FIRST so the
# subsequent connection-ref pass and validation see the canonical shape.
graph = materializeFeatureInstanceRefs(graph)
graph = materializeConnectionRefs(graph)
graph = materializePrimaryTextHandover(graph)
graph = materializeRecommendedDataPickRef(graph)
graph = normalizeFileCreatePresentationRefs(graph)
nodeTypeIds = _getNodeTypeIds(services)
logger.debug("executeGraph nodeTypeIds (%d): %s", len(nodeTypeIds), sorted(nodeTypeIds))
errors = validateGraph(graph, nodeTypeIds)
if errors:
logger.warning("executeGraph validation failed: %s", errors)
return {"success": False, "error": "; ".join(errors), "nodeOutputs": {}}
nodes, connections = parseGraph(graph)[:2]
connectionMap = buildConnectionMap(connections)
inputSources = {n["id"]: getInputSources(n["id"], connectionMap) for n in nodes if n.get("id")}
logger.info(
"executeGraph parsed: nodes=%d connections=%d connectionMap_targets=%s",
len(nodes),
len(connections),
list(connectionMap.keys()),
)
ordered = topoSort(nodes, connectionMap)
ordered_ids = [n.get("id") for n in ordered if n.get("id")]
logger.info("executeGraph topoSort order: %s", ordered_ids)
# Normalize resumed human-node output BEFORE copying into nodeOutputs — otherwise
# normalizeToSchema only updates initialNodeOutputs and loop/refs still see raw
# e.g. input.upload {files} without coerced DocumentList.documents.
is_resume = startAfterNodeId is not None
if is_resume and initialNodeOutputs and startAfterNodeId:
resumedNode = next((n for n in nodes if n.get("id") == startAfterNodeId), None)
if resumedNode:
resumedType = resumedNode.get("type", "")
resumedOutput = initialNodeOutputs.get(startAfterNodeId)
if isinstance(resumedOutput, dict):
schema = _outputSchemaForNode(resumedType)
if schema and schema != "Transit":
try:
initialNodeOutputs[startAfterNodeId] = normalizeToSchema(resumedOutput, schema)
except Exception as valErr:
logger.warning("executeGraph resume: schema validation failed for %s: %s", startAfterNodeId, valErr)
ge_file_logger: Optional[GraphicalEditorRunFileLogger] = None
nodeOutputs: Dict[str, Any] = dict(initialNodeOutputs or {})
if not runId and automation2_interface and workflowId and not is_resume:
run_context = {
"connectionMap": connectionMap,
"inputSources": inputSources,
"orderedNodeIds": ordered_ids,
}
if userId:
run_context["ownerId"] = userId
if mandateId:
run_context["mandateId"] = mandateId
if instanceId:
run_context["instanceId"] = instanceId
run_label = label
if not run_label and automation2_interface and workflowId:
try:
wfObj = automation2_interface.getWorkflow(workflowId)
if wfObj:
wfDict = wfObj if isinstance(wfObj, dict) else (
wfObj.model_dump() if hasattr(wfObj, "model_dump") else {}
)
run_label = wfDict.get("label")
except Exception:
pass
if not run_label:
ts = datetime.now(timezone.utc).strftime("%d.%m.%Y %H:%M")
run_label = f"Manuell ({ts})"
run = automation2_interface.createRun(
workflowId=workflowId,
nodeOutputs=nodeOutputs,
context=run_context,
label=run_label,
)
runId = run.get("id") if run else None
logger.info("executeGraph created run %s label=%s", runId, run_label)
if runId and graphical_editor_run_file_logging_enabled():
ge_file_logger = GraphicalEditorRunFileLogger.bootstrap_new_run(
automation2_interface,
runId,
run_context,
)
env_for_run = normalize_run_envelope(run_envelope, user_id=userId)
graph_nodes_by_id: Dict[str, Any] = {
str(n["id"]): n for n in nodes if n.get("id")
}
context = {
"workflowId": workflowId,
"instanceId": instanceId,
"userId": userId,
"mandateId": mandateId,
"nodeOutputs": nodeOutputs,
"connectionMap": connectionMap,
"inputSources": inputSources,
"services": services,
"_runId": runId,
"_orderedNodes": ordered,
"runEnvelope": env_for_run,
"graphNodesById": graph_nodes_by_id,
}
# Lets graph actions (e.g. ``context.setContext`` human-task mode) call
# ``createTask`` / ``updateRun`` without threading the interface through services.
if automation2_interface:
context["_automation2Interface"] = automation2_interface
# _context key in nodeOutputs for system variable resolution
nodeOutputs["_context"] = context
if runId:
_activeRunContexts[runId] = context
if (
graphical_editor_run_file_logging_enabled()
and automation2_interface
and runId
and ge_file_logger is None
):
ge_file_logger = GraphicalEditorRunFileLogger.ensure_attached(
automation2_interface,
runId,
)
skip_until_passed = bool(startAfterNodeId)
processed_in_loop: Set[str] = set()
_aggregateAccumulators: Dict[str, list] = {}
STEPLOG_BATCH_THRESHOLD = 100
AGGREGATE_FLUSH_THRESHOLD = 1000
_aggregateTempChunks: Dict[str, List[list]] = {}
# Check for loop resume: run was paused inside a loop, we're resuming for next iteration
run = automation2_interface.getRun(runId) if (runId and automation2_interface) else None
loop_resume_state = (run.get("context") or {}).get("_loopState") if run else None
if loop_resume_state and startAfterNodeId:
loop_node_id = loop_resume_state.get("loopNodeId")
next_index = loop_resume_state.get("currentIndex", -1) + 1
items = loop_resume_state.get("items") or []
body_ids = getLoopBodyNodeIds(loop_node_id, connectionMap) if loop_node_id else set()
body_ordered = [n for n in ordered if n.get("id") in body_ids]
processed_in_loop = set(body_ids) | {loop_node_id} if loop_node_id else set()
_resume_feedback_body_node_id = None
for _fb_src, _fb_so, _fb_ti in (connectionMap.get(loop_node_id) or []):
if _fb_src in body_ids and _fb_ti == 0:
_resume_feedback_body_node_id = _fb_src
break
if not _resume_feedback_body_node_id and body_ordered:
_resume_feedback_body_node_id = body_ordered[-1].get("id")
_resume_body_results: List[Any] = []
while next_index < len(items) and loop_node_id:
nodeOutputs[loop_node_id] = {
"items": items,
"count": len(items),
"currentItem": items[next_index],
"currentIndex": next_index,
}
context["_loopState"] = {"loopNodeId": loop_node_id, "currentIndex": next_index, "items": items}
for body_node in body_ordered:
bnid = body_node.get("id")
if not bnid or context.get("_stopped"):
break
if not _is_node_on_active_path(bnid, connectionMap, nodeOutputs):
continue
executor = _getExecutor(body_node.get("type", ""), services, automation2_interface)
if not executor:
nodeOutputs[bnid] = None
continue
_rStepStart = time.time()
_rInputSnap = {"_loopItem": items[next_index], "_loopIndex": next_index}
for _rSrc, _, _ in connectionMap.get(bnid, []):
if _rSrc in nodeOutputs:
_rInputSnap[_rSrc] = nodeOutputs[_rSrc]
_rInputSnap = _merge_node_parameters_into_snap(_rInputSnap, node_id=bnid, context=context)
_rStepId = _createStepLog(automation2_interface, runId, bnid, body_node.get("type", ""), "running", _rInputSnap)
try:
result, _rRetry = await _executeWithRetry(executor, body_node, context)
if body_node.get("type") == "data.aggregate":
if bnid not in _aggregateAccumulators:
_aggregateAccumulators[bnid] = []
accItems = result.get("items", [result]) if isinstance(result, dict) else [result]
_aggregateAccumulators[bnid].extend(accItems)
nodeOutputs[bnid] = result
_rDur = int((time.time() - _rStepStart) * 1000)
_updateStepLog(automation2_interface, _rStepId, "completed",
output=result if isinstance(result, dict) else {"value": result},
durationMs=_rDur, retryCount=_rRetry)
await _ge_log_node_finished(
ge_file_logger,
run_id=runId,
node_outputs=nodeOutputs,
run_envelope=context.get("runEnvelope"),
exec_context=context,
node_id=bnid,
node_type=body_node.get("type", ""),
status="completed",
input_snap=_rInputSnap,
output=result,
duration_ms=_rDur,
retry_count=_rRetry,
loop_index=next_index,
loop_node_id=loop_node_id,
loop_item=items[next_index],
)
logger.info("executeGraph loop resume body node %s done (iter %d, retries=%d)", bnid, next_index, _rRetry)
if _resume_feedback_body_node_id and bnid == _resume_feedback_body_node_id:
_resume_body_results.append(result)
except PauseForHumanTaskError as e:
_rPauseDur = int((time.time() - _rStepStart) * 1000)
_updateStepLog(automation2_interface, _rStepId, "completed",
durationMs=_rPauseDur)
await _ge_log_node_finished(
ge_file_logger,
run_id=runId,
node_outputs=nodeOutputs,
run_envelope=context.get("runEnvelope"),
exec_context=context,
node_id=bnid,
node_type=body_node.get("type", ""),
status="completed",
input_snap=_rInputSnap,
duration_ms=_rPauseDur,
loop_index=next_index,
loop_node_id=loop_node_id,
loop_item=items[next_index],
)
if automation2_interface:
run_ctx = dict(run.get("context") or {})
run_ctx["_loopState"] = {"loopNodeId": loop_node_id, "currentIndex": next_index, "items": items}
automation2_interface.updateRun(runId, status="paused", nodeOutputs=_serializableOutputs(nodeOutputs), currentNodeId=e.nodeId, context=run_ctx)
return {"success": False, "paused": True, "taskId": e.taskId, "runId": e.runId, "nodeId": e.nodeId, "nodeOutputs": _serializableOutputs(nodeOutputs)}
except PauseForEmailWaitError as e:
_rEmailDur = int((time.time() - _rStepStart) * 1000)
_updateStepLog(automation2_interface, _rStepId, "completed",
durationMs=_rEmailDur)
await _ge_log_node_finished(
ge_file_logger,
run_id=runId,
node_outputs=nodeOutputs,
run_envelope=context.get("runEnvelope"),
exec_context=context,
node_id=bnid,
node_type=body_node.get("type", ""),
status="completed",
input_snap=_rInputSnap,
duration_ms=_rEmailDur,
loop_index=next_index,
loop_node_id=loop_node_id,
loop_item=items[next_index],
)
raise
except (_SubscriptionInactiveException, _BillingContextError):
_rFailDurSb = int((time.time() - _rStepStart) * 1000)
_updateStepLog(automation2_interface, _rStepId, "failed",
error="Subscription/Billing error", durationMs=_rFailDurSb)
await _ge_log_node_finished(
ge_file_logger,
run_id=runId,
node_outputs=nodeOutputs,
run_envelope=context.get("runEnvelope"),
exec_context=context,
node_id=bnid,
node_type=body_node.get("type", ""),
status="failed",
input_snap=_rInputSnap,
error="Subscription/Billing error",
duration_ms=_rFailDurSb,
loop_index=next_index,
loop_node_id=loop_node_id,
loop_item=items[next_index],
)
raise
except Exception as ex:
_rFailDurEx = int((time.time() - _rStepStart) * 1000)
_updateStepLog(automation2_interface, _rStepId, "failed",
error=str(ex), durationMs=_rFailDurEx)
await _ge_log_node_finished(
ge_file_logger,
run_id=runId,
node_outputs=nodeOutputs,
run_envelope=context.get("runEnvelope"),
exec_context=context,
node_id=bnid,
node_type=body_node.get("type", ""),
status="failed",
input_snap=_rInputSnap,
error=str(ex),
duration_ms=_rFailDurEx,
loop_index=next_index,
loop_node_id=loop_node_id,
loop_item=items[next_index],
)
logger.exception("executeGraph loop body node %s FAILED: %s", bnid, ex)
nodeOutputs[bnid] = {"error": str(ex), "success": False}
if runId and automation2_interface:
automation2_interface.updateRun(runId, status="failed", nodeOutputs=_serializableOutputs(nodeOutputs))
if runId:
_activeRunContexts.pop(runId, None)
return {"success": False, "error": str(ex), "nodeOutputs": _serializableOutputs(nodeOutputs), "failedNode": bnid, "runId": runId}
next_index += 1
if loop_node_id:
for aggId, accItems in _aggregateAccumulators.items():
nodeOutputs[aggId] = {"items": accItems, "count": len(accItems), "_success": True}
_aggregateAccumulators.clear()
if _resume_body_results:
_rlo = nodeOutputs.get(loop_node_id)
if isinstance(_rlo, dict):
_rlo["bodyResults"] = _resume_body_results
nodeOutputs[loop_node_id] = _rlo
await _run_post_loop_done_nodes(
loop_node_id=loop_node_id,
body_ids=body_ids,
items=items,
ordered=ordered,
connectionMap=connectionMap,
nodeOutputs=nodeOutputs,
context=context,
services=services,
automation2_interface=automation2_interface,
runId=runId,
processed_in_loop=processed_in_loop,
ge_file_logger=ge_file_logger,
)
for i, node in enumerate(ordered):
if skip_until_passed:
if node.get("id") == startAfterNodeId:
skip_until_passed = False
continue
if node.get("id") in processed_in_loop:
continue
if context.get("_stopped"):
logger.info("executeGraph stopped early at step %d", i)
break
nodeId = node.get("id")
nodeType = node.get("type", "")
# flow.loop: the feedback edge (body → loop input 0) hasn't run yet on the first
# pass → would make _is_node_on_active_path return False. Only check the
# *primary* predecessor (the one outside the loop body).
if nodeType == "flow.loop":
_loop_body_ids = getLoopBodyNodeIds(nodeId, connectionMap)
_loop_primary = getLoopPrimaryInputSource(nodeId, connectionMap, _loop_body_ids)
_loop_check_map = (
{nodeId: [(_loop_primary[0], _loop_primary[1], 0)]}
if _loop_primary else connectionMap
)
_loop_active = _is_node_on_active_path(nodeId, _loop_check_map, nodeOutputs)
else:
_loop_active = _is_node_on_active_path(nodeId, connectionMap, nodeOutputs)
if not _loop_active:
logger.info("executeGraph step %d/%d: nodeId=%s SKIP (inactive branch)", i + 1, len(ordered), nodeId)
_skipInputSnap = {"_skipReason": "inactive_branch"}
for _sSrc, _, _ in connectionMap.get(nodeId, []):
if _sSrc in nodeOutputs:
_skipInputSnap[_sSrc] = nodeOutputs[_sSrc]
_skipInputSnap = _merge_node_parameters_into_snap(_skipInputSnap, node_id=nodeId, context=context)
_skipStepId = _createStepLog(automation2_interface, runId, nodeId, nodeType, status="skipped", inputSnapshot=_skipInputSnap)
if _skipStepId:
_updateStepLog(automation2_interface, _skipStepId, "skipped")
await _ge_log_node_finished(
ge_file_logger,
run_id=runId,
node_outputs=nodeOutputs,
run_envelope=context.get("runEnvelope"),
exec_context=context,
node_id=nodeId,
node_type=nodeType,
status="skipped",
input_snap=_skipInputSnap,
skip_reason=str(_skipInputSnap.get("_skipReason") or "inactive_branch"),
)
continue
executor = _getExecutor(nodeType, services, automation2_interface)
logger.info(
"executeGraph step %d/%d: nodeId=%s nodeType=%s executor=%s",
i + 1,
len(ordered),
nodeId,
nodeType,
type(executor).__name__ if executor else "None",
)
if not executor:
nodeOutputs[nodeId] = None
logger.debug("executeGraph node %s: no executor, output=None", nodeId)
continue
_stepStartMs = time.time()
_stepId = None
try:
if nodeType == "flow.loop":
_loopInputSnap = {}
for _lSrc, _, _ in connectionMap.get(nodeId, []):
if _lSrc in nodeOutputs:
_loopInputSnap[_lSrc] = nodeOutputs[_lSrc]
_loopInputSnap = _merge_node_parameters_into_snap(_loopInputSnap, node_id=nodeId, context=context)
_stepId = _createStepLog(automation2_interface, runId, nodeId, nodeType, "running", _loopInputSnap)
result = await executor.execute(node, context)
items = result.get("items") or []
body_ids = getLoopBodyNodeIds(nodeId, connectionMap)
body_ordered = [n for n in ordered if n.get("id") in body_ids]
processed_in_loop.update(body_ids)
processed_in_loop.add(nodeId)
_loopConcurrency = int((node.get("parameters") or {}).get("concurrency", 1))
_loopConcurrency = max(1, min(_loopConcurrency, 20))
_batchMode = len(items) > STEPLOG_BATCH_THRESHOLD
_aggLock = asyncio.Lock()
# Prefer the *last* body node wired to loop input 0 (feedback /
# pipeline end) — first matching inbound edge can be a shallow node.
_feedback_candidates = [
_fb_src
for _fb_src, _fb_so, _fb_ti in (connectionMap.get(nodeId) or [])
if _fb_src in body_ids and _fb_ti == 0
]
_feedback_body_node_id = _feedback_candidates[-1] if _feedback_candidates else None
if not _feedback_body_node_id and body_ordered:
_feedback_body_node_id = body_ordered[-1].get("id")
_bodyResultsPerIter: List[Any] = [None] * len(items)
async def _runLoopIteration(_idx: int, _item: Any) -> Optional[Dict]:
    """Run every loop-body node once for a single loop item.

    With a loop concurrency of 1 the body nodes read and write the shared
    ``nodeOutputs`` / ``context`` of the enclosing run. With a higher
    concurrency each iteration works on its own shallow copies, so parallel
    iterations do not overwrite each other's node outputs.

    Args:
        _idx: Zero-based position of the item within ``items``.
        _item: The loop item exposed to the body nodes as ``currentItem``.

    Returns:
        ``None`` when the iteration ran to its end (including the case where
        it stopped early because the run context is flagged ``_stopped``);
        ``{"_pause": True, "taskId", "runId", "nodeId"}`` when a body node
        paused for a human task;
        ``{"_error": <message>, "failedNode": <node id>}`` when a body node
        raised any other unexpected exception.

    Raises:
        PauseForEmailWaitError: Re-raised after the step has been logged.
        _SubscriptionInactiveException, _BillingContextError: Re-raised after
            the step has been logged as failed.
    """
    _loopOutput = {"items": items, "count": len(items), "currentItem": _item, "currentIndex": _idx}
    _loopState = {"loopNodeId": nodeId, "currentIndex": _idx, "items": items}
    if _loopConcurrency > 1:
        # Parallel iterations get private shallow copies; the copies are only
        # needed (and only made) in this mode.
        _activeOutputs = dict(nodeOutputs)
        _activeOutputs[nodeId] = _loopOutput
        _activeCtx = dict(context)
        _activeCtx["nodeOutputs"] = _activeOutputs
        _activeCtx["_loopState"] = _loopState
    else:
        # Sequential mode operates directly on the shared run state.
        nodeOutputs[nodeId] = _loopOutput
        context["_loopState"] = _loopState
        _activeOutputs = nodeOutputs
        _activeCtx = context

    async def _finishBodyStep(_node, _stepId, _startedAt, _inputSnap, _status, _stepLogExtra=None, _fileLogExtra=None) -> None:
        """Close the step log (if one was opened) and write the run file log entry.

        ``_stepLogExtra`` / ``_fileLogExtra`` carry only the additional keyword
        arguments a call site wants to pass on; nothing is added implicitly.
        """
        _durMs = int((time.time() - _startedAt) * 1000)
        if _stepId:
            _updateStepLog(automation2_interface, _stepId, _status,
                           durationMs=_durMs, **(_stepLogExtra or {}))
        await _ge_log_node_finished(
            ge_file_logger,
            run_id=runId,
            node_outputs=_activeOutputs,
            run_envelope=context.get("runEnvelope"),
            exec_context=context,
            node_id=_node.get("id"),
            node_type=_node.get("type", ""),
            status=_status,
            input_snap=_inputSnap,
            duration_ms=_durMs,
            loop_index=_idx,
            loop_node_id=nodeId,
            loop_item=_item,
            **(_fileLogExtra or {}),
        )

    for body_node in body_ordered:
        bnid = body_node.get("id")
        if not bnid or context.get("_stopped"):
            break
        if not _is_node_on_active_path(bnid, connectionMap, _activeOutputs):
            continue
        _bType = body_node.get("type", "")
        bexec = _getExecutor(_bType, services, automation2_interface)
        if not bexec:
            _activeOutputs[bnid] = None
            continue
        _bStepStart = time.time()
        # Input snapshot for logging: loop item/index plus every already
        # available upstream output of this body node.
        _bInputSnapAlways: Dict[str, Any] = {"_loopItem": _item, "_loopIndex": _idx}
        for _bSnapSrc, _, _ in connectionMap.get(bnid, []):
            if _bSnapSrc in _activeOutputs:
                _bInputSnapAlways[_bSnapSrc] = _activeOutputs[_bSnapSrc]
        _bInputSnapAlways = _merge_node_parameters_into_snap(
            _bInputSnapAlways, node_id=bnid, context=context
        )
        # In batch mode only the first and the last iteration open a step log.
        _bStepId = None
        if not _batchMode or _idx == 0 or _idx == len(items) - 1:
            _bStepId = _createStepLog(
                automation2_interface,
                runId,
                bnid,
                _bType,
                "running",
                _bInputSnapAlways,
            )
        _stepArgs = (body_node, _bStepId, _bStepStart, _bInputSnapAlways)
        try:
            bres, _bRetry = await _executeWithRetry(bexec, body_node, _activeCtx)
            if body_node.get("type") == "data.aggregate":
                # The accumulators are shared between iterations, hence the lock.
                async with _aggLock:
                    _acc = _aggregateAccumulators.setdefault(bnid, [])
                    _newItems = bres.get("items", [bres]) if isinstance(bres, dict) else [bres]
                    _acc.extend(_newItems)
                    if len(_acc) >= AGGREGATE_FLUSH_THRESHOLD:
                        # Move the full buffer aside and start a fresh one.
                        _aggregateTempChunks.setdefault(bnid, []).append(_acc)
                        _aggregateAccumulators[bnid] = []
            _activeOutputs[bnid] = bres
            await _finishBodyStep(
                *_stepArgs,
                "completed",
                {"output": bres if isinstance(bres, dict) else {"value": bres}, "retryCount": _bRetry},
                {"output": bres, "retry_count": _bRetry},
            )
        except PauseForHumanTaskError as e:
            await _finishBodyStep(*_stepArgs, "completed")
            if runId and automation2_interface:
                # NOTE(review): the guard and getRun use ``runId`` while
                # updateRun uses ``e.runId`` — presumably identical; verify.
                _run = automation2_interface.getRun(runId) or {}
                _run_ctx = dict(_run.get("context") or {})
                _run_ctx["_loopState"] = {"loopNodeId": nodeId, "currentIndex": _idx, "items": items}
                # NOTE(review): this persists the shared ``nodeOutputs``; with
                # concurrency > 1 the outputs produced earlier in this
                # iteration live only in the per-iteration copy and are not
                # saved — confirm that is intended.
                automation2_interface.updateRun(e.runId, status="paused", nodeOutputs=_serializableOutputs(nodeOutputs), currentNodeId=e.nodeId, context=_run_ctx)
            return {"_pause": True, "taskId": e.taskId, "runId": e.runId, "nodeId": e.nodeId}
        except PauseForEmailWaitError:
            await _finishBodyStep(*_stepArgs, "completed")
            raise
        except (_SubscriptionInactiveException, _BillingContextError):
            await _finishBodyStep(
                *_stepArgs,
                "failed",
                {"error": "Subscription/Billing error"},
                {"error": "Subscription/Billing error"},
            )
            raise
        except Exception as ex:
            await _finishBodyStep(*_stepArgs, "failed", {"error": str(ex)}, {"error": str(ex)})
            logger.exception("executeGraph loop body node %s FAILED (iter %d): %s", bnid, _idx, ex)
            return {"_error": str(ex), "failedNode": bnid}
    if _feedback_body_node_id:
        # Remember what the feedback body node produced for this iteration.
        async with _aggLock:
            if _idx < len(_bodyResultsPerIter):
                _bodyResultsPerIter[_idx] = _activeOutputs.get(_feedback_body_node_id)
    if _batchMode and _idx > 0 and _idx % STEPLOG_BATCH_THRESHOLD == 0 and runId:
        _emitStepEvent(runId, {"type": "loop_progress", "nodeId": nodeId, "iteration": _idx, "total": len(items)})
    return None
if _loopConcurrency <= 1:
for idx, item in enumerate(items):
iterErr = await _runLoopIteration(idx, item)
if iterErr:
if iterErr.get("_pause"):
return {"success": False, "paused": True, "taskId": iterErr["taskId"], "runId": iterErr["runId"], "nodeId": iterErr["nodeId"], "nodeOutputs": _serializableOutputs(nodeOutputs)}
nodeOutputs[iterErr.get("failedNode", nodeId)] = {"error": iterErr["_error"], "success": False}
if runId and automation2_interface:
automation2_interface.updateRun(runId, status="failed", nodeOutputs=_serializableOutputs(nodeOutputs))
if runId:
_activeRunContexts.pop(runId, None)
return {"success": False, "error": iterErr["_error"], "nodeOutputs": _serializableOutputs(nodeOutputs), "failedNode": iterErr.get("failedNode"), "runId": runId}
else:
_sem = asyncio.Semaphore(_loopConcurrency)
async def _concurrentIter(_ci: int, _citem: Any):
    """Run one loop iteration while holding a slot of the ``_sem`` semaphore.

    Returns whatever ``_runLoopIteration`` returns for item ``_citem`` at
    index ``_ci``; the slot is released as soon as that call has finished.
    """
    async with _sem:
        _outcome = await _runLoopIteration(_ci, _citem)
    return _outcome
_tasks = [_concurrentIter(ci, citem) for ci, citem in enumerate(items)]
_results = await asyncio.gather(*_tasks, return_exceptions=True)
for _ri, _rval in enumerate(_results):
if isinstance(_rval, Exception):
logger.exception("Loop iteration %d raised: %s", _ri, _rval)
if runId and automation2_interface:
automation2_interface.updateRun(runId, status="failed", nodeOutputs=_serializableOutputs(nodeOutputs))
if runId:
_activeRunContexts.pop(runId, None)
return {"success": False, "error": str(_rval), "nodeOutputs": _serializableOutputs(nodeOutputs), "runId": runId}
if isinstance(_rval, dict):
if _rval.get("_pause"):
return {"success": False, "paused": True, "taskId": _rval["taskId"], "runId": _rval["runId"], "nodeId": _rval["nodeId"], "nodeOutputs": _serializableOutputs(nodeOutputs)}
if _rval.get("_error"):
if runId and automation2_interface:
automation2_interface.updateRun(runId, status="failed", nodeOutputs=_serializableOutputs(nodeOutputs))
if runId:
_activeRunContexts.pop(runId, None)
return {"success": False, "error": _rval["_error"], "nodeOutputs": _serializableOutputs(nodeOutputs), "failedNode": _rval.get("failedNode"), "runId": runId}
for aggId, accItems in _aggregateAccumulators.items():
allChunks = _aggregateTempChunks.pop(aggId, [])
finalItems = []
for chunk in allChunks:
finalItems.extend(chunk)
finalItems.extend(accItems)
nodeOutputs[aggId] = {"items": finalItems, "count": len(finalItems), "_success": True}
_aggregateAccumulators.clear()
# Always attach ``bodyResults`` (list per iteration, possibly None
# placeholders) so DataRefs to ``bodyResults`` resolve and
# ``context.mergeContext`` can fall back to the wired loop output.
_lo = nodeOutputs.get(nodeId)
if isinstance(_lo, dict):
_lo["bodyResults"] = _bodyResultsPerIter
nodeOutputs[nodeId] = _lo
await _run_post_loop_done_nodes(
loop_node_id=nodeId,
body_ids=body_ids,
items=items,
ordered=ordered,
connectionMap=connectionMap,
nodeOutputs=nodeOutputs,
context=context,
services=services,
automation2_interface=automation2_interface,
runId=runId,
processed_in_loop=processed_in_loop,
ge_file_logger=ge_file_logger,
)
_loopDurMs = int((time.time() - _stepStartMs) * 1000)
_loopStepOut = {
"iterationCount": len(items),
"items": len(items),
"concurrency": _loopConcurrency,
"batchMode": _batchMode,
}
_updateStepLog(automation2_interface, _stepId, "completed",
output=_loopStepOut,
durationMs=_loopDurMs)
await _ge_log_node_finished(
ge_file_logger,
run_id=runId,
node_outputs=nodeOutputs,
run_envelope=context.get("runEnvelope"),
exec_context=context,
node_id=nodeId,
node_type=nodeType,
status="completed",
input_snap=_loopInputSnap,
output=_loopStepOut,
duration_ms=_loopDurMs,
)
logger.info("executeGraph flow.loop done: %d iterations (concurrency=%d, batchMode=%s)", len(items), _loopConcurrency, _batchMode)
elif _isBarrierNode(nodeType):
if not _allMergePredecessorsReady(nodeId, connectionMap, nodeOutputs):
logger.info("executeGraph node %s (%s): waiting — not all predecessors ready, deferring", nodeId, nodeType)
nodeOutputs[nodeId] = None
continue
_stepStartMs = time.time()
_inputSnap = {}
for src, _, _ in connectionMap.get(nodeId, []):
if src in nodeOutputs:
_inputSnap[src] = nodeOutputs[src]
_inputSnap = _merge_node_parameters_into_snap(_inputSnap, node_id=nodeId, context=context)
_stepId = _createStepLog(automation2_interface, runId, nodeId, nodeType, "running", _inputSnap)
result, retryCount = await _executeWithRetry(executor, node, context)
result = _normalizeResult(result, nodeType)
nodeOutputs[nodeId] = result
_mergeDurMs = int((time.time() - _stepStartMs) * 1000)
_mergeTok = result.get("tokensUsed", 0) if isinstance(result, dict) else 0
_updateStepLog(automation2_interface, _stepId, "completed",
output=result if isinstance(result, dict) else {"value": result},
durationMs=_mergeDurMs, tokensUsed=_mergeTok, retryCount=retryCount)
await _ge_log_node_finished(
ge_file_logger,
run_id=runId,
node_outputs=nodeOutputs,
run_envelope=context.get("runEnvelope"),
exec_context=context,
node_id=nodeId,
node_type=nodeType,
status="completed",
input_snap=_inputSnap,
output=result,
duration_ms=_mergeDurMs,
retry_count=retryCount,
)
else:
_stepStartMs = time.time()
_inputSnap = {}
for src, _, _ in connectionMap.get(nodeId, []):
if src in nodeOutputs:
_inputSnap[src] = nodeOutputs[src]
_inputSnap = _merge_node_parameters_into_snap(_inputSnap, node_id=nodeId, context=context)
_stepId = _createStepLog(automation2_interface, runId, nodeId, nodeType, "running", _inputSnap)
result, retryCount = await _executeWithRetry(executor, node, context)
result = _normalizeResult(result, nodeType)
nodeOutputs[nodeId] = result
_durMs = int((time.time() - _stepStartMs) * 1000)
_tokens = result.get("tokensUsed", 0) if isinstance(result, dict) else 0
_updateStepLog(automation2_interface, _stepId, "completed",
output=result if isinstance(result, dict) else {"value": result},
durationMs=_durMs, tokensUsed=_tokens, retryCount=retryCount)
await _ge_log_node_finished(
ge_file_logger,
run_id=runId,
node_outputs=nodeOutputs,
run_envelope=context.get("runEnvelope"),
exec_context=context,
node_id=nodeId,
node_type=nodeType,
status="completed",
input_snap=_inputSnap,
output=result,
duration_ms=_durMs,
retry_count=retryCount,
)
logger.info(
"executeGraph node %s done: result_type=%s result_keys=%s retries=%d duration=%dms",
nodeId,
type(result).__name__,
list(result.keys()) if isinstance(result, dict) else "n/a",
retryCount,
_durMs,
)
except PauseForHumanTaskError as e:
_huPauseMs = int((time.time() - _stepStartMs) * 1000)
_updateStepLog(automation2_interface, _stepId, "completed",
durationMs=_huPauseMs)
_ge_in = locals().get("_inputSnap")
if _ge_in is None:
_ge_in = locals().get("_loopInputSnap") or {}
await _ge_log_node_finished(
ge_file_logger,
run_id=runId,
node_outputs=nodeOutputs,
run_envelope=context.get("runEnvelope"),
exec_context=context,
node_id=nodeId,
node_type=nodeType,
status="completed",
input_snap=_ge_in,
duration_ms=_huPauseMs,
)
logger.info("executeGraph paused for human task %s", e.taskId)
return {
"success": False,
"paused": True,
"taskId": e.taskId,
"runId": e.runId,
"nodeId": e.nodeId,
"nodeOutputs": _serializableOutputs(nodeOutputs),
}
except PauseForEmailWaitError as e:
_emailPauseMs = int((time.time() - _stepStartMs) * 1000)
_updateStepLog(automation2_interface, _stepId, "completed",
durationMs=_emailPauseMs)
_ge_email_in = locals().get("_inputSnap")
if _ge_email_in is None:
_ge_email_in = locals().get("_loopInputSnap") or {}
await _ge_log_node_finished(
ge_file_logger,
run_id=runId,
node_outputs=nodeOutputs,
run_envelope=context.get("runEnvelope"),
exec_context=context,
node_id=nodeId,
node_type=nodeType,
status="completed",
input_snap=_ge_email_in,
duration_ms=_emailPauseMs,
)
logger.info("executeGraph paused for email wait (run %s, node %s)", e.runId, e.nodeId)
try:
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.features.graphicalEditor.emailPoller import ensureRunning
root = getRootInterface()
event_user = root.getUserByUsername("event") if root else None
if event_user:
ensureRunning(event_user)
except Exception as poll_err:
logger.warning("Could not start email poller: %s", poll_err)
paused_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
run_ctx = {
"connectionMap": context.get("connectionMap"),
"inputSources": context.get("inputSources"),
"orderedNodeIds": [n.get("id") for n in context.get("_orderedNodes", []) if n.get("id")],
"waitReason": "email",
"waitConfig": e.waitConfig,
"pausedAt": paused_at,
"lastCheckedAt": None,
"ownerId": context.get("userId"),
"mandateId": context.get("mandateId"),
"instanceId": context.get("instanceId"),
}
if automation2_interface and e.runId:
prev_ctx = dict((automation2_interface.getRun(e.runId) or {}).get("context") or {})
run_ctx = merge_run_context_with_ge_log_prefix(prev_ctx, run_ctx)
automation2_interface.updateRun(
e.runId,
status="paused",
nodeOutputs=_serializableOutputs(nodeOutputs),
currentNodeId=e.nodeId,
context=run_ctx,
)
return {
"success": False,
"paused": True,
"waitReason": "email",
"runId": e.runId,
"nodeId": e.nodeId,
"nodeOutputs": _serializableOutputs(nodeOutputs),
}
except Exception as e:
logger.exception("executeGraph node %s (%s) FAILED: %s", nodeId, nodeType, e)
nodeOutputs[nodeId] = {"error": str(e), "success": False}
_durMs = int((time.time() - _stepStartMs) * 1000)
_updateStepLog(automation2_interface, _stepId, "failed", error=str(e), durationMs=_durMs)
_ge_fail_in = locals().get("_inputSnap")
if _ge_fail_in is None:
_ge_fail_in = locals().get("_loopInputSnap") or {}
await _ge_log_node_finished(
ge_file_logger,
run_id=runId,
node_outputs=nodeOutputs,
run_envelope=context.get("runEnvelope"),
exec_context=context,
node_id=nodeId,
node_type=nodeType,
status="failed",
input_snap=_ge_fail_in,
error=str(e),
duration_ms=_durMs,
)
if runId and automation2_interface:
automation2_interface.updateRun(runId, status="failed", nodeOutputs=_serializableOutputs(nodeOutputs))
if runId:
_emitStepEvent(runId, {"type": "run_failed", "runId": runId, "status": "failed", "error": str(e), "failedNode": nodeId})
try:
_wfObj = automation2_interface.getWorkflow(workflowId) if automation2_interface and workflowId else None
_wfDict = _wfObj if isinstance(_wfObj, dict) else (
_wfObj.model_dump() if hasattr(_wfObj, "model_dump") else {}
) if _wfObj else {}
_shouldNotify = _wfDict.get("notifyOnFailure", True) if _wfDict else True
if _shouldNotify:
from modules.workflows.scheduler.mainScheduler import notifyRunFailed
notifyRunFailed(
workflowId or "", runId or "", str(e),
mandateId=mandateId,
workflowLabel=_wfDict.get("label"),
)
except Exception as notifyErr:
logger.warning(f"executeGraph: failure notification failed for run={runId}: {notifyErr}")
if runId:
_activeRunContexts.pop(runId, None)
return {
"success": False,
"error": str(e),
"nodeOutputs": _serializableOutputs(nodeOutputs),
"failedNode": nodeId,
"runId": runId,
}
_safeOutputs = _serializableOutputs(nodeOutputs)
_wasStopped = context.get("_stopped", False)
_finalStatus = "stopped" if _wasStopped else "completed"
if runId and automation2_interface:
automation2_interface.updateRun(runId, status=_finalStatus, nodeOutputs=_safeOutputs)
if runId:
_emitStepEvent(runId, {"type": "run_complete", "runId": runId, "status": _finalStatus})
_activeRunContexts.pop(runId, None)
logger.info(
"executeGraph complete: success=True nodeOutputs_keys=%s stopped=%s",
list(nodeOutputs.keys()),
_wasStopped,
)
return {
"success": True,
"nodeOutputs": _safeOutputs,
"stopped": _wasStopped,
"runId": runId,
}