# gateway/modules/workflows/automation2/graphUtils.py
#
# 493 lines
# 19 KiB
# Python

# Copyright (c) 2025 Patrick Motsch
# Graph parsing, validation, and topological sort for automation2.
import logging
from typing import Dict, List, Any, Tuple, Set, Optional
logger = logging.getLogger(__name__)
def parseGraph(graph: Dict[str, Any]) -> Tuple[List[Dict], List[Dict], Set[str]]:
    """
    Parse graph into nodes, connections, and node IDs.

    graph: { nodes: [...], connections: [...] }
    A missing or null ``nodes`` / ``connections`` entry is treated as an empty list.
    Returns (nodes, connections, nodeIds) where nodeIds is the set of all truthy
    node ``id`` values.
    """
    nodes = graph.get("nodes") or []
    connections = graph.get("connections") or []
    nodeIds = {n.get("id") for n in nodes if n.get("id")}
    # Only build the sorted ID list when DEBUG is actually on: otherwise the
    # sort is wasted work. key=str keeps mixed-type IDs (e.g. int and str)
    # from raising TypeError inside what is only a diagnostic statement.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(
            "parseGraph: nodes=%d connections=%d nodeIds=%s",
            len(nodes),
            len(connections),
            sorted(nodeIds, key=str),
        )
    return nodes, connections, nodeIds
def buildConnectionMap(connections: List[Dict]) -> Dict[str, List[Tuple[str, int, int]]]:
    """
    Build map: targetNodeId -> [(sourceNodeId, sourceOutput, targetInput), ...]

    connection: { source, sourceOutput?, target, targetInput? }
    ``sourceNode`` / ``targetNode`` are accepted as alternative key names.
    Connections without a source or target are skipped (logged at DEBUG).
    ``sourceOutput`` / ``targetInput`` default to 0 when the key is absent.
    """
    out: Dict[str, List[Tuple[str, int, int]]] = {}
    for i, c in enumerate(connections):
        src = c.get("source") or c.get("sourceNode")
        tgt = c.get("target") or c.get("targetNode")
        if not src or not tgt:
            logger.debug("buildConnectionMap skip conn[%d]: missing source/target %r", i, c)
            continue
        so = c.get("sourceOutput", 0)
        ti = c.get("targetInput", 0)
        out.setdefault(tgt, []).append((src, so, ti))
        # %r rather than %d: the port indexes come straight from the graph
        # payload and may be null or a string, which %d cannot format.
        logger.debug("buildConnectionMap conn[%d]: %s -> %s (so=%r ti=%r)", i, src, tgt, so, ti)
    logger.debug("buildConnectionMap result: %s", out)
    return out
def getLoopBodyNodeIds(loopNodeId: str, connectionMap: Dict[str, List[Tuple[str, int, int]]]) -> Set[str]:
    """Collect every node reachable by following connections forward from the loop node.

    ``connectionMap`` is keyed by target (target -> [(source, sourceOutput, targetInput)]),
    so it is inverted first and then walked breadth-first. The loop node itself is only
    part of the result if some connection leads back into it.
    """
    from collections import deque

    # Invert target -> sources into source -> [targets].
    successors: Dict[str, List[str]] = {}
    for target, incoming in connectionMap.items():
        for source, _srcOut, _tgtIn in incoming:
            successors.setdefault(source, []).append(target)

    reached: Set[str] = set()
    pending = deque((loopNodeId,))
    while pending:
        current = pending.popleft()
        for nxt in successors.get(current, ()):
            if nxt in reached:
                continue
            reached.add(nxt)
            pending.append(nxt)
    return reached
def getInputSources(nodeId: str, connectionMap: Dict[str, List[Tuple[str, int, int]]]) -> Dict[int, Tuple[str, int]]:
    """
    Map each input port of ``nodeId`` to the upstream (sourceNodeId, sourceOutput) feeding it.

    When several connections target the same input, the last one listed wins.
    A node without incoming connections yields an empty dict.
    """
    incoming = connectionMap.get(nodeId, [])
    return {targetInput: (source, sourceOutput) for source, sourceOutput, targetInput in incoming}
def getTriggerNodes(nodes: List[Dict]) -> List[Dict]:
    """Return nodes with category=trigger or a type starting with ``trigger.``.

    Input order is preserved. A node whose ``type`` is missing, null or not a
    string can still qualify through its ``category``.
    """

    def _isTrigger(node: Dict) -> bool:
        nodeType = node.get("type")
        # ``type`` may be present but null: .get()'s default does not apply
        # then, so check for str before calling startswith.
        if isinstance(nodeType, str) and nodeType.startswith("trigger."):
            return True
        return node.get("category") == "trigger"

    return [n for n in nodes if _isTrigger(n)]
def validateGraph(graph: Dict[str, Any], nodeTypeIds: Set[str]) -> List[str]:
    """
    Validate graph: every node has an id and a registered type, every node ID
    referenced by a connection exists, and connected ports are compatible.

    Returns list of error messages (empty if valid). Messages are produced in a
    stable order: node errors in node order, then missing-node errors in
    connection order, then port mismatches.
    """
    errors: List[str] = []
    nodes, connections, nodeIds = parseGraph(graph)
    for n in nodes:
        nid = n.get("id")
        ntype = n.get("type")
        if not nid:
            errors.append("Node missing id")
            continue
        if not ntype:
            errors.append(f"Node {nid} missing type")
            continue
        if ntype not in nodeTypeIds:
            errors.append(f"Unknown node type '{ntype}' for node {nid}")
    connMap = buildConnectionMap(connections)
    # A dict is used as an insertion-ordered set: iterating a plain set would
    # make the order of the error messages vary from run to run.
    referred: Dict[str, None] = {}
    for tgt, pairs in connMap.items():
        referred.setdefault(tgt, None)
        for src, _, _ in pairs:
            referred.setdefault(src, None)
    for nid in referred:
        if nid not in nodeIds:
            errors.append(f"Connection references non-existent node {nid}")
    # Port compatibility: hard-fail (Pick-not-Push typed graph)
    port_errors = _checkPortCompatibility(nodes, connMap)
    if port_errors:
        logger.warning("validateGraph port mismatches: %s", port_errors)
        errors.extend(port_errors)
    if errors:
        logger.debug("validateGraph errors: %s", errors)
    else:
        logger.debug("validateGraph: OK")
    return errors
def parse_graph_defined_schema(node: Dict[str, Any], parameter_key: str) -> Optional[Dict[str, Any]]:
    """
    Turn a graph-defined parameter (e.g. form ``fields``) into a plain port schema dict.

    Delegates to ``deriveFormPayloadSchemaFromParam``; when that yields no schema the
    result is ``None``. Otherwise returns ``{"name": ..., "fields": [...]}`` with each
    field converted through its ``model_dump()``.
    Per the original note: used by tooling and future API surfaces, mirroring the
    ``parse_graph_defined_output_schema`` logic.
    """
    from modules.features.graphicalEditor.portTypes import deriveFormPayloadSchemaFromParam

    schema = deriveFormPayloadSchemaFromParam(node, parameter_key)
    if schema is not None:
        schema_name = schema.name
        dumped_fields = [field.model_dump() for field in schema.fields]
        return {"name": schema_name, "fields": dumped_fields}
    return None
def _checkPortCompatibility(
    nodes: List[Dict],
    connMap: Dict[str, List[Tuple[str, int, int]]],
) -> List[str]:
    """
    Hard typed-port check: incompatible connections become validation errors.

    For each connection, the source port's resolved output schema name is compared
    with the target port's ``accepts`` list. Connections are skipped (not reported)
    when either node or its static definition is unknown, when a port definition is
    not a dict, or when the schema name or the ``accepts`` list is empty.

    Returns one message per mismatching connection (empty list if all compatible).
    """
    from modules.features.graphicalEditor.nodeDefinitions import STATIC_NODE_TYPES
    from modules.features.graphicalEditor.portTypes import resolve_output_schema_name

    nodeDefMap = {n["id"]: n for n in STATIC_NODE_TYPES}
    nodeById = {n["id"]: n for n in nodes if n.get("id")}
    mismatches: List[str] = []
    for tgt, pairs in connMap.items():
        tgtNode = nodeById.get(tgt)
        if not tgtNode:
            continue
        tgtDef = nodeDefMap.get(tgtNode.get("type", ""))
        if not tgtDef:
            continue
        tgtInputPorts = tgtDef.get("inputPorts", {})
        for src, srcOut, tgtIn in pairs:
            srcNode = nodeById.get(src)
            if not srcNode:
                continue
            srcDef = nodeDefMap.get(srcNode.get("type", ""))
            if not srcDef:
                continue
            srcOutputPorts = srcDef.get("outputPorts", {})
            srcPort = srcOutputPorts.get(srcOut, {}) or {}
            tgtPort = tgtInputPorts.get(tgtIn, {}) or {}
            # Both port definitions must be dicts; a malformed target port is
            # skipped the same way a malformed source port is, instead of
            # crashing on .get() below.
            if not isinstance(srcPort, dict) or not isinstance(tgtPort, dict):
                continue
            src_schema = resolve_output_schema_name(srcNode, srcPort)
            accepts = tgtPort.get("accepts", [])
            if not accepts or not src_schema:
                continue
            if src_schema in accepts:
                continue
            # Port that only declares Transit behaves as an untyped sink (legacy graphs).
            if len(accepts) == 1 and accepts[0] == "Transit":
                continue
            # Any FormPayload variant (including "FormPayload_dynamic") satisfies
            # a port that accepts the generic "FormPayload".
            if src_schema.startswith("FormPayload") and "FormPayload" in accepts:
                continue
            mismatches.append(
                f"Port mismatch: {src}[out:{srcOut}] ({src_schema}) -> {tgt}[in:{tgtIn}] (accepts: {accepts})"
            )
    return mismatches
def topoSort(nodes: List[Dict], connectionMap: Dict[str, List[Tuple[str, int, int]]]) -> List[Dict]:
    """
    Order nodes for execution: trigger nodes first, then everything reachable from
    them in breadth-first order, then any remaining (disconnected) nodes.

    If the graph has no trigger node, the nodes are returned in their input order.
    Nodes without an ``id`` are only kept in that no-trigger case.

    NOTE(review): this is a BFS ordering, not a strict topological sort. A node fed
    both directly by a trigger and by a longer path can be listed before one of its
    upstream nodes. Confirm the executor tolerates that before relying on it.
    """
    from collections import deque

    nodeById = {n["id"]: n for n in nodes if n.get("id")}
    triggers = getTriggerNodes(nodes)
    if not triggers:
        return list(nodes)

    # Build source -> [targets] once instead of rescanning the whole connection
    # map for every dequeued node. Targets are appended in connectionMap order,
    # so the resulting visit order is the same as a full rescan would give.
    downstream: Dict[str, List[str]] = {}
    for tgt, pairs in connectionMap.items():
        for src, _, _ in pairs:
            downstream.setdefault(src, []).append(tgt)

    visited: Set[str] = set()
    order: List[Dict] = []

    # A trigger without an id cannot be referenced by any connection; skip it
    # rather than raising KeyError.
    triggerIds = [t["id"] for t in triggers if t.get("id")]
    logger.debug("topoSort triggers: %s", triggerIds)

    q = deque()
    for nid in triggerIds:
        if nid in visited:
            # Duplicate trigger id: list the node only once.
            continue
        visited.add(nid)
        q.append(nid)
        if nid in nodeById:
            order.append(nodeById[nid])
    while q:
        nid = q.popleft()
        for tgt in downstream.get(nid, ()):
            if tgt in visited:
                continue
            visited.add(tgt)
            q.append(tgt)
            # Connections may reference unknown nodes; those are traversed but not emitted.
            if tgt in nodeById:
                order.append(nodeById[tgt])

    # Append any orphan nodes (e.g. disconnected)
    for n in nodes:
        if n.get("id") and n["id"] not in visited:
            order.append(n)
    logger.debug("topoSort order (%d nodes): %s", len(order), [n.get("id") for n in order])
    return order
_WILDCARD_SEGMENT = "*"


def _get_by_path(data: Any, path: List[Any]) -> Any:
    """Traverse data by path (strings and ints); return None if not found.

    Segment handling:
    - dict + str segment: key lookup.
    - list/tuple + int (or decimal-digit string) segment: index lookup;
      negative or out-of-range indexes yield None.
    - the iteration wildcard ``"*"``: when applied to a list/tuple, the
      remainder of the path is mapped over each element and the results are
      returned as a list (elements that resolve to ``None`` are dropped). A
      trailing wildcard returns a shallow list copy of the sequence.

    An empty path returns ``data`` itself.

    This is the "typed Bindings-Resolver" iteration primitive defined for
    Schicht 4 of the Typed Action Architecture.
    """
    current = data
    for i, seg in enumerate(path):
        if current is None:
            return None
        if isinstance(seg, str) and seg == _WILDCARD_SEGMENT:
            if not isinstance(current, (list, tuple)):
                return None
            tail = list(path[i + 1:])
            if not tail:
                return list(current)
            resolved_items = (_get_by_path(item, tail) for item in current)
            return [item for item in resolved_items if item is not None]
        if isinstance(current, dict) and isinstance(seg, str) and seg in current:
            current = current[seg]
        elif isinstance(current, (list, tuple)) and isinstance(seg, (int, str)):
            # isdecimal(), not isdigit(): isdigit() also accepts characters
            # such as superscript digits that int() rejects with ValueError.
            idx = int(seg) if isinstance(seg, str) and seg.isdecimal() else seg
            if isinstance(idx, int) and 0 <= idx < len(current):
                current = current[idx]
            else:
                return None
        else:
            return None
    return current
def _pathContainsWildcard(path: List[Any]) -> bool:
    """Report whether the path has at least one iteration wildcard (``"*"``) segment."""
    for segment in path:
        # Only string segments can be the wildcard; int indexes never match.
        if isinstance(segment, str) and segment == _WILDCARD_SEGMENT:
            return True
    return False
# ---------------------------------------------------------------------------
# Phase-5 Schicht-4 — Typed-Ref envelope unwrap
# ---------------------------------------------------------------------------
#
# Workflow params can carry a typed-ref envelope like
# ``{"$type": "FeatureInstanceRef", "id": "<uuid>", "featureCode": "trustee"}``.
# Action implementations historically receive the canonical primitive (the
# referenced ``id``) as a string. ``_unwrapTypedRef`` extracts that primitive
# without losing the typed envelope shape on disk — the migration script
# (``featureInstanceRefMigration.materializeFeatureInstanceRefs``) writes the
# envelope, the resolver unwraps it on its way to the action.
# Catalog type name -> key of the envelope field that holds its canonical primitive.
_TYPED_REF_PRIMARY_FIELD = {
    "FeatureInstanceRef": "id",
    "ConnectionRef": "id",
    "PromptTemplateRef": "id",
    "ClickUpListRef": "listId",
    "SharePointFileRef": "filePath",
    "SharePointFolderRef": "folderPath",
}


def _isTypedRefEnvelope(value: Any) -> bool:
    """Tell whether ``value`` is a dict whose ``$type`` is a string naming a known catalog type."""
    if isinstance(value, dict):
        declaredType = value.get("$type")
        if isinstance(declaredType, str):
            return declaredType in _TYPED_REF_PRIMARY_FIELD
    return False
def _unwrapTypedRef(value: Any) -> Any:
    """Reduce a typed-ref envelope to the primitive stored in its primary field.

    Anything that is not a recognised envelope is handed back untouched, and so is
    an envelope that lacks its primary field.
    """
    if _isTypedRefEnvelope(value):
        primaryKey = _TYPED_REF_PRIMARY_FIELD[value["$type"]]
        return value.get(primaryKey, value)
    return value
def resolveParameterReferences(value: Any, nodeOutputs: Dict[str, Any]) -> Any:
    """
    Resolve parameter references against the outputs of already-executed nodes.

    Supported forms:
    - {{nodeId}} or {{nodeId.path.to.field}} placeholders in strings (legacy);
      a placeholder that cannot be resolved is left in the string unchanged
    - { "type": "ref", "nodeId": "...", "path": ["field", "nested"] } -> resolved value
    - { "type": "value", "value": ... } -> value (then recursively resolved)
    - { "type": "system", "variable": "..." } -> resolved via ``resolveSystemVariable``
      with ``nodeOutputs["_context"]``
    - typed-ref envelopes ({"$type": ...}) -> their primary primitive
    - any other dict or list -> resolved element-wise
    - a non-empty list made up only of ref envelopes (contextBuilder): a single ref
      keeps its resolved type, several are serialized and joined with blank lines

    Any other value is returned unchanged.
    """
    import json
    import re

    if isinstance(value, dict):
        # Phase-5 Schicht-4: typed-ref envelopes (FeatureInstanceRef etc.) on
        # disk get unwrapped to their canonical primitive (e.g. ``id``) so
        # legacy action signatures keep working. See ``_unwrapTypedRef``.
        if _isTypedRefEnvelope(value):
            return _unwrapTypedRef(value)
        if value.get("type") == "ref":
            node_id = value.get("nodeId")
            path = value.get("path")
            if node_id is not None and isinstance(path, (list, tuple)):
                data = nodeOutputs.get(node_id)
                # Unwrap transit envelopes to access the real data
                if isinstance(data, dict) and data.get("_transit"):
                    data = data.get("data", data)
                plist = list(path)
                resolved = _get_by_path(data, plist)
                if resolved is None and isinstance(data, dict) and plist:
                    if plist[0] == "payload" and len(plist) > 1:
                        # Strip explicit "payload" prefix (legacy DataPicker paths)
                        resolved = _get_by_path(data, plist[1:])
                    elif "payload" in data and isinstance(data["payload"], dict):
                        # Form nodes store fields under {"payload": {fieldName: …}}.
                        # DataPicker emits bare field paths like ["url"]; try under payload.
                        resolved = _get_by_path(data["payload"], plist)
                return resolveParameterReferences(resolved, nodeOutputs)
            # Malformed ref (no nodeId or non-list path): pass through as-is.
            return value
        if value.get("type") == "value":
            inner = value.get("value")
            return resolveParameterReferences(inner, nodeOutputs)
        if value.get("type") == "system":
            variable = value.get("variable", "")
            from modules.features.graphicalEditor.portTypes import resolveSystemVariable
            return resolveSystemVariable(variable, nodeOutputs.get("_context", {}))
        return {k: resolveParameterReferences(v, nodeOutputs) for k, v in value.items()}

    if isinstance(value, str):

        def _walk(root, keys):
            """Follow dotted keys through dicts and lists; None when any step misses."""
            cur = root
            for k in keys:
                if isinstance(cur, dict) and k in cur:
                    cur = cur[k]
                elif isinstance(cur, (list, tuple)) and k.isdecimal() and int(k) < len(cur):
                    # isdecimal() guarantees int() succeeds; the bounds check
                    # turns an out-of-range index into a miss, not IndexError.
                    cur = cur[int(k)]
                else:
                    return None
            return cur

        def repl(m):
            ref = m.group(1).strip()
            parts = ref.split(".")
            nodeId = parts[0]
            data = nodeOutputs.get(nodeId)
            if data is None:
                return m.group(0)
            if len(parts) < 2:
                return json.dumps(data) if isinstance(data, (dict, list)) else str(data)
            keys = parts[1:]
            result = _walk(data, keys)
            # Form nodes store fields under {"payload": {field: …}}.
            # Fall back to looking under "payload" when the direct path misses.
            if result is None and isinstance(data, dict) and "payload" in data:
                result = _walk(data["payload"], keys)
            if result is None:
                return m.group(0)
            return str(result) if not isinstance(result, (dict, list)) else json.dumps(result, ensure_ascii=False)

        return re.sub(r"\{\{\s*([^}]+)\s*\}\}", repl, value)

    if isinstance(value, list):
        # contextBuilder: list where every item is a `{"type":"ref",...}` envelope.
        # Resolve each part; a single ref preserves the resolved type (str, list, dict).
        if value and all(isinstance(v, dict) and v.get("type") == "ref" for v in value):
            from modules.workflows.methods.methodAi._common import serialize_context
            resolved_parts = [resolveParameterReferences(v, nodeOutputs) for v in value]
            if len(resolved_parts) == 1:
                return resolved_parts[0]
            parts = [serialize_context(p) for p in resolved_parts]
            return "\n\n".join(p for p in parts if p)
        return [resolveParameterReferences(v, nodeOutputs) for v in value]

    return value
def document_list_param_is_empty(val: Any) -> bool:
    """Decide whether a documentList-style parameter is still unset (wire + DataRef may fill).

    Counted as empty: ``None``, the empty string, an empty list, and a dict with no
    truthy ``documents`` / ``references`` / ``items`` / ``documentId`` / ``id`` entry.
    Every other value counts as set.
    """
    if val is None or val == "":
        return True
    if isinstance(val, list):
        return len(val) == 0
    if isinstance(val, dict):
        content_keys = ("documents", "references", "items", "documentId", "id")
        return not any(val.get(key) for key in content_keys)
    return False
def extract_wired_document_list(inp: Any) -> Optional[Dict[str, Any]]:
    """
    Derive a ``{"documents": [...], "count": n}`` dict from an upstream node output (port wire).

    Used when a parameter declares ``graphInherit.kind == "documentListWire"``.
    After unwrapping a transit envelope, the candidates are tried in this order and the
    first that yields documents wins:
    1. a plain string, converted through ``_file_record_to_document``
    2. a non-null ``currentItem`` (handled recursively)
    3. a non-empty ``documents`` list (keeps an existing ``count`` if present)
    4. a non-empty ``documentList`` whose first element is a dict
    5. a single ``documentId`` / ``id``, with optional name and mime type
    6. a ``files`` list whose dict entries convert through ``_file_record_to_document``

    Returns ``None`` when nothing usable is found.
    """
    if inp is None:
        return None
    from modules.features.graphicalEditor.portTypes import (
        unwrapTransit,
        _coerce_document_list_upload_fields,
        _file_record_to_document,
    )

    unwrapped = unwrapTransit(inp)
    if isinstance(unwrapped, str):
        single = _file_record_to_document(unwrapped)
        if single:
            return {"documents": [single], "count": 1}
        return None
    if not isinstance(unwrapped, dict):
        return None

    # Work on a shallow copy so the coercion helper cannot touch the caller's dict.
    record = dict(unwrapped)
    _coerce_document_list_upload_fields(record)

    current_item = record.get("currentItem")
    if current_item is not None:
        from_item = extract_wired_document_list(current_item)
        if from_item:
            return from_item

    documents = record.get("documents")
    if isinstance(documents, list) and documents:
        return {"documents": documents, "count": record.get("count", len(documents))}

    document_list = record.get("documentList")
    if isinstance(document_list, list) and document_list and isinstance(document_list[0], dict):
        return {"documents": document_list, "count": len(document_list)}

    raw_id = record.get("documentId") or record.get("id")
    if raw_id and str(raw_id).strip():
        entry: Dict[str, Any] = {"id": str(raw_id).strip()}
        file_name = record.get("fileName") or record.get("name")
        if file_name:
            entry["name"] = str(file_name)
        mime_type = record.get("mimeType")
        if mime_type:
            entry["mimeType"] = str(mime_type)
        return {"documents": [entry], "count": 1}

    file_records = record.get("files")
    if isinstance(file_records, list) and file_records:
        converted = (
            _file_record_to_document(file_record)
            for file_record in file_records
            if isinstance(file_record, dict)
        )
        usable = [doc for doc in converted if doc]
        if usable:
            return {"documents": usable, "count": len(usable)}
    return None