# gateway/modules/workflows/methods/methodContext/actions/mergeContext.py

# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Action ``context.mergeContext``.
Receives a list of results (e.g. from ``flow.loop`` ``bodyResults``) via the
``dataSource`` DataRef parameter and deep-merges them into a single dict.
``dataSource`` must be set explicitly (resolved DataRef). There is no implicit
fallback to ``_upstreamPayload`` or loop payloads.
"""
from __future__ import annotations
import copy
import json
import logging
from typing import Any, Dict, List, Optional
from modules.datamodels.datamodelChat import ActionResult
from modules.workflows.methods.methodContext.actions.extractContent import (
_joined_text_from_handover_payload,
)
logger = logging.getLogger(__name__)
def _deep_merge(target: Dict[str, Any], source: Dict[str, Any], conflicts: List[str], path: str = "") -> None:
for k, v in source.items():
full = f"{path}.{k}" if path else k
if k not in target:
target[k] = copy.deepcopy(v) if isinstance(v, (dict, list)) else v
continue
existing = target[k]
if isinstance(existing, dict) and isinstance(v, dict):
_deep_merge(existing, v, conflicts, full)
elif isinstance(existing, list) and isinstance(v, list):
target[k] = existing + v
else:
if existing != v:
conflicts.append(full)
target[k] = copy.deepcopy(v) if isinstance(v, (dict, list)) else v
def _coerce_to_list(value: Any) -> List[Any]:
"""Normalise ``value`` to a list of items to merge."""
if isinstance(value, list):
return value
if value is None:
return []
return [value]
def _strip_document_data(doc: Any) -> Any:
"""Keep document metadata but drop the raw blob so deep-merge stays small."""
if not isinstance(doc, dict):
return doc
out = dict(doc)
out["documentData"] = None
return out
def _merge_payload(item: Any) -> Optional[Dict[str, Any]]:
"""Return the dict to deep-merge for this item, or ``None`` to skip.
``documents[n].documentData`` is nulled before merging so large blobs
(e.g. ~34 MB handover-JSON per extractContent iteration) don't accumulate.
``imageDocumentsOnly`` is left intact — ``_deep_merge`` list-concats it
across iterations, giving downstream nodes all images from all iterations.
"""
if not isinstance(item, dict):
return None
if item.get("success") is False:
return None
out = dict(item)
if isinstance(out.get("documents"), list):
out["documents"] = [_strip_document_data(d) for d in out["documents"]]
return out
def _primary_text_from_item(it: Any) -> str:
"""Same sources as ``actionNodeExecutor`` / ``context.extractContent`` for primary text."""
if not isinstance(it, dict):
return ""
r = it.get("response")
if r is not None and str(r).strip():
return str(r).strip()
inner = it.get("data")
if isinstance(inner, dict):
r = inner.get("response")
if r is not None and str(r).strip():
return str(r).strip()
docs = it.get("documents")
if not isinstance(docs, list) or not docs:
return ""
doc0 = docs[0]
raw: Any = None
if isinstance(doc0, dict):
raw = doc0.get("documentData")
elif hasattr(doc0, "documentData"):
raw = getattr(doc0, "documentData", None)
if isinstance(raw, bytes):
try:
return raw.decode("utf-8").strip()
except (UnicodeDecodeError, ValueError):
return ""
if isinstance(raw, dict):
return (_joined_text_from_handover_payload(raw) or "").strip()
if isinstance(raw, str) and raw.strip():
s = raw.strip()
if s.startswith("{") and s.endswith("}"):
try:
parsed = json.loads(s)
if isinstance(parsed, dict):
return (_joined_text_from_handover_payload(parsed) or "").strip()
except (json.JSONDecodeError, TypeError):
pass
return s
return ""
def _sanitize_heading_title(name: str) -> str:
t = " ".join(name.replace("\r", " ").replace("\n", " ").split()).strip()
return t[:160] if len(t) > 160 else t
def _iteration_heading_from_item(it: Any) -> Optional[str]:
if not isinstance(it, dict):
return None
docs = it.get("documents")
if not isinstance(docs, list) or not docs:
return None
d0 = docs[0]
if not isinstance(d0, dict):
return None
name = d0.get("documentName")
if isinstance(name, str) and name.strip():
return _sanitize_heading_title(name.strip())
return None
def _synthesize_primary_response(merged: Dict[str, Any], inputs: List[Any]) -> str:
"""Flat text for ``ActionResult.response`` / file.create.
Prefer concatenating each input's primary text (loop bodyResults) so no
iteration is dropped — ``deep_merge`` overwrites scalar ``response`` with
the last item only; that merged value is a fallback when no per-item text
is found.
When several inputs are merged, prefix each chunk with a markdown ``###``
heading from ``documents[0].documentName`` so ``file.create`` renders clear
sections (CSV vs PDF vs …).
"""
chunks: List[str] = []
multi = len(inputs) > 1
for it in inputs:
t = _primary_text_from_item(it)
if not t:
continue
if multi:
h = _iteration_heading_from_item(it)
if h:
chunks.append(f"### {h}\n\n{t}")
continue
chunks.append(t)
if chunks:
return "\n\n".join(chunks)
if isinstance(merged, dict):
r = merged.get("response")
if r is not None and str(r).strip():
return str(r).strip()
if isinstance(merged, dict) and merged:
try:
return json.dumps(merged, ensure_ascii=False, indent=2, default=str)
except Exception:
return str(merged)
return ""
async def mergeContext(self, parameters: Dict[str, Any]) -> ActionResult:
    """Action ``context.mergeContext``: deep-merge a list of results.

    Parameters (in ``parameters``):
        dataSource: resolved DataRef holding the item(s) to merge.
            Required; a missing, ``None``, blank-string, or empty-list value
            fails the action (no implicit upstream fallback).

    Returns an ``ActionResult`` whose ``data`` contains:
        merged: the deep-merged dict (dicts recursed, lists concatenated),
        inputs: all non-None source items in original order,
        first / count: convenience accessors over ``inputs``,
        conflicts: sorted, de-duplicated dotted paths of scalar collisions,
        response: flat primary text synthesized from the inputs.
    """
    try:
        if "dataSource" not in parameters:
            raise ValueError("dataSource is required (set a DataRef on the merge node)")
        raw = parameters["dataSource"]
        # A blank string DataRef counts as unset.
        if isinstance(raw, str) and not raw.strip():
            raw = None
        if raw is None:
            return ActionResult.isFailure(error="dataSource ist erforderlich (DataRef auf die Quelle setzen).")
        if isinstance(raw, list) and len(raw) == 0:
            return ActionResult.isFailure(error="Keine Datenquelle angegeben oder Datenquelle ist leer.")
        items = _coerce_to_list(raw)
        if not items:
            return ActionResult.isFailure(error="Keine Datenquelle angegeben oder Datenquelle ist leer.")
        merged: Dict[str, Any] = {}
        conflicts: List[str] = []
        inputs: List[Any] = []
        for item in items:
            if item is None:
                continue
            inputs.append(item)
            # None for non-dict items and explicitly failed items; an empty
            # payload is a no-op, so the truthiness check is safe.
            payload = _merge_payload(item)
            if payload:
                _deep_merge(merged, payload, conflicts)
        if not inputs:
            return ActionResult.isFailure(error="Alle Einträge in der Datenquelle sind leer.")
        primary = _synthesize_primary_response(merged, inputs)
        merged["response"] = primary
        _ps = primary if isinstance(primary, str) else repr(primary)
        logger.info(
            "mergeContext: inputs=%d merged_keys=%s primary_len=%d primary_preview=%r conflicts=%d",
            len(inputs),
            list(merged.keys())[:20],
            len(_ps or ""),
            # BUG FIX: previously appended "" after truncation, so truncated
            # previews were indistinguishable from complete short ones;
            # append an ellipsis marker instead.
            (_ps[:200] + "…") if len(_ps) > 200 else _ps,
            len(conflicts),
        )
        data: Dict[str, Any] = {
            "merged": merged,
            "inputs": inputs,
            "first": inputs[0] if inputs else None,
            "count": len(inputs),
            "conflicts": sorted(set(conflicts)) if conflicts else [],
            "response": primary,
        }
        return ActionResult.isSuccess(data=data)
    except Exception as exc:
        logger.exception("mergeContext failed")
        return ActionResult.isFailure(error=str(exc))