# Copyright (c) 2026 Patrick Motsch # All rights reserved. """Action ``context.mergeContext``. Receives a list of results (e.g. from ``flow.loop`` ``bodyResults``) via the ``dataSource`` DataRef parameter and deep-merges them into a single dict. ``dataSource`` must be set explicitly (resolved DataRef). There is no implicit fallback to ``_upstreamPayload`` or loop payloads. """ from __future__ import annotations import copy import json import logging from typing import Any, Dict, List, Optional from modules.datamodels.datamodelChat import ActionResult from modules.workflows.methods.methodContext.actions.extractContent import ( _joined_text_from_handover_payload, ) logger = logging.getLogger(__name__) def _deep_merge(target: Dict[str, Any], source: Dict[str, Any], conflicts: List[str], path: str = "") -> None: for k, v in source.items(): full = f"{path}.{k}" if path else k if k not in target: target[k] = copy.deepcopy(v) if isinstance(v, (dict, list)) else v continue existing = target[k] if isinstance(existing, dict) and isinstance(v, dict): _deep_merge(existing, v, conflicts, full) elif isinstance(existing, list) and isinstance(v, list): target[k] = existing + v else: if existing != v: conflicts.append(full) target[k] = copy.deepcopy(v) if isinstance(v, (dict, list)) else v def _coerce_to_list(value: Any) -> List[Any]: """Normalise ``value`` to a list of items to merge.""" if isinstance(value, list): return value if value is None: return [] return [value] def _strip_document_data(doc: Any) -> Any: """Keep document metadata but drop the raw blob so deep-merge stays small.""" if not isinstance(doc, dict): return doc out = dict(doc) out["documentData"] = None return out def _merge_payload(item: Any) -> Optional[Dict[str, Any]]: """Return the dict to deep-merge for this item, or ``None`` to skip. ``documents[n].documentData`` is nulled before merging so large blobs (e.g. ~3–4 MB handover-JSON per extractContent iteration) don't accumulate. ``imageDocumentsOnly`` is left intact — ``_deep_merge`` list-concats it across iterations, giving downstream nodes all images from all iterations. """ if not isinstance(item, dict): return None if item.get("success") is False: return None out = dict(item) if isinstance(out.get("documents"), list): out["documents"] = [_strip_document_data(d) for d in out["documents"]] return out def _primary_text_from_item(it: Any) -> str: """Same sources as ``actionNodeExecutor`` / ``context.extractContent`` for primary text.""" if not isinstance(it, dict): return "" r = it.get("response") if r is not None and str(r).strip(): return str(r).strip() inner = it.get("data") if isinstance(inner, dict): r = inner.get("response") if r is not None and str(r).strip(): return str(r).strip() docs = it.get("documents") if not isinstance(docs, list) or not docs: return "" doc0 = docs[0] raw: Any = None if isinstance(doc0, dict): raw = doc0.get("documentData") elif hasattr(doc0, "documentData"): raw = getattr(doc0, "documentData", None) if isinstance(raw, bytes): try: return raw.decode("utf-8").strip() except (UnicodeDecodeError, ValueError): return "" if isinstance(raw, dict): return (_joined_text_from_handover_payload(raw) or "").strip() if isinstance(raw, str) and raw.strip(): s = raw.strip() if s.startswith("{") and s.endswith("}"): try: parsed = json.loads(s) if isinstance(parsed, dict): return (_joined_text_from_handover_payload(parsed) or "").strip() except (json.JSONDecodeError, TypeError): pass return s return "" def _sanitize_heading_title(name: str) -> str: t = " ".join(name.replace("\r", " ").replace("\n", " ").split()).strip() return t[:160] if len(t) > 160 else t def _iteration_heading_from_item(it: Any) -> Optional[str]: if not isinstance(it, dict): return None docs = it.get("documents") if not isinstance(docs, list) or not docs: return None d0 = docs[0] if not isinstance(d0, dict): return None name = d0.get("documentName") if isinstance(name, str) and name.strip(): return _sanitize_heading_title(name.strip()) return None def _synthesize_primary_response(merged: Dict[str, Any], inputs: List[Any]) -> str: """Flat text for ``ActionResult.response`` / file.create. Prefer concatenating each input's primary text (loop bodyResults) so no iteration is dropped — ``deep_merge`` overwrites scalar ``response`` with the last item only; that merged value is a fallback when no per-item text is found. When several inputs are merged, prefix each chunk with a markdown ``###`` heading from ``documents[0].documentName`` so ``file.create`` renders clear sections (CSV vs PDF vs …). """ chunks: List[str] = [] multi = len(inputs) > 1 for it in inputs: t = _primary_text_from_item(it) if not t: continue if multi: h = _iteration_heading_from_item(it) if h: chunks.append(f"### {h}\n\n{t}") continue chunks.append(t) if chunks: return "\n\n".join(chunks) if isinstance(merged, dict): r = merged.get("response") if r is not None and str(r).strip(): return str(r).strip() if isinstance(merged, dict) and merged: try: return json.dumps(merged, ensure_ascii=False, indent=2, default=str) except Exception: return str(merged) return "" async def mergeContext(self, parameters: Dict[str, Any]) -> ActionResult: try: if "dataSource" not in parameters: raise ValueError("dataSource is required (set a DataRef on the merge node)") raw = parameters["dataSource"] if isinstance(raw, str) and not raw.strip(): raw = None if raw is None: return ActionResult.isFailure(error="dataSource ist erforderlich (DataRef auf die Quelle setzen).") if isinstance(raw, list) and len(raw) == 0: return ActionResult.isFailure(error="Keine Datenquelle angegeben oder Datenquelle ist leer.") items = _coerce_to_list(raw) if not items: return ActionResult.isFailure(error="Keine Datenquelle angegeben oder Datenquelle ist leer.") merged: Dict[str, Any] = {} conflicts: List[str] = [] inputs: List[Any] = [] for item in items: if item is None: continue inputs.append(item) payload = _merge_payload(item) if payload: _deep_merge(merged, payload, conflicts) if not inputs: return ActionResult.isFailure(error="Alle Einträge in der Datenquelle sind leer.") primary = _synthesize_primary_response(merged, inputs) merged["response"] = primary _ps = primary if isinstance(primary, str) else repr(primary) logger.info( "mergeContext: inputs=%d merged_keys=%s primary_len=%d primary_preview=%r conflicts=%d", len(inputs), list(merged.keys())[:20], len(_ps or ""), (_ps[:200] + "…") if len(_ps) > 200 else _ps, len(conflicts), ) data: Dict[str, Any] = { "merged": merged, "inputs": inputs, "first": inputs[0] if inputs else None, "count": len(inputs), "conflicts": sorted(set(conflicts)) if conflicts else [], "response": primary, } return ActionResult.isSuccess(data=data) except Exception as exc: logger.exception("mergeContext failed") return ActionResult.isFailure(error=str(exc))