# Copyright (c) 2026 Patrick Motsch
# All rights reserved.

"""Action ``context.mergeContext``.

Receives a list of results (e.g. from ``flow.loop`` ``bodyResults``) via the
``dataSource`` DataRef parameter and deep-merges them into a single dict.

``dataSource`` must be set explicitly (resolved DataRef). There is no implicit
fallback to ``_upstreamPayload`` or loop payloads.
"""

from __future__ import annotations

import copy
import json
import logging
from typing import Any, Dict, List, Optional

from modules.datamodels.datamodelChat import ActionResult
from modules.workflows.methods.methodContext.actions.extractContent import (
    _joined_text_from_handover_payload,
)

logger = logging.getLogger(__name__)
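
# Illustrative only: the item shape below is an assumption inferred from the
# keys read further down (``response``, ``data``, ``documents``, ``success``),
# not a guaranteed schema. One ``dataSource`` item (a single ``flow.loop``
# bodyResult) might look like:
#
#     {
#         "success": True,
#         "response": "extracted text",
#         "documents": [{"documentName": "report.pdf", "documentData": b"..."}],
#     }
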
def _deep_merge(target: Dict[str, Any], source: Dict[str, Any], conflicts: List[str], path: str = "") -> None:
    """Merge ``source`` into ``target`` in place.

    Nested dicts are merged recursively, lists are concatenated, and any other
    differing values are overwritten by ``source`` with the dotted key path
    recorded in ``conflicts``.
    """
    for k, v in source.items():
        full = f"{path}.{k}" if path else k
        if k not in target:
            target[k] = copy.deepcopy(v) if isinstance(v, (dict, list)) else v
            continue
        existing = target[k]
        if isinstance(existing, dict) and isinstance(v, dict):
            _deep_merge(existing, v, conflicts, full)
        elif isinstance(existing, list) and isinstance(v, list):
            target[k] = existing + v
        else:
            if existing != v:
                conflicts.append(full)
                target[k] = copy.deepcopy(v) if isinstance(v, (dict, list)) else v
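
# A minimal sketch of the ``_deep_merge`` semantics (illustrative, not executed):
# nested dicts recurse, lists concatenate, differing scalars take the source
# value and the dotted path lands in ``conflicts``.
#
#     >>> merged, conflicts = {"a": 1, "items": [1]}, []
#     >>> _deep_merge(merged, {"a": 2, "items": [2], "b": 3}, conflicts)
#     >>> merged
#     {'a': 2, 'items': [1, 2], 'b': 3}
#     >>> conflicts
#     ['a']
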
def _coerce_to_list(value: Any) -> List[Any]:
    """Normalise ``value`` to a list of items to merge."""
    if isinstance(value, list):
        return value
    if value is None:
        return []
    return [value]


def _strip_document_data(doc: Any) -> Any:
    """Keep document metadata but drop the raw blob so deep-merge stays small."""
    if not isinstance(doc, dict):
        return doc
    out = dict(doc)
    out["documentData"] = None
    return out


def _merge_payload(item: Any) -> Optional[Dict[str, Any]]:
    """Return the dict to deep-merge for this item, or ``None`` to skip.

    ``documents[n].documentData`` is nulled before merging so large blobs
    (e.g. ~3–4 MB handover-JSON per extractContent iteration) don't accumulate.
    ``imageDocumentsOnly`` is left intact; ``_deep_merge`` list-concats it
    across iterations, giving downstream nodes all images from all iterations.
    """
    if not isinstance(item, dict):
        return None
    if item.get("success") is False:
        return None
    out = dict(item)
    if isinstance(out.get("documents"), list):
        out["documents"] = [_strip_document_data(d) for d in out["documents"]]
    return out
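
# Illustrative only: for an item such as
#     {"success": True, "response": "text", "documents": [{"documentName": "a.csv", "documentData": b"..."}]}
# ``_merge_payload`` returns the same dict with ``documentData`` nulled, while an
# item carrying ``"success": False`` is skipped entirely (``None`` is returned).
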
def _primary_text_from_item(it: Any) -> str:
    """Same sources as ``actionNodeExecutor`` / ``context.extractContent`` for primary text."""
    if not isinstance(it, dict):
        return ""
    r = it.get("response")
    if r is not None and str(r).strip():
        return str(r).strip()
    inner = it.get("data")
    if isinstance(inner, dict):
        r = inner.get("response")
        if r is not None and str(r).strip():
            return str(r).strip()
    docs = it.get("documents")
    if not isinstance(docs, list) or not docs:
        return ""
    doc0 = docs[0]
    raw: Any = None
    if isinstance(doc0, dict):
        raw = doc0.get("documentData")
    elif hasattr(doc0, "documentData"):
        raw = getattr(doc0, "documentData", None)
    if isinstance(raw, bytes):
        try:
            return raw.decode("utf-8").strip()
        except (UnicodeDecodeError, ValueError):
            return ""
    if isinstance(raw, dict):
        return (_joined_text_from_handover_payload(raw) or "").strip()
    if isinstance(raw, str) and raw.strip():
        s = raw.strip()
        if s.startswith("{") and s.endswith("}"):
            try:
                parsed = json.loads(s)
                if isinstance(parsed, dict):
                    return (_joined_text_from_handover_payload(parsed) or "").strip()
            except (json.JSONDecodeError, TypeError):
                pass
        return s
    return ""
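
# Fallback order of ``_primary_text_from_item``, mirroring the branches above:
#     {"response": "summary"}                        -> "summary"
#     {"data": {"response": "inner"}}                -> "inner"
#     {"documents": [{"documentData": b"raw text"}]} -> "raw text"
#     {"documents": [{"documentData": '{"k": 1}'}]}  -> joined handover text
# Anything else yields "" and contributes nothing to the flat response.
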
def _sanitize_heading_title(name: str) -> str:
    """Collapse newlines and whitespace runs and cap the heading at 160 characters."""
    t = " ".join(name.replace("\r", " ").replace("\n", " ").split()).strip()
    return t[:160] if len(t) > 160 else t


def _iteration_heading_from_item(it: Any) -> Optional[str]:
    """Heading for one iteration, taken from ``documents[0].documentName`` if present."""
    if not isinstance(it, dict):
        return None
    docs = it.get("documents")
    if not isinstance(docs, list) or not docs:
        return None
    d0 = docs[0]
    if not isinstance(d0, dict):
        return None
    name = d0.get("documentName")
    if isinstance(name, str) and name.strip():
        return _sanitize_heading_title(name.strip())
    return None


def _synthesize_primary_response(merged: Dict[str, Any], inputs: List[Any]) -> str:
    """Flat text for ``ActionResult.response`` / file.create.

    Prefer concatenating each input's primary text (loop bodyResults) so no
    iteration is dropped; ``_deep_merge`` overwrites the scalar ``response``
    with the last item only, so that merged value is only a fallback when no
    per-item text is found.

    When several inputs are merged, prefix each chunk with a markdown ``###``
    heading from ``documents[0].documentName`` so ``file.create`` renders clear
    sections (CSV vs PDF vs …).
    """
    chunks: List[str] = []
    multi = len(inputs) > 1
    for it in inputs:
        t = _primary_text_from_item(it)
        if not t:
            continue
        if multi:
            h = _iteration_heading_from_item(it)
            if h:
                chunks.append(f"### {h}\n\n{t}")
                continue
        chunks.append(t)
    if chunks:
        return "\n\n".join(chunks)

    if isinstance(merged, dict):
        r = merged.get("response")
        if r is not None and str(r).strip():
            return str(r).strip()

    if isinstance(merged, dict) and merged:
        try:
            return json.dumps(merged, ensure_ascii=False, indent=2, default=str)
        except Exception:
            return str(merged)
    return ""
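
# Illustrative output of ``_synthesize_primary_response`` for two inputs whose
# ``documents[0].documentName`` are "data.csv" and "report.pdf":
#
#     ### data.csv
#
#     <text of iteration 1>
#
#     ### report.pdf
#
#     <text of iteration 2>
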
async def mergeContext(self, parameters: Dict[str, Any]) -> ActionResult:
    """Deep-merge the resolved ``dataSource`` items into a single result payload."""
    try:
        if "dataSource" not in parameters:
            raise ValueError("dataSource is required (set a DataRef on the merge node)")
        raw = parameters["dataSource"]
        if isinstance(raw, str) and not raw.strip():
            raw = None
        if raw is None:
            return ActionResult.isFailure(error="dataSource ist erforderlich (DataRef auf die Quelle setzen).")
        if isinstance(raw, list) and len(raw) == 0:
            return ActionResult.isFailure(error="Keine Datenquelle angegeben oder Datenquelle ist leer.")

        items = _coerce_to_list(raw)

        if not items:
            return ActionResult.isFailure(error="Keine Datenquelle angegeben oder Datenquelle ist leer.")

        merged: Dict[str, Any] = {}
        conflicts: List[str] = []
        inputs: List[Any] = []

        for item in items:
            if item is None:
                continue
            inputs.append(item)
            payload = _merge_payload(item)
            if payload:
                _deep_merge(merged, payload, conflicts)

        if not inputs:
            return ActionResult.isFailure(error="Alle Einträge in der Datenquelle sind leer.")

        primary = _synthesize_primary_response(merged, inputs)
        merged["response"] = primary

        _ps = primary if isinstance(primary, str) else repr(primary)
        logger.info(
            "mergeContext: inputs=%d merged_keys=%s primary_len=%d primary_preview=%r conflicts=%d",
            len(inputs),
            list(merged.keys())[:20],
            len(_ps or ""),
            (_ps[:200] + "…") if len(_ps) > 200 else _ps,
            len(conflicts),
        )
        data: Dict[str, Any] = {
            "merged": merged,
            "inputs": inputs,
            "first": inputs[0] if inputs else None,
            "count": len(inputs),
            "conflicts": sorted(set(conflicts)) if conflicts else [],
            "response": primary,
        }
        return ActionResult.isSuccess(data=data)
    except Exception as exc:
        logger.exception("mergeContext failed")
        return ActionResult.isFailure(error=str(exc))
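
# Illustrative only: a successful ``ActionResult.data`` for two merged loop
# iterations mirrors the dict built above, e.g.
#
#     {
#         "merged": {...},          # deep-merged payloads, "response" set to the flat text
#         "inputs": [item1, item2],
#         "first": item1,
#         "count": 2,
#         "conflicts": ["path.to.key"],
#         "response": "<synthesized flat text>",
#     }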