# gateway/tests/unit/workflow/test_merge_context_handover.py

# Unit tests: context.mergeContext primary text from extract handover (documents[0]).
import json
import pytest
from modules.workflows.methods.methodContext.actions.extractContent import HANDOVER_KIND
from modules.workflows.methods.methodContext.actions.mergeContext import mergeContext
def _handover(text: str) -> dict:
    """Build the smallest valid extract-handover payload carrying one text part."""
    text_part = {"typeGroup": "text", "data": text, "id": "t1"}
    return {
        "kind": HANDOVER_KIND,
        "fileOrder": ["f1"],
        "files": {"f1": {"parts": [text_part]}},
    }
@pytest.mark.asyncio
async def test_mergeContext_requires_dataSource():
    """Calling without ``dataSource`` must fail and mention the missing parameter."""
    outcome = await mergeContext(object(), {})
    assert not outcome.success
    message = outcome.error or ""
    # Error text may be German ("erforderlich" = required) or name the key directly.
    assert "dataSource" in message or "erforderlich" in message.lower()
@pytest.mark.asyncio
async def test_mergeContext_handover_only_in_documents_yields_data_response():
    """A handover present only inside ``documents`` still feeds the primary response."""
    document = {
        "documentName": "handover.json",
        "mimeType": "application/json",
        "documentData": _handover("only-from-handover"),
    }
    source_item = {"success": True, "data": {}, "documents": [document]}
    outcome = await mergeContext(object(), {"dataSource": [source_item]})
    assert outcome.success
    assert outcome.data
    assert outcome.data.get("response") == "only-from-handover"
@pytest.mark.asyncio
async def test_mergeContext_handover_json_string_in_documentData():
    """A JSON-encoded handover string in ``documentData`` is parsed transparently."""
    encoded = json.dumps(_handover("from-json-string"))
    document = {
        "documentName": "handover.json",
        "mimeType": "application/json",
        "documentData": encoded,
    }
    source_item = {"success": True, "data": {}, "documents": [document]}
    outcome = await mergeContext(object(), {"dataSource": [source_item]})
    assert outcome.success
    assert outcome.data.get("response") == "from-json-string"
@pytest.mark.asyncio
async def test_mergeContext_joins_multiple_handover_items():
    """Multiple handover items are concatenated under per-document ``###`` headings."""

    def _source(name: str, text: str) -> dict:
        # One dataSource entry whose single document carries a handover payload.
        return {
            "success": True,
            "data": {},
            "documents": [{"documentData": _handover(text), "documentName": name}],
        }

    sources = [_source("a.json", "alpha"), _source("b.json", "beta")]
    outcome = await mergeContext(object(), {"dataSource": sources})
    assert outcome.success
    assert outcome.data.get("response") == "### a.json\n\nalpha\n\n### b.json\n\nbeta"
@pytest.mark.asyncio
async def test_mergeContext_merged_response_wins_over_handover_chunks():
    """An explicit ``data.response`` takes precedence over any handover text."""
    source_item = {
        "success": True,
        "data": {"response": "merged-wins"},
        "documents": [{"documentData": _handover("ignored"), "documentName": "a.json"}],
    }
    outcome = await mergeContext(object(), {"dataSource": [source_item]})
    assert outcome.success
    assert outcome.data.get("response") == "merged-wins"
@pytest.mark.asyncio
async def test_mergeContext_concatenates_each_iteration_data_response_not_only_last():
    """deep_merge overwrites ``response``; synthesis must still include every loop body result."""
    chunks = ("chunk-aaa", "chunk-bbb", "chunk-ccc")
    sources = [{"success": True, "data": {"response": chunk}} for chunk in chunks]
    outcome = await mergeContext(object(), {"dataSource": sources})
    assert outcome.success
    combined = outcome.data.get("response") or ""
    for chunk in chunks:
        assert chunk in combined
    assert combined == "chunk-aaa\n\nchunk-bbb\n\nchunk-ccc"
    # The merged mirror must carry the same synthesized text.
    assert outcome.data["merged"]["response"] == combined
@pytest.mark.asyncio
async def test_mergeContext_primary_serializes_as_plain_text_for_file_create():
    """The primary response must be a plain string that serialize_context passes through."""
    from modules.workflows.methods.methodAi._common import serialize_context

    sources = [
        {"success": True, "data": {"response": "section-one"}},
        {"success": True, "data": {"response": "section-two"}},
    ]
    outcome = await mergeContext(object(), {"dataSource": sources})
    primary = outcome.data.get("response")
    assert isinstance(primary, str)
    # Plain text must round-trip unchanged through the serializer.
    assert serialize_context(primary) == primary
@pytest.mark.asyncio
async def test_mergeContext_strips_document_data_from_merged_documents():
    """documentData must be None in merged.documents — blobs must not accumulate."""
    big_blob = "x" * 100_000

    def _source(name: str, response: str) -> dict:
        return {
            "success": True,
            "data": {"response": response},
            "documents": [
                {"documentName": name, "mimeType": "application/json", "documentData": big_blob},
            ],
        }

    sources = [_source("a.json", "a"), _source("b.json", "b")]
    outcome = await mergeContext(object(), {"dataSource": sources})
    assert outcome.success
    merged_documents = outcome.data["merged"].get("documents") or []
    assert len(merged_documents) >= 1
    for document in merged_documents:
        assert document.get("documentData") is None, "documentData must be stripped before deep-merge"
@pytest.mark.asyncio
async def test_mergeContext_accumulates_image_documents_only_across_iterations():
    """imageDocumentsOnly from every iteration must be list-concat in merged."""
    first_image = {"documentName": "img_a.png", "mimeType": "image/png", "documentData": "aaa="}
    second_image = {"documentName": "img_b.png", "mimeType": "image/png", "documentData": "bbb="}
    sources = [
        {"success": True, "data": {"response": "a"}, "imageDocumentsOnly": [first_image]},
        {"success": True, "data": {"response": "b"}, "imageDocumentsOnly": [second_image]},
    ]
    outcome = await mergeContext(object(), {"dataSource": sources})
    assert outcome.success
    accumulated = outcome.data["merged"].get("imageDocumentsOnly") or []
    document_names = [entry.get("documentName") for entry in accumulated]
    assert "img_a.png" in document_names
    assert "img_b.png" in document_names