gateway/tests/unit/workflow/test_extract_content_handover.py

63 lines
2.1 KiB
Python

# Unit tests: unified extractContent handover (text vs image sidecars).
import base64
from modules.workflows.methods.methodContext.actions import extractContent as ec
def test_joined_text_from_handover_orders_text_parts_only():
    """Check the text joined from a handover payload with mixed part types.

    The single file ``f1`` carries two ``text`` parts with an empty
    ``container`` part between them. The result of
    ``ec._joined_text_from_handover_payload`` must equal ``"A\\n\\nB"``.
    """
    # Fixture: text / container / text, in this exact order.
    file_parts = [
        {"typeGroup": "text", "data": " A\n", "id": "x"},
        {"typeGroup": "container", "data": "", "id": "c"},
        {"typeGroup": "text", "data": "B", "id": "y"},
    ]
    handover = {
        "kind": ec.HANDOVER_KIND,
        "fileOrder": ["f1"],
        "files": {"f1": {"parts": file_parts}},
    }

    joined = ec._joined_text_from_handover_payload(handover)

    # Expected value: " A\n" shows up as "A", followed by a blank line and "B";
    # nothing from the container part appears.
    assert joined == "A\n\nB"
def test_split_images_moves_pixels_to_blob_docs():
    """Check that image data is moved out of the payload into a sidecar document.

    The payload holds one ``text`` part and one base64-encoded ``image`` part.
    After ``ec._split_images_to_sidecar_documents`` the test expects:

    * exactly one sidecar document holding the decoded bytes, with mime type
      ``image/png``, a name of the form ``extract_media_*.png`` and
      ``handoverRole == "extractedMedia"`` in its validation metadata;
    * the image part still present in the returned payload, with empty
      ``data`` and ``handoverMediaDocumentName`` set to the sidecar's name;
    * an ``"image"`` entry in the file's ``byTypeGroup``.
    """
    pixel_bytes = b"fake-binary-image"
    encoded = base64.b64encode(pixel_bytes).decode("ascii")

    text_part = {"typeGroup": "text", "data": "x", "id": "t1"}
    image_part = {
        "typeGroup": "image",
        "mimeType": "image/png",
        "data": encoded,
        "id": "p1-img",
        "metadata": {},
    }
    handover = {
        "kind": ec.HANDOVER_KIND,
        "schemaVersion": 1,
        "fileOrder": ["f1"],
        "files": {"f1": {"parts": [text_part, image_part]}},
    }

    stripped_payload, sidecars = ec._split_images_to_sidecar_documents(
        handover, document_name_stem="abc"
    )

    # Sidecar side: one document carrying the raw (decoded) image bytes.
    assert len(sidecars) == 1
    sidecar = sidecars[0]
    assert sidecar.mimeType == "image/png"
    assert sidecar.documentData == pixel_bytes
    assert sidecar.documentName.endswith(".png")
    assert sidecar.documentName.startswith("extract_media_")
    sidecar_meta = sidecar.validationMetadata or {}
    assert sidecar_meta.get("handoverRole") == "extractedMedia"

    # Payload side: collect the dict parts still tagged as images.
    remaining_images = []
    for part in stripped_payload["files"]["f1"]["parts"]:
        if not isinstance(part, dict):
            continue
        if (part.get("typeGroup") or "") != "image":
            continue
        remaining_images.append(part)

    assert len(remaining_images) == 1
    placeholder = remaining_images[0]
    assert placeholder["data"] == ""
    assert placeholder["handoverMediaDocumentName"] == sidecar.documentName
    assert "image" in stripped_payload["files"]["f1"]["byTypeGroup"]