# Unit tests: unified extractContent handover (text vs image sidecars).
import base64

from modules.workflows.methods.methodContext.actions import extractContent as ec


def test_joined_text_from_handover_orders_text_parts_only():
    """Join the text of a handover payload that mixes text and container parts.

    The payload holds two ``text`` parts with a ``container`` part between
    them; the expected joined string contains the two text payloads in
    payload order.
    """
    parts = [
        {"typeGroup": "text", "data": " A\n", "id": "x"},
        {"typeGroup": "container", "data": "", "id": "c"},
        {"typeGroup": "text", "data": "B", "id": "y"},
    ]
    handover = {
        "kind": ec.HANDOVER_KIND,
        "fileOrder": ["f1"],
        "files": {"f1": {"parts": parts}},
    }

    joined = ec._joined_text_from_handover_payload(handover)

    assert joined == "A\n\nB"


def test_split_images_moves_pixels_to_blob_docs():
    """Split a payload with one base64 image part into payload + sidecar docs.

    Checks that exactly one sidecar document is produced carrying the decoded
    bytes, and that the image part left in the returned payload has empty
    ``data`` and names the sidecar document.
    """
    image_bytes = b"fake-binary-image"
    encoded = base64.b64encode(image_bytes).decode("ascii")

    text_part = {"typeGroup": "text", "data": "x", "id": "t1"}
    image_part = {
        "typeGroup": "image",
        "mimeType": "image/png",
        "data": encoded,
        "id": "p1-img",
        "metadata": {},
    }
    handover = {
        "kind": ec.HANDOVER_KIND,
        "schemaVersion": 1,
        "fileOrder": ["f1"],
        "files": {"f1": {"parts": [text_part, image_part]}},
    }

    stripped, sidecars = ec._split_images_to_sidecar_documents(
        handover, document_name_stem="abc"
    )

    # Exactly one sidecar document, holding the decoded image bytes.
    assert len(sidecars) == 1
    sidecar = sidecars[0]
    assert sidecar.mimeType == "image/png"
    assert sidecar.documentData == image_bytes
    assert sidecar.documentName.endswith(".png")
    assert sidecar.documentName.startswith("extract_media_")

    meta = sidecar.validationMetadata or {}
    assert meta.get("handoverRole") == "extractedMedia"

    # Collect the image parts that remain in the returned payload.
    file_entry = stripped["files"]["f1"]
    remaining_images = []
    for part in file_entry["parts"]:
        if not isinstance(part, dict):
            continue
        if (part.get("typeGroup") or "") == "image":
            remaining_images.append(part)

    assert len(remaining_images) == 1
    placeholder = remaining_images[0]
    assert placeholder["data"] == ""
    assert placeholder["handoverMediaDocumentName"] == sidecar.documentName
    assert "image" in file_entry["byTypeGroup"]