# Phase 7: UDM tools (getUdmStructure, walkUdmBlocks, filterUdmByType).
from modules.serviceCenter.services.serviceAgent.coreTools._documentTools import (
    _filterUdmByTypeImpl,
    _getUdmStructureText,
    _parseUdmJson,
    _walkUdmBlocksImpl,
)


def test_parseUdmJson_dict():
    """A dict passed to ``_parseUdmJson`` must come back equal to the input."""
    document = {"id": "1", "role": "document", "children": []}
    parsed = _parseUdmJson(document)
    assert parsed == document


def test_parseUdmJson_string():
    """A JSON string passed to ``_parseUdmJson`` must yield a mapping with its ``id``."""
    serialized = '{"id":"x","role":"document","children":[]}'
    parsed = _parseUdmJson(serialized)
    assert parsed["id"] == "x"


def test_getUdmStructure_text():
    """The structure text must mention the source type and the content block count."""
    text_block = {"id": "c1", "contentType": "text", "raw": "hi"}
    page = {
        "id": "p1",
        "role": "page",
        "index": 0,
        "label": "P1",
        "children": [text_block],
    }
    document = {
        "id": "d1",
        "role": "document",
        "sourceType": "pdf",
        "children": [page],
    }

    summary = _getUdmStructureText(document)

    assert "pdf" in summary
    assert "contentBlocks=1" in summary


def test_walkUdm_blocks():
    """Walking a one-page document must collect both of its content blocks."""
    page = {
        "id": "p1",
        "role": "page",
        "children": [
            {"id": "t1", "contentType": "text", "raw": "a"},
            {"id": "i1", "contentType": "image", "raw": ""},
        ],
    }
    document = {"id": "d1", "role": "document", "children": [page]}

    # The walker appends into the list handed to it rather than returning one.
    collected = []
    _walkUdmBlocksImpl(document, collected, "document")

    assert len(collected) == 2
    seen_types = {block["contentType"] for block in collected}
    assert seen_types == {"text", "image"}


def test_filter_udm_by_type():
    """Filtering by ``"table"`` must report exactly the single table node."""
    page = {
        "id": "p1",
        "role": "page",
        "children": [
            {"id": "t1", "contentType": "text"},
            {"id": "x1", "contentType": "table"},
        ],
    }
    document = {"id": "d1", "role": "document", "children": [page]}

    result = _filterUdmByTypeImpl(document, "table")

    assert result["count"] == 1
    first_node = result["nodes"][0]
    assert first_node["id"] == "x1"