# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
    ChunkerRegistry,
    getExtractorRegistry,
)


def test_run_extraction_html_with_udm():
    """Extract an HTML payload with outputFormat="both" and outputDetail="full".

    Expects the result to carry non-empty parts and a UDM whose sourceType
    is "html" and whose children are non-empty.
    """
    reg = getExtractorRegistry()
    chunkers = ChunkerRegistry()
    # NOTE(review): the payload as received contains no markup, only the text
    # "Hello" surrounded by newlines. It looks like the HTML tags were stripped
    # in transit — confirm against the original fixture.
    html = b"\n\nHello\n\n"
    opts = ExtractionOptions(
        mergeStrategy=MergeStrategy(),
        outputFormat="both",
        outputDetail="full",
    )

    ec = runExtraction(reg, chunkers, html, "t.html", "text/html", opts)

    assert ec.parts
    assert ec.udm is not None
    assert ec.udm.sourceType == "html"
    assert ec.udm.children


def test_run_extraction_parts_only_no_udm():
    """Extract with outputFormat="parts" and check that no UDM is attached.

    Expects non-empty parts and ``udm`` to be None.
    """
    reg = getExtractorRegistry()
    chunkers = ChunkerRegistry()
    # NOTE(review): the payload is empty as received, yet the test expects
    # parts to be non-empty. Presumably the markup was lost the same way as in
    # the test above — verify against the original fixture.
    html = b""
    opts = ExtractionOptions(mergeStrategy=MergeStrategy(), outputFormat="parts")

    ec = runExtraction(reg, chunkers, html, "t.html", "text/html", opts)

    assert ec.parts
    assert ec.udm is None


def test_get_extractor_registry_singleton():
    """Two calls to getExtractorRegistry() must return the very same object."""
    a = getExtractorRegistry()
    b = getExtractorRegistry()
    assert a is b