# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Smoke test: RendererPdf with every JSON section/element shape the pipeline supports.

Canonical section types (datamodelJson.supportedSectionTypes):
table, bullet_list, heading, paragraph, code_block, image.

PDF renderer additionally handles element types: reference, extracted_text (Phase 5D).
"""

from __future__ import annotations

from types import SimpleNamespace

import pytest

from modules.serviceCenter.services.serviceGeneration.renderers.rendererPdf import (
    REPORTLAB_AVAILABLE,
    RendererPdf,
    _normalizePdfMonospaceText,
    _prepareCodeBlockPlainText,
)

# 1×1 transparent PNG
_MIN_PNG_B64 = (
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="
)


def _fakeServices() -> SimpleNamespace:
    """RendererPdf calls services.utils.debugLogToFile; avoid None."""

    def _noop(msg, tag=None):
        pass

    return SimpleNamespace(utils=SimpleNamespace(debugLogToFile=_noop))


def _fullDocumentJson() -> dict:
    """One document covering all supported content_type values plus reference/extracted_text elements."""
    return {
        "metadata": {
            "split_strategy": "single_document",
            "source_documents": [],
            "extraction_method": "smoke_test",
            "title": "PDF Renderer Smoke",
            "language": "de",
        },
        "documents": [
            {
                "id": "doc_smoke",
                "title": "PDF Renderer Smoke",
                "filename": "pdf_renderer_smoke.pdf",
                "sections": [
                    {
                        "id": "sec_h1",
                        "content_type": "heading",
                        "order": 1,
                        "elements": [
                            {
                                "content": {
                                    "text": "H1 with **bold** and a very long subtitle line that should wrap cleanly without overlapping",
                                    "level": 1,
                                }
                            }
                        ],
                    },
                    {
                        "id": "sec_h2",
                        "content_type": "heading",
                        "order": 2,
                        "elements": [{"content": {"text": "H2 *italic* and `inline code`", "level": 2}}],
                    },
                    {
                        "id": "sec_para",
                        "content_type": "paragraph",
                        "order": 3,
                        "elements": [
                            {
                                "content": {
                                    "text": (
                                        "Paragraph: **strong**, *emphasis*, __under-like bold__, "
                                        "_single underscores_, and `var = 1`."
                                    )
                                }
                            }
                        ],
                    },
                    {
                        "id": "sec_bullets",
                        "content_type": "bullet_list",
                        "order": 4,
                        "elements": [
                            {
                                "content": {
                                    "items": [
                                        "Bullet **one**",
                                        {"text": "Bullet two with *italic*"},
                                    ],
                                    "list_type": "bullet",
                                }
                            }
                        ],
                    },
                    {
                        "id": "sec_numbered",
                        "content_type": "bullet_list",
                        "order": 5,
                        "elements": [
                            {
                                "content": {
                                    "items": [{"text": "First numbered"}, {"text": "Second **numbered**"}],
                                    "list_type": "numbered",
                                }
                            }
                        ],
                    },
                    {
                        "id": "sec_table",
                        "content_type": "table",
                        "order": 6,
                        "elements": [
                            {
                                "content": {
                                    "headers": ["Col A", "Col B", "Col C"],
                                    "rows": [
                                        ["Short", "Medium length cell", "**Bold** in cell"],
                                        ["R2", "Data", "`code`"],
                                    ],
                                }
                            }
                        ],
                    },
                    {
                        "id": "sec_code",
                        "content_type": "code_block",
                        "order": 7,
                        "elements": [
                            {
                                "content": {
                                    "language": "python",
                                    "code": (
                                        'def hello():\n print(" & ampersand")\n return 42\n'
                                        "\n# tree (Unicode box drawing must not produce tofu in PDF)\n"
                                        "Reports/\n\u251c\u2500\u2500 2025/\n\u2502 \u2514\u2500\u2500 file.txt\n"
                                    ),
                                }
                            }
                        ],
                    },
                    {
                        "id": "sec_image",
                        "content_type": "image",
                        "order": 8,
                        "elements": [
                            {
                                "content": {
                                    "base64Data": _MIN_PNG_B64,
                                    "altText": "Smoke pixel",
                                    "caption": "Minimal PNG (1×1)",
                                }
                            }
                        ],
                    },
                    {
                        "id": "sec_reference",
                        "content_type": "paragraph",
                        "order": 9,
                        "elements": [
                            {
                                "type": "reference",
                                "label": "External spec",
                                "documentReference": "urn:smoke:ref",
                            }
                        ],
                    },
                    {
                        "id": "sec_extracted",
                        "content_type": "paragraph",
                        "order": 10,
                        "elements": [
                            {
                                "type": "extracted_text",
                                "content": "Extracted **body** with formatting.",
                                "source": "fixture/source.md",
                            }
                        ],
                    },
                ],
            }
        ],
    }


@pytest.mark.asyncio
async def test_renderer_pdf_all_json_elements(tmp_path):
    """Render the full fixture once and sanity-check the produced PDF document."""
    if not REPORTLAB_AVAILABLE:
        pytest.skip("reportlab is not installed")

    renderer = RendererPdf(services=_fakeServices())
    payload = _fullDocumentJson()
    docs = await renderer.render(
        extractedContent=payload,
        title="PDF_Renderer_Smoke",
        userPrompt=None,
        aiService=None,
    )

    assert len(docs) == 1
    out = docs[0]
    assert out.mimeType == "application/pdf"
    # Every PDF file starts with the "%PDF" magic bytes.
    assert out.documentData[:4] == b"%PDF"
    assert out.filename.endswith(".pdf")

    # Persist the bytes so a failing run leaves an artifact to inspect.
    outPath = tmp_path / "pdf_renderer_smoke.pdf"
    outPath.write_bytes(out.documentData)
    assert outPath.stat().st_size > 500


def test_prepare_code_block_preserves_indentation_spaces():
    """Leading spaces of code lines must survive; tabs must not remain in the output."""
    # The fixture needs real multi-space indentation: with single spaces the
    # test could not tell "preserved" apart from "collapsed to one space".
    raw = "def x():\n    return 1\n  two leading on line"
    assert "    return" in _prepareCodeBlockPlainText(raw)
    assert "\t" not in _prepareCodeBlockPlainText("a\tb")


def test_normalize_pdf_monospace_replaces_box_drawing():
    """Box-drawing characters are replaced while ordinary text is kept."""
    raw = "\u2500\u2502\u251c\u2514\u252c\nReports/\n"
    norm = _normalizePdfMonospaceText(raw)
    assert "\u2500" not in norm
    assert "\u2502" not in norm
    assert "Reports/" in norm


def test_pdf_heading_font_sizes_strictly_decrease():
    """H3 must not fall back to H1 styles (previous bug: ## smaller than ###)."""
    renderer = RendererPdf(services=_fakeServices())
    styles = renderer._getDefaultStyleSet()

    assert styles["heading1"]["font_size"] > styles["heading2"]["font_size"] > styles["heading3"]["font_size"]
    assert renderer._defaultHeadingStyleDef(2)["font_size"] > renderer._defaultHeadingStyleDef(3)["font_size"]

    # The remaining checks read `.fontSize` from created style objects and are
    # therefore only run when reportlab is available.
    if REPORTLAB_AVAILABLE:
        s1 = renderer._createHeadingStyle(styles, 1).fontSize
        s2 = renderer._createHeadingStyle(styles, 2).fontSize
        s3 = renderer._createHeadingStyle(styles, 3).fontSize
        assert s1 > s2 > s3

        # A style set without "heading3" must still yield an H3 smaller than H2.
        partial = {"heading1": styles["heading1"], "heading2": styles["heading2"]}
        assert renderer._createHeadingStyle(partial, 3).fontSize < renderer._createHeadingStyle(partial, 2).fontSize


def test_inline_code_angle_brackets_escaped_in_font_span():
    """Paths like `.../<Name>/<Slug>` must not break ReportLab XML inside the Courier font span."""
    renderer = RendererPdf(services=_fakeServices())
    xml = renderer._markdownInlineToReportlabXml("unter `Eingabe/<Name>/<Slug>` speichern")
    assert 'name="Courier"' in xml
    # The placeholder must arrive XML-escaped; a raw "<Slug>" would be parsed
    # as an unknown tag by the paragraph markup parser.
    assert "&lt;Slug&gt;" in xml
    assert "<Slug>" not in xml


def test_emoji_codepoints_wrapped_in_emoji_font_span():
    """Emoji codepoints must be wrapped in <font name="NotoEmoji"> so ReportLab swaps to the
    Noto Emoji TTF instead of producing missing-glyph squares."""
    if not REPORTLAB_AVAILABLE:
        pytest.skip("reportlab is not installed")

    renderer = RendererPdf(services=_fakeServices())
    xml = renderer._markdownInlineToReportlabXml("Status: \U0001F600 done \U0001F389")

    # Either the font registered (preferred) and wrapped, or font missing and
    # text passes through unchanged. Both branches must keep the body readable.
    from modules.serviceCenter.services.serviceGeneration.renderers._pdfFontFallback import (
        _initialize as _emojiInit,
    )

    if _emojiInit():
        assert 'name="NotoEmoji"' in xml
        assert "\U0001F600" in xml
        assert "\U0001F389" in xml
    else:
        assert "\U0001F600" in xml

    # Bold + emoji must produce nested font tag inside <b>...</b>.
    xmlBold = renderer._markdownInlineToReportlabXml("**OK \U00002705**")
    assert "<b>" in xmlBold
    assert "</b>" in xmlBold
    if _emojiInit():
        assert 'name="NotoEmoji"' in xmlBold