# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Inline emoji-font fallback for the ReportLab-based PDF renderer.

The default ReportLab core fonts (Helvetica, Times, Courier) only cover
WinAnsi (Latin-1 + a handful of typographic glyphs). Codepoints from the
Unicode Symbols / Pictographs / Emoji blocks render as a missing-glyph
square ("tofu") or are dropped entirely.

This module bundles a single TrueType emoji font (Noto Emoji, monochrome,
SIL Open Font License) and exposes `wrapEmojiSpansInXml` which rewrites
already-built ReportLab mini-XML so any character that the emoji font can
draw is wrapped in `<font name="NotoEmoji">...</font>`. ReportLab's
Paragraph parser supports nested tags, so emoji spans nest cleanly inside
inline tags such as `<b>`, `<i>`, and `<a>` markup produced elsewhere.

ReportLab does not natively color emoji (CBDT/COLR/SBIX glyph tables are
not honoured by its TTF backend) — Noto Emoji is intentionally a
monochrome outline font, which is the only flavour that will render at
all.
"""

from __future__ import annotations

import logging
import os
import re
from typing import FrozenSet, Optional

logger = logging.getLogger(__name__)

EMOJI_FONT_NAME = "NotoEmoji"

_RENDERER_DIR = os.path.dirname(os.path.abspath(__file__))
_GATEWAY_ROOT = os.path.abspath(os.path.join(_RENDERER_DIR, "..", "..", "..", "..", ".."))
_FONT_PATH = os.path.join(_GATEWAY_ROOT, "assets", "fonts", "NotoEmoji-Regular.ttf")

# Below 0x2000 the WinAnsi-style core fonts already cover Latin-1, common
# diacritics and basic punctuation. We only swap to the emoji font for
# higher codepoints so umlauts, copyright, NBSP, etc. stay visually
# consistent with surrounding body text.
_EMOJI_RANGE_START = 0x2000

# Codepoints (>= _EMOJI_RANGE_START) the registered emoji font can draw;
# None until `_initialize` succeeds.
_supportedCodepoints: Optional[FrozenSet[int]] = None
# Set on the first `_initialize` call so a failed attempt is not retried
# (and not re-logged) on every paragraph.
_initAttempted = False


def _initialize() -> bool:
    """Register the emoji TTF with ReportLab and capture its cmap.

    Lazy + idempotent: the renderer may instantiate before reportlab is
    imported in the worker process, and tests that don't generate PDFs
    must not pay the registration cost.

    Returns:
        True when the font is registered and `_supportedCodepoints` holds
        its coverage set; False when the font file is missing, reportlab
        is not importable, or registration raised. The outcome of the
        first call is cached and returned by every later call.
    """
    global _initAttempted, _supportedCodepoints
    if _initAttempted:
        return _supportedCodepoints is not None
    _initAttempted = True

    if not os.path.exists(_FONT_PATH):
        logger.warning(
            "Emoji font not found at %s — emoji codepoints in PDFs will render as tofu",
            _FONT_PATH,
        )
        return False

    try:
        from reportlab.pdfbase import pdfmetrics
        from reportlab.pdfbase.ttfonts import TTFont
    except ImportError:
        logger.warning("reportlab not installed; cannot register emoji font")
        return False

    # Deliberately broad: a broken font file or backend error must degrade
    # to "no emoji fallback" rather than abort PDF generation.
    try:
        font = TTFont(EMOJI_FONT_NAME, _FONT_PATH)
        pdfmetrics.registerFont(font)
        # Prefer the cmap ReportLab already parsed; if the face does not
        # expose one (or it is empty), read it from the file via fontTools.
        cmap = getattr(font.face, "charToGlyph", None) or {}
        if not cmap:
            from fontTools.ttLib import TTFont as FtTTFont

            cmap = FtTTFont(_FONT_PATH).getBestCmap()
        _supportedCodepoints = frozenset(
            cp for cp in cmap.keys() if cp >= _EMOJI_RANGE_START
        )
        logger.info(
            "Registered emoji font '%s' with %d renderable codepoints (>= U+%04X)",
            EMOJI_FONT_NAME,
            len(_supportedCodepoints),
            _EMOJI_RANGE_START,
        )
        return True
    except Exception as exc:
        logger.warning("Failed to register emoji font: %s", exc)
        _supportedCodepoints = None
        return False


_TAG_RE = re.compile(r"<[^>]+>")


def wrapEmojiSpansInXml(xml: str) -> str:
    """Wrap consecutive emoji codepoints with `<font name="NotoEmoji">`.

    Operates on already-XML-escaped ReportLab markup. Tag markers (`<...>`)
    are skipped so we never insert a font tag inside another tag's attribute
    list. Codepoints that the emoji font cannot draw pass through unchanged
    so the default body font still gets a chance (e.g. U+200D
    zero-width-joiner has no glyph in Noto Emoji and would otherwise render
    as tofu inside a forced span).

    Args:
        xml: ReportLab paragraph markup; may be empty.

    Returns:
        The markup with every maximal run of emoji-font-drawable characters
        enclosed in a font tag, or `xml` unchanged when it is empty, the
        emoji font could not be initialised, or the font covers no
        codepoints in the emoji range.
    """
    if not xml:
        return xml
    if not _initialize() or not _supportedCodepoints:
        return xml

    cps = _supportedCodepoints
    # Build the wrapper once; the font name must match the name the TTF was
    # registered under in `_initialize`.
    openTag = f'<font name="{EMOJI_FONT_NAME}">'
    closeTag = "</font>"

    out: list[str] = []
    i = 0
    n = len(xml)
    while i < n:
        # Skip past any markup tag verbatim — emojis inside attribute
        # values would be unusual but harmless; the simpler invariant
        # "we never split a `<...>` token" keeps the rewrite safe.
        if xml[i] == "<":
            tagEnd = xml.find(">", i)
            if tagEnd == -1:
                # Unterminated tag: emit the remainder untouched.
                out.append(xml[i:])
                break
            out.append(xml[i : tagEnd + 1])
            i = tagEnd + 1
            continue

        if ord(xml[i]) in cps:
            # Extend the run over adjacent drawable characters, stopping
            # at the next tag so the span never swallows markup.
            j = i
            while j < n and xml[j] != "<" and ord(xml[j]) in cps:
                j += 1
            out.append(openTag)
            out.append(xml[i:j])
            out.append(closeTag)
            i = j
            continue

        out.append(xml[i])
        i += 1

    return "".join(out)