# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Inline emoji-font fallback for the ReportLab-based PDF renderer.
The default ReportLab core fonts (Helvetica, Times, Courier) only cover
WinAnsi (Latin-1 + a handful of typographic glyphs). Codepoints from the
Unicode Symbols / Pictographs / Emoji blocks render as a missing-glyph
square ("tofu") or are dropped entirely.
This module bundles a single TrueType emoji font (Noto Emoji, monochrome,
SIL Open Font License) and exposes `wrapEmojiSpansInXml` which rewrites
already-built ReportLab mini-XML so any character that the emoji font can
draw is wrapped in `<font name="NotoEmoji">...</font>`. ReportLab's
Paragraph parser supports nested tags, so emoji spans nest cleanly
inside `<b>`, `<i>`, and other inline markup produced elsewhere.
ReportLab does not natively color emoji (CBDT/COLR/SBIX glyph tables are
not honoured by its TTF backend) — Noto Emoji is intentionally a
monochrome outline font, which is the only flavour that will render at all.
"""
from __future__ import annotations
import logging
import os
import re
from typing import FrozenSet, Optional
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Name under which the emoji font is registered with ReportLab (see `_initialize`).
EMOJI_FONT_NAME = "NotoEmoji"
# Directory that contains this module.
_RENDERER_DIR = os.path.dirname(os.path.abspath(__file__))
# Five directory levels above `_RENDERER_DIR`; the font asset is resolved relative to it.
_GATEWAY_ROOT = os.path.abspath(os.path.join(_RENDERER_DIR, "..", "..", "..", "..", ".."))
# Expected on-disk location of the bundled emoji TrueType file.
_FONT_PATH = os.path.join(_GATEWAY_ROOT, "assets", "fonts", "NotoEmoji-Regular.ttf")
# Below 0x2000 the WinAnsi-style core fonts already cover Latin-1, common
# diacritics and basic punctuation. We only swap to the emoji font for
# higher codepoints so umlauts, copyright, NBSP, etc. stay visually
# consistent with surrounding body text.
_EMOJI_RANGE_START = 0x2000
# Codepoints (>= `_EMOJI_RANGE_START`) present in the emoji font's cmap.
# Stays None until `_initialize` succeeds, and is reset to None if it fails.
_supportedCodepoints: Optional[FrozenSet[int]] = None
# Flipped to True by the first `_initialize` call so the registration is
# attempted at most once; later calls only report the cached outcome.
_initAttempted = False
def _initialize() -> bool:
    """Register the bundled emoji font once and cache the codepoints it covers.

    The registration is deferred to the first call and attempted only once:
    every later call simply reports whether that first attempt left a
    coverage set behind. Deferring keeps module import cheap, so the
    renderer can be instantiated before reportlab is imported in the worker
    process and tests that never build a PDF do not pay for font parsing.

    Returns:
        True when the font is registered and `_supportedCodepoints` holds
        its coverage set; False when the font file is missing, reportlab
        cannot be imported, or registration / cmap extraction failed.
    """
    global _initAttempted, _supportedCodepoints

    # Fast path: the one-shot attempt already ran, report its outcome.
    if _initAttempted:
        return _supportedCodepoints is not None
    _initAttempted = True

    fontMissing = not os.path.exists(_FONT_PATH)
    if fontMissing:
        logger.warning(
            "Emoji font not found at %s — emoji codepoints in PDFs will render as tofu",
            _FONT_PATH,
        )
        return False

    # Imported here rather than at module level so that importing this
    # module never requires reportlab to be available.
    try:
        from reportlab.pdfbase import pdfmetrics
        from reportlab.pdfbase.ttfonts import TTFont
    except ImportError:
        logger.warning("reportlab not installed; cannot register emoji font")
        return False

    try:
        emojiFont = TTFont(EMOJI_FONT_NAME, _FONT_PATH)
        pdfmetrics.registerFont(emojiFont)

        # Prefer the codepoint -> glyph table exposed on the ReportLab face;
        # when it is absent or empty, read the cmap straight from the font
        # file with fontTools instead.
        glyphMap = getattr(emojiFont.face, "charToGlyph", None) or {}
        if not glyphMap:
            from fontTools.ttLib import TTFont as FtTTFont

            glyphMap = FtTTFont(_FONT_PATH).getBestCmap()

        # Only codepoints at or above the emoji threshold are candidates
        # for the font swap; lower ones stay with the body font.
        coverage = frozenset(
            codepoint
            for codepoint in glyphMap.keys()
            if codepoint >= _EMOJI_RANGE_START
        )
        _supportedCodepoints = coverage
        logger.info(
            "Registered emoji font '%s' with %d renderable codepoints (>= U+%04X)",
            EMOJI_FONT_NAME,
            len(_supportedCodepoints),
            _EMOJI_RANGE_START,
        )
        return True
    except Exception as err:
        # Best effort: a broken font must not break PDF generation, so the
        # failure is logged and the fallback is disabled.
        logger.warning("Failed to register emoji font: %s", err)
        _supportedCodepoints = None
        return False
# Pattern for one complete markup tag (`<...>`).
# NOTE(review): not referenced by `wrapEmojiSpansInXml` below, which scans
# with `str.find` instead — confirm whether other code still relies on it.
_TAG_RE = re.compile(r"<[^>]+>")


def wrapEmojiSpansInXml(xml: str) -> str:
    """Wrap runs of emoji codepoints in `<font name="...">…</font>`.

    Operates on already-XML-escaped ReportLab markup. Each maximal run of
    consecutive characters that the emoji font can draw is enclosed in a
    `<font>` tag naming `EMOJI_FONT_NAME`, so ReportLab switches to the
    emoji font for exactly those characters.

    Tag tokens (`<...>`) are copied verbatim, so a font tag is never
    inserted inside another tag's attribute list. Codepoints the emoji font
    cannot draw pass through unchanged so the default body font still gets
    a chance (e.g. U+200D zero-width-joiner has no glyph in Noto Emoji and
    would otherwise render as tofu inside a forced span).

    Args:
        xml: ReportLab paragraph markup, already XML-escaped.

    Returns:
        The markup with emoji runs wrapped. The input is returned unchanged
        when it is empty, when the emoji font could not be initialised, or
        when the font covers no eligible codepoints.
    """
    if not xml:
        return xml
    if not _initialize() or not _supportedCodepoints:
        return xml

    cps = _supportedCodepoints
    # Built once per call: these are the same for every emoji run.
    openTag = f'<font name="{EMOJI_FONT_NAME}">'
    closeTag = "</font>"

    out: list[str] = []
    i = 0
    n = len(xml)
    while i < n:
        ch = xml[i]

        # Skip past any markup tag verbatim — emojis inside attribute
        # values would be unusual but harmless; the simpler invariant
        # "we never split a `<...>` token" keeps the rewrite safe.
        if ch == "<":
            tagEnd = xml.find(">", i)
            if tagEnd == -1:
                # Unterminated tag: copy the remainder untouched rather
                # than guess where the markup ends.
                out.append(xml[i:])
                break
            out.append(xml[i : tagEnd + 1])
            i = tagEnd + 1
            continue

        if ord(ch) in cps:
            # Extend the run over every following drawable codepoint,
            # stopping at the next tag or the first non-emoji character.
            j = i + 1
            while j < n and xml[j] != "<" and ord(xml[j]) in cps:
                j += 1
            out.append(openTag)
            out.append(xml[i:j])
            out.append(closeTag)
            i = j
            continue

        out.append(ch)
        i += 1
    return "".join(out)