# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Inline emoji-font fallback for the ReportLab-based PDF renderer.

The default ReportLab core fonts (Helvetica, Times, Courier) only cover
WinAnsi (Latin-1 + a handful of typographic glyphs). Codepoints from the
Unicode Symbols / Pictographs / Emoji blocks render as a missing-glyph
square ("tofu") or are dropped entirely.

This module bundles a single TrueType emoji font (Noto Emoji, monochrome,
SIL Open Font License) and exposes `wrapEmojiSpansInXml` which rewrites
already-built ReportLab mini-XML so any character that the emoji font can
draw is wrapped in `<font name="NotoEmoji">...</font>`. ReportLab's
Paragraph parser supports nested <font> tags, so emoji spans nest cleanly
inside <b>, <i>, and <font name="Courier"> markup produced elsewhere.

ReportLab does not natively color emoji (CBDT/COLR/SBIX glyph tables are
not honoured by its TTF backend) — Noto Emoji is intentionally a
monochrome outline font, which is the only flavour that will render at all.
"""

from __future__ import annotations

import logging
import os
import re
from typing import FrozenSet, Optional

logger = logging.getLogger(__name__)

# Name the emoji face is registered under with ReportLab; the same name is
# emitted in the generated <font name="..."> wrappers.
EMOJI_FONT_NAME = "NotoEmoji"

# The TTF lives in <gateway root>/assets/fonts, where the gateway root is
# resolved as five directory levels above the directory holding this module.
_RENDERER_DIR = os.path.dirname(os.path.abspath(__file__))
_GATEWAY_ROOT = os.path.abspath(os.path.join(_RENDERER_DIR, *([".."] * 5)))
_FONT_PATH = os.path.join(_GATEWAY_ROOT, "assets", "fonts", "NotoEmoji-Regular.ttf")

# Codepoints under U+2000 (Latin-1, common diacritics, basic punctuation)
# are already drawn by the WinAnsi-style core fonts. Only codepoints at or
# above this threshold are ever routed to the emoji font, so umlauts,
# copyright, NBSP, etc. keep matching the surrounding body text.
_EMOJI_RANGE_START = 0x2000

# Lazy-initialisation state: the coverage set stays None until the font has
# been registered successfully; the flag records that one attempt was made.
_supportedCodepoints: Optional[FrozenSet[int]] = None
_initAttempted = False


def _initialize() -> bool:
    """Register the emoji TTF with ReportLab and capture its cmap.

    Lazy + idempotent: the renderer may instantiate before reportlab is
    imported in the worker process, and tests that don't generate PDFs
    must not pay the registration cost. Only the first call does any work;
    later calls just report whether that attempt produced a coverage set.

    Returns:
        True when the font was registered and `_supportedCodepoints` was
        populated (it may still be empty); False when the font file is
        missing, reportlab cannot be imported, or registration / cmap
        extraction failed.
    """
    global _initAttempted, _supportedCodepoints
    if _initAttempted:
        return _supportedCodepoints is not None
    # Set before doing any work so a failed attempt is never retried.
    _initAttempted = True

    if not os.path.exists(_FONT_PATH):
        logger.warning(
            "Emoji font not found at %s — emoji codepoints in PDFs will render as tofu",
            _FONT_PATH,
        )
        return False

    try:
        from reportlab.pdfbase import pdfmetrics
        from reportlab.pdfbase.ttfonts import TTFont
    except ImportError:
        logger.warning("reportlab not installed; cannot register emoji font")
        return False

    try:
        font = TTFont(EMOJI_FONT_NAME, _FONT_PATH)
        pdfmetrics.registerFont(font)
        # Prefer the codepoint -> glyph mapping ReportLab exposes on the
        # parsed face. If it is absent or empty, read the cmap straight from
        # the font file with fontTools instead.
        cmap = getattr(font.face, "charToGlyph", None) or {}
        if not cmap:
            from fontTools.ttLib import TTFont as FtTTFont

            ftFont = FtTTFont(_FONT_PATH)
            try:
                cmap = ftFont.getBestCmap()
            finally:
                # Release the file handle fontTools keeps open on the font.
                ftFont.close()
            if cmap is None:
                # No Unicode cmap subtable: nothing can be mapped. Surface a
                # readable reason instead of an AttributeError on None.
                raise ValueError("emoji font has no Unicode cmap")
        _supportedCodepoints = frozenset(
            cp for cp in cmap if cp >= _EMOJI_RANGE_START
        )
        logger.info(
            "Registered emoji font '%s' with %d renderable codepoints (>= U+%04X)",
            EMOJI_FONT_NAME,
            len(_supportedCodepoints),
            _EMOJI_RANGE_START,
        )
        return True
    except Exception as exc:
        # Deliberate best-effort boundary: PDF rendering must keep working
        # (without emoji fallback) whatever goes wrong with the font.
        logger.warning("Failed to register emoji font: %s", exc)
        _supportedCodepoints = None
        return False


# Matches one complete `<...>` markup tag (at least one character between
# the angle brackets). Not referenced by any code visible in this module —
# NOTE(review): confirm there is no external use before removing it.
_TAG_RE = re.compile(r"<[^>]+>")


def wrapEmojiSpansInXml(xml: str) -> str:
    """Route emoji runs in ReportLab mini-XML through the emoji font.

    The input is expected to be already-XML-escaped ReportLab markup. Each
    maximal run of characters the emoji font can draw is enclosed in
    `<font name="NotoEmoji">…</font>`. Every `<...>` token is copied
    through untouched, so a font tag is never injected into another tag's
    attribute list. Characters missing from the emoji font are left as they
    are, giving the body font a chance to draw them (e.g. U+200D
    zero-width-joiner has no glyph in Noto Emoji and would otherwise render
    as tofu inside a forced <font> span).

    The input is returned unchanged when it is empty, when the emoji font
    could not be initialised, or when the font covers no codepoints.
    """
    if not xml:
        return xml
    if not (_initialize() and _supportedCodepoints):
        return xml

    drawable = _supportedCodepoints
    openTag = f'<font name="{EMOJI_FONT_NAME}">'
    pieces: list[str] = []
    length = len(xml)
    pos = 0
    while pos < length:
        ch = xml[pos]
        if ch == "<":
            # Copy the whole tag verbatim. An emoji inside an attribute
            # value is unusual but harmless; never splitting a `<...>`
            # token is the invariant that keeps this rewrite safe.
            closeAt = xml.find(">", pos)
            if closeAt < 0:
                # Unterminated tag: emit the remainder as-is and stop.
                pieces.append(xml[pos:])
                break
            pieces.append(xml[pos : closeAt + 1])
            pos = closeAt + 1
        elif ord(ch) in drawable:
            # Extend the run over every following drawable character,
            # stopping at the next tag or the first non-emoji character.
            runEnd = pos + 1
            while runEnd < length and xml[runEnd] != "<" and ord(xml[runEnd]) in drawable:
                runEnd += 1
            pieces.extend((openTag, xml[pos:runEnd], "</font>"))
            pos = runEnd
        else:
            pieces.append(ch)
            pos += 1
    return "".join(pieces)