# platform-core/modules/shared/i18nRegistry.py

# Copyright (c) 2026 PowerOn AG
# All rights reserved.
"""
Gateway i18n registry: t(), @i18nModel, runtime translation cache.

All UI-visible texts in the gateway (HTTPException details, model labels,
API messages) are tagged with t() and registered at import time.
At runtime, t() returns the cached translation for the current request language.

Boot-time DB sync and label discovery live in i18nBootSync.py (called by app.py).
This module has ZERO dependencies on other platform-core modules outside shared/.
"""

from __future__ import annotations

import logging
import re
from contextvars import ContextVar
from dataclasses import dataclass, field as dataclass_field
from typing import Any, Dict, List, Optional, Tuple, Type

from pydantic import BaseModel

logger = logging.getLogger(__name__)


# Matches {placeholderName} tokens used by t(...) param substitution in the
# frontend (LanguageContext._applyParams) and the gateway. A token is an ASCII
# identifier in braces: a letter or underscore followed by letters, digits or
# underscores, no spaces.
_PLACEHOLDER_PATTERN = re.compile(r"\{[A-Za-z_][A-Za-z0-9_]*\}")


def _enforceSourcePlaceholders(sourceKey: str, translatedValue: str) -> Tuple[str, bool]:
    """Repair a translated value so its placeholder tokens match the source key.

    Background: AI translators occasionally translate the *names* of
    placeholders even when instructed not to (e.g. ``{konten}`` -> ``{accounts}``).
    The frontend then cannot substitute params and the user sees raw
    ``{accounts}`` in the UI.

    Strategy (positional, conservative):
      - if either string is empty, or either has no placeholders -> nothing
        to do
      - if source and translation contain the same tokens, *in any order* ->
        nothing to do (word order legitimately differs between languages, so
        ``{b} von {a}`` is a valid translation of ``{a} of {b}``)
      - if both have the *same number* of tokens but different names -> swap
        each translation token with the source token at the same position
      - if counts differ -> leave the translation untouched (too risky to
        guess; the caller can detect this because ``wasChanged`` is False
        while the tokens still differ)

    Args:
        sourceKey: The registry key whose placeholder names are authoritative.
        translatedValue: The translated text to check and, if needed, repair.

    Returns:
        ``(repairedValue, wasChanged)``. ``repairedValue`` is
        ``translatedValue`` itself whenever ``wasChanged`` is False.
    """
    if not sourceKey or not translatedValue:
        return translatedValue, False
    sourceTokens = _PLACEHOLDER_PATTERN.findall(sourceKey)
    if not sourceTokens:
        return translatedValue, False
    valueTokens = _PLACEHOLDER_PATTERN.findall(translatedValue)
    if not valueTokens:
        return translatedValue, False
    # Compare as multisets: a reordered but otherwise intact token list must
    # not be "repaired", because a positional swap would change its meaning.
    if sorted(sourceTokens) == sorted(valueTokens):
        return translatedValue, False
    if len(sourceTokens) != len(valueTokens):
        return translatedValue, False
    # The pattern has no capture groups, so split() yields exactly
    # len(valueTokens) + 1 literal segments surrounding the tokens.
    segments = _PLACEHOLDER_PATTERN.split(translatedValue)
    pieces = [segments[0]]
    for sourceToken, trailingText in zip(sourceTokens, segments[1:]):
        pieces.append(sourceToken)
        pieces.append(trailingText)
    return "".join(pieces), True


def _extractRegistrySourceText(obj: Any) -> str:
    """Reduce a plain string or a multilingual dict to a single registry key.

    - str: returned unchanged
    - dict: the ``"xx"`` entry when it is truthy, otherwise the first value in
      the dict's iteration order (or ``""`` when that value is falsy or the
      dict is empty)
    - anything else: ``""``
    """
    if isinstance(obj, dict):
        baseText = obj.get("xx")
        if baseText:
            return baseText
        firstValue = next(iter(obj.values()), "")
        return firstValue if firstValue else ""
    return obj if isinstance(obj, str) else ""


# ---------------------------------------------------------------------------
# Registry (populated at import time by t() and @i18nModel)
# ---------------------------------------------------------------------------

@dataclass
class _I18nRegistryEntry:
    """Metadata stored in ``_REGISTRY`` for one registered i18n key."""

    # Context string passed at registration, e.g. "api", "api.<routeModule>",
    # "table.<ClassName>" or "table.<ClassName>.<fieldName>".
    context: str
    # Free-text description passed at registration (the "AI description" in
    # t()'s docstring); @i18nModel passes the model docstring / field
    # description here. Empty string when none was given.
    value: str


# Source key -> registration metadata. Written by t() and apiRouteContext();
# the first registration of a key wins, later ones leave the entry untouched.
_REGISTRY: Dict[str, _I18nRegistryEntry] = {}

# ---------------------------------------------------------------------------
# Translation cache (populated at boot by loadCache)
# ---------------------------------------------------------------------------

# Language code -> {source key -> translated text}. Read by t().
_CACHE: Dict[str, Dict[str, str]] = {}

# ---------------------------------------------------------------------------
# Per-request language (set by middleware)
# ---------------------------------------------------------------------------

# Defaults to "de"; for that language t() returns the key itself without
# consulting _CACHE.
_CURRENT_LANGUAGE: ContextVar[str] = ContextVar("i18n_lang", default="de")

# ---------------------------------------------------------------------------
# Model labels (backwards-compatible with getModelLabels / getModelLabel)
# ---------------------------------------------------------------------------

# Class name -> {"model": <model label>, "attributes": {fieldName: label}}.
# Written by the @i18nModel decorator.
MODEL_LABELS: Dict[str, Dict[str, Any]] = {}


# ---------------------------------------------------------------------------
# t() -- tag and translate
# ---------------------------------------------------------------------------

def t(key: str, context: str = "api", value: str = "") -> str:
    """Register a UI-visible text for i18n and return its translation.

    Registration: the first call for a given ``key`` stores ``context`` and
    ``value`` (the AI description) in ``_REGISTRY``; later calls with the same
    key never overwrite that entry.

    Lookup: for language ``"de"`` the key itself is returned. For any other
    language the cached translation is returned, falling back to ``[key]`` so
    missing translations are visible in the UI.
    """
    if key not in _REGISTRY:
        _REGISTRY[key] = _I18nRegistryEntry(context=context, value=value)

    activeLang = _CURRENT_LANGUAGE.get()
    if activeLang != "de":
        translations = _CACHE.get(activeLang, {})
        return translations.get(key, f"[{key}]")
    # "de": keys are the German plaintext, so no cache lookup is needed.
    return key


def resolveJobMessage(messageData: Optional[Dict[str, Any]], lang: Optional[str] = None) -> Optional[str]:
    """Translate a structured BackgroundJob progress payload.

    ``messageData`` shape (written by ``JobProgressCallback`` when callers
    pass ``messageKey`` / ``messageParams``)::

        {"key": "{n} Dateien verarbeitet, {indexed} indexiert",
         "params": {"n": 145, "indexed": 106}}

    The walker call sites use a string-literal ``messageKey=``; the matching
    ``t("…")`` literal lives in the feature's progress-key registration
    module (e.g. ``serviceKnowledge/_progressMessages.py``,
    ``features/trustee/mainTrustee.py``) so the boot sync picks it up.

    This helper is the **server-side** translation hop so route handlers can
    deliver a fully rendered ``progressMessage`` string to the frontend --
    the frontend never calls ``t()`` on backend-supplied keys.

    Args:
        messageData: Payload dict with a non-empty string ``"key"`` and an
            optional ``"params"`` dict.
        lang: When given, overrides the context language for this call only;
            the previous context value is restored afterwards.

    Returns:
        ``None`` when ``messageData`` is empty, not a dict, or has no usable
        string key. Otherwise the translated template with ``params``
        substituted via ``str.format``. Parameter substitution is best-effort:
        if formatting fails for any reason the untouched template is returned.
    """
    if not messageData or not isinstance(messageData, dict):
        return None
    key = messageData.get("key")
    if not isinstance(key, str) or not key:
        return None
    params = messageData.get("params") or {}

    if lang is None:
        template = t(key)
    else:
        token = _CURRENT_LANGUAGE.set(lang)
        try:
            template = t(key)
        finally:
            _CURRENT_LANGUAGE.reset(token)

    if not isinstance(params, dict) or not params:
        return template
    try:
        return template.format(**params)
    except (KeyError, IndexError, ValueError, TypeError, AttributeError):
        # KeyError/IndexError: template references a missing or positional
        # field. ValueError: malformed braces or format spec. TypeError:
        # non-string param keys, or a format spec the value's type does not
        # support. AttributeError: attribute access such as ``{a.b}`` in a
        # (translated) template. A progress message must never fail the
        # request, so fall back to the raw template in all of these cases.
        return template


def resolveText(value: Any, lang: Optional[str] = None) -> str:
    """Turn any supported value into display text for the request language.

    Supported inputs (handled by ``_resolveTextImpl``):
    - str: translated via t() (the string is the i18n key / German plaintext)
    - dict: multilingual user content — entry for the active language, then
      ``xx``, then the first non-empty value
    - object with model_dump(): dumped to a dict first (TextMultilingual)
    - None/empty: ""

    Passing ``lang`` overrides the context language for the duration of this
    call only (used by schedulers that have an explicit user language); the
    previous value is restored afterwards, even if resolution raises.

    String keys without a translation come back as t()'s ``[key]`` fallback.
    """
    if lang is None:
        return _resolveTextImpl(value)

    overrideToken = _CURRENT_LANGUAGE.set(lang)
    try:
        return _resolveTextImpl(value)
    finally:
        _CURRENT_LANGUAGE.reset(overrideToken)


def _resolveTextImpl(value: Any) -> str:
    """Resolve ``value`` to text using the language currently in context.

    None and blank strings yield ""; other strings go through t(). Objects
    exposing ``model_dump()`` are replaced by its result first. Dicts are
    treated as multilingual content: the entry for the current language, then
    ``"xx"``, then the first truthy value (or "" if there is none). Anything
    else is returned as ``str(value)``.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        return t(value) if value.strip() else ""

    if hasattr(value, "model_dump"):
        value = value.model_dump()
    if not isinstance(value, dict):
        return str(value)
    if not value:
        return ""

    preferred = value.get(_CURRENT_LANGUAGE.get()) or value.get("xx")
    if preferred:
        return str(preferred)
    # Neither the active language nor "xx" is filled: take the first entry
    # that has any content.
    for candidate in value.values():
        if candidate:
            return str(candidate)
    return ""


def apiRouteContext(routeModuleName: str):
    """Return a callable that registers + translates HTTPException details.

    The returned ``_apiMsg(key, value="")`` tags ``key`` with the context
    ``api.<routeModuleName>`` and returns its translation. Registration
    happens inside ``t()`` the moment ``_apiMsg(key)`` is evaluated, so a
    module-level ``detail=routeApiMsg("…")`` registers at import time.
    As with every ``t()`` call, the first registration of a key wins. At
    runtime ``t()`` returns the cached translation for the current language.
    """
    routeContext = f"api.{routeModuleName}"

    def _apiMsg(key: str, value: str = "") -> str:
        # t() is the single registration point: it stores (context, value)
        # for keys it has not seen yet and then performs the lookup.
        return t(key, routeContext, value)

    return _apiMsg


# ---------------------------------------------------------------------------
# @i18nModel -- class decorator for Pydantic models
# ---------------------------------------------------------------------------

def i18nModel(modelLabel: str, aiContext: str = ""):
    """Class decorator that registers a Pydantic model's labels for i18n.

    Applied to a model class it:

    1. Registers ``modelLabel`` under ``table.<ClassName>``, described by
       ``aiContext`` or, when that is empty, the first docstring line.
    2. For each entry in ``model_fields`` whose ``json_schema_extra`` is a
       dict: registers its ``label`` (if any) under
       ``table.<ClassName>.<fieldName>`` with the field description, and
       registers every non-blank string in ``frontend_format_labels`` under
       ``table.<ClassName>.<fieldName>.format``. Fields without a dict
       ``json_schema_extra`` are skipped entirely.
    3. Does the same for Pydantic v2 computed fields; these additionally get
       a ``fieldName -> fieldName`` default when they carry no usable extras.
    4. Stores ``{"model": modelLabel, "attributes": {...}}`` in
       ``MODEL_LABELS[<ClassName>]`` for getModelLabels()/getModelLabel().

    The class itself is returned unchanged.
    """
    def _decorator(cls: Type[BaseModel]) -> Type[BaseModel]:
        className = cls.__name__
        t(modelLabel, f"table.{className}", aiContext or _extractDocstringFirstLine(cls))

        def _registerFormatTokens(fieldName: str, extra: Dict[str, Any]) -> None:
            # Render-hint label tokens (frontend_format_labels) are
            # user-visible strings shown in tables/forms (e.g. boolean labels
            # ["Ja","-","Nein"], unit suffixes ["KB","MB","GB",...]). Each
            # non-empty token is registered under a per-field context so it
            # lands in the xx base set and is AI-translated like any other
            # UI string.
            tokens = extra.get("frontend_format_labels")
            if not isinstance(tokens, list):
                return
            formatContext = f"table.{className}.{fieldName}.format"
            for token in tokens:
                if isinstance(token, str) and token.strip():
                    t(token, formatContext, "")

        attributes: Dict[str, str] = {}

        # Regular declared fields.
        for fieldName, fieldInfo in cls.model_fields.items():
            extra = fieldInfo.json_schema_extra
            if not isinstance(extra, dict):
                continue
            label = extra.get("label")
            if not label:
                attributes[fieldName] = fieldName
            else:
                t(label, f"table.{className}.{fieldName}", fieldInfo.description or "")
                attributes[fieldName] = label
            _registerFormatTokens(fieldName, extra)

        # Pydantic v2 computed fields (@computed_field): labels and
        # frontend_format_labels are registered the same way; the attribute
        # default never overrides an entry that already exists.
        computedFields = getattr(cls, "model_computed_fields", {}) or {}
        for fieldName, computedInfo in computedFields.items():
            extra = getattr(computedInfo, "json_schema_extra", None)
            if callable(extra) or not isinstance(extra, dict):
                attributes.setdefault(fieldName, fieldName)
                continue
            label = extra.get("label")
            if not label:
                attributes.setdefault(fieldName, fieldName)
            else:
                description = getattr(computedInfo, "description", "") or ""
                t(label, f"table.{className}.{fieldName}", description)
                attributes[fieldName] = label
            _registerFormatTokens(fieldName, extra)

        MODEL_LABELS[className] = {"model": modelLabel, "attributes": attributes}
        return cls

    return _decorator


def _extractDocstringFirstLine(cls: type) -> str:
    """Return the first line of ``cls``'s docstring, stripped, or "" if none."""
    docstring = cls.__doc__
    if docstring:
        firstLine, _, _ = docstring.strip().partition("\n")
        return firstLine.strip()
    return ""


# ---------------------------------------------------------------------------
# Language setter (called by middleware)
# ---------------------------------------------------------------------------

def setLanguage(lang: str) -> None:
    """Set the language for the current request context.

    Stores ``lang`` as given in the ``_CURRENT_LANGUAGE`` context variable;
    no normalization or validation happens here. The token returned by
    ``ContextVar.set`` is discarded, so this function does not restore the
    previous value afterwards.
    """
    _CURRENT_LANGUAGE.set(lang)


def getCurrentLanguage() -> str:
    """Get the language for the current request context.

    Returns the value of the ``_CURRENT_LANGUAGE`` context variable, which is
    ``"de"`` when nothing has been set in the current context.
    """
    return _CURRENT_LANGUAGE.get()


def normalizePrimaryLanguageTag(tag: str, fallback: str = "de") -> str:
    """Primary language subtag from ``Accept-Language`` or a single BCP47 tag.

    Only the first entry of a comma-separated list is considered and any
    ``;q=…`` parameter is dropped. Region/variant subtags are stripped
    (``-`` and ``_`` both act as separators) and the result is lower-cased:
    ``de-CH`` → ``de``, ``zh-Hans-CN`` → ``zh``, ``pt_BR`` → ``pt``.

    The primary subtag is accepted when it consists of 2 to 8 ASCII letters,
    which covers 2-letter (ISO 639-1) and 3-letter (ISO 639-2/3) codes such as
    ``gsw``. Non-ASCII letters are rejected: the value typically comes from a
    client-controlled header and language subtags are ASCII-only.

    Args:
        tag: Raw ``Accept-Language`` header value or a single language tag.
        fallback: Returned when ``tag`` is empty, not a string, or does not
            yield a valid primary subtag (e.g. ``*``).

    Returns:
        The lower-cased primary language subtag, or ``fallback``.
    """
    if not tag or not isinstance(tag, str):
        return fallback
    first = tag.split(",")[0].split(";")[0].strip()
    if not first:
        return fallback
    primary = first.split("-")[0].split("_")[0].lower()
    # str.isalpha() alone is Unicode-aware and would let values such as
    # "日本語" through as a language code; require ASCII as well.
    if primary.isascii() and primary.isalpha() and 2 <= len(primary) <= 8:
        return primary
    return fallback