# Copyright (c) 2026 PowerOn AG
# All rights reserved.
"""
Gateway i18n registry: t(), @i18nModel, runtime translation cache.

All UI-visible texts in the gateway (HTTPException details, model labels,
API messages) are tagged with t() and registered at import time.
At runtime, t() returns the cached translation for the current request language.

Boot-time DB sync and label discovery live in i18nBootSync.py (called by app.py).
This module has ZERO dependencies on other platform-core modules outside shared/.
"""

from __future__ import annotations

import logging
import re
from collections import Counter
from contextvars import ContextVar
from dataclasses import dataclass, field as dataclass_field
from typing import Any, Dict, List, Optional, Tuple, Type

from pydantic import BaseModel

logger = logging.getLogger(__name__)

# Matches {placeholderName} tokens used by t(...) param substitution in the
# frontend (LanguageContext._applyParams) and the gateway. Allows ASCII
# identifiers and digits, no spaces.
_PLACEHOLDER_PATTERN = re.compile(r"\{[A-Za-z_][A-Za-z0-9_]*\}")


def _enforceSourcePlaceholders(sourceKey: str, translatedValue: str) -> Tuple[str, bool]:
    """Repair a translated value so its placeholder tokens match the source key.

    Background: AI translators occasionally translate the *names* of
    placeholders even when instructed not to (e.g. ``{konten}`` ->
    ``{accounts}``). The frontend then cannot substitute params and the user
    sees raw ``{accounts}`` in the UI.

    Strategy (conservative):
      - if the source or the translation has no placeholders -> nothing to do
      - if the token counts differ -> leave the translation untouched (too
        risky to guess; surfaced as a logger.warning by the caller if desired)
      - if both contain the same tokens, in any order -> nothing to do; a
        translation may legitimately reorder placeholders for grammar
      - otherwise keep every translation token that still matches a source
        token and replace the remaining (renamed) ones, left to right, with
        the source tokens that were not matched, in source order. When no
        token matches at all this is a plain positional swap.

    Args:
        sourceKey: The source text whose placeholder names are authoritative.
        translatedValue: The translation to check and, if needed, repair.

    Returns:
        ``(repairedValue, wasChanged)``. ``repairedValue`` is
        ``translatedValue`` itself whenever ``wasChanged`` is ``False``.
    """
    if not sourceKey or not translatedValue:
        return translatedValue, False

    sourceTokens = _PLACEHOLDER_PATTERN.findall(sourceKey)
    if not sourceTokens:
        return translatedValue, False

    valueTokens = _PLACEHOLDER_PATTERN.findall(translatedValue)
    if not valueTokens:
        return translatedValue, False

    if len(sourceTokens) != len(valueTokens):
        return translatedValue, False

    # Match translation tokens against the source as a multiset so that a
    # pure reordering is recognised as already correct.
    unmatched = Counter(sourceTokens)
    keepFlags = []
    for tok in valueTokens:
        if unmatched[tok] > 0:
            unmatched[tok] -= 1
            keepFlags.append(True)
        else:
            keepFlags.append(False)

    if all(keepFlags):
        return translatedValue, False

    # Source tokens that no translation token matched, in source order. The
    # counts are equal, so there is exactly one per renamed translation token.
    leftovers = []
    for tok in sourceTokens:
        if unmatched[tok] > 0:
            unmatched[tok] -= 1
            leftovers.append(tok)

    # The pattern has no capture groups, so split() yields exactly
    # len(valueTokens) + 1 literal text parts surrounding the tokens.
    parts = _PLACEHOLDER_PATTERN.split(translatedValue)
    replacements = iter(leftovers)
    pieces = [parts[0]]
    for idx, (tok, keep) in enumerate(zip(valueTokens, keepFlags)):
        pieces.append(tok if keep else next(replacements))
        pieces.append(parts[idx + 1])
    return "".join(pieces), True


def _extractRegistrySourceText(obj: Any) -> str:
    """Resolve a str or multilingual dict to one canonical registry key string.

    A string is returned unchanged. For a dict the ``"xx"`` entry is
    preferred; if it is missing or empty, the first value in dict order is
    used. Anything else (or an empty result) yields ``""``.
    """
    if isinstance(obj, str):
        return obj
    if isinstance(obj, dict):
        return obj.get("xx") or next(iter(obj.values()), "") or ""
    return ""


# ---------------------------------------------------------------------------
# Registry (populated at import time by t() and @i18nModel)
# ---------------------------------------------------------------------------

@dataclass
class _I18nRegistryEntry:
    # i18n context the key was registered under (e.g. "api", "table.<Model>").
    context: str
    # Free-text description registered alongside the key.
    value: str


_REGISTRY: Dict[str, _I18nRegistryEntry] = {}

# ---------------------------------------------------------------------------
# Translation cache (populated at boot by loadCache)
# ---------------------------------------------------------------------------

_CACHE: Dict[str, Dict[str, str]] = {}

# ---------------------------------------------------------------------------
# Per-request language (set by middleware)
# ---------------------------------------------------------------------------

_CURRENT_LANGUAGE: ContextVar[str] = ContextVar("i18n_lang", default="de")

# ---------------------------------------------------------------------------
# Model labels (backwards-compatible with getModelLabels / getModelLabel)
# ---------------------------------------------------------------------------

MODEL_LABELS: Dict[str, Dict[str, Any]] = {}
# ---------------------------------------------------------------------------
# t() -- tag and translate
# ---------------------------------------------------------------------------

def t(key: str, context: str = "api", value: str = "") -> str:
    """Tag a UI-visible text for i18n and return the translation.

    At import time: registers the key with context and AI description.
    At runtime: returns the cached translation for _CURRENT_LANGUAGE.
    Falls back to [key] so missing translations are visible in the UI.

    Args:
        key: The source text; it doubles as the registry and cache key.
        context: Registry context stored with the key on first registration.
        value: Description stored with the key on first registration.

    Returns:
        ``key`` itself when the current language is ``"de"``, otherwise the
        cached translation, or ``"[key]"`` when none is cached.
    """
    # First registration wins; later calls never overwrite context/value.
    if key not in _REGISTRY:
        _REGISTRY[key] = _I18nRegistryEntry(context=context, value=value)

    lang = _CURRENT_LANGUAGE.get()
    if lang == "de":
        # Keys are returned verbatim for "de"; no cache lookup happens.
        return key
    return _CACHE.get(lang, {}).get(key, f"[{key}]")


def _callWithLanguage(lang: Optional[str], func: Any, *args: Any) -> Any:
    """Call ``func(*args)``, temporarily overriding the context language.

    When ``lang`` is ``None`` the call runs under the current context
    language. Otherwise ``lang`` is set for the duration of the call and the
    previous value is restored afterwards, even if ``func`` raises.
    """
    if lang is None:
        return func(*args)
    token = _CURRENT_LANGUAGE.set(lang)
    try:
        return func(*args)
    finally:
        _CURRENT_LANGUAGE.reset(token)


def resolveJobMessage(messageData: Optional[Dict[str, Any]], lang: Optional[str] = None) -> Optional[str]:
    """Translate a structured BackgroundJob progress payload.

    ``messageData`` shape (written by ``JobProgressCallback`` when callers pass
    ``messageKey`` / ``messageParams``)::

        {"key": "{n} Dateien verarbeitet, {indexed} indexiert",
         "params": {"n": 145, "indexed": 106}}

    The walker call sites use a string-literal ``messageKey=``; the matching
    ``t("…")`` literal lives in the feature's progress-key registration module
    (e.g. ``serviceKnowledge/_progressMessages.py``,
    ``features/trustee/mainTrustee.py``) so the boot sync picks it up.

    This helper is the **server-side** translation hop so route handlers can
    deliver a fully rendered ``progressMessage`` string to the frontend -- the
    frontend never calls ``t()`` on backend-supplied keys.

    Args:
        messageData: The payload dict described above, or ``None``.
        lang: Optional language overriding the context language for this call.

    Returns:
        The translated template with ``params`` substituted, the bare
        translated template when substitution is not possible, or ``None``
        when ``messageData`` is not a dict or has no non-empty string ``key``.
    """
    if not messageData or not isinstance(messageData, dict):
        return None
    key = messageData.get("key")
    if not isinstance(key, str) or not key:
        return None
    params = messageData.get("params") or {}

    template = _callWithLanguage(lang, t, key)

    if isinstance(params, dict) and params:
        try:
            return template.format(**params)
        except (KeyError, IndexError, ValueError, AttributeError, TypeError):
            # Best effort: a template whose fields cannot be satisfied
            # (missing param, stray brace, attribute/index access on a param,
            # non-string param keys) is returned unformatted rather than
            # failing the request.
            return template
    return template


def resolveText(value: Any, lang: Optional[str] = None) -> str:
    """Resolve any value to a translated string for the current request language.

    Accepts str, dict, TextMultilingual, or None.
    - str: translate via t() (treats as i18n key / German plaintext key)
    - dict: multilingual user content — pick ``lang`` (or current context),
      then ``xx``, then first value
    - object with model_dump(): convert to dict first (TextMultilingual)
    - None/empty: return ""

    If ``lang`` is given, it temporarily overrides the context language for
    this call (used by schedulers that have an explicit user language).

    Missing i18n translations for string keys use t()'s ``[key]`` fallback.
    """
    return _callWithLanguage(lang, _resolveTextImpl, value)


def _resolveTextImpl(value: Any) -> str:
    """Resolve ``value`` to text under the current context language.

    See ``resolveText`` for the accepted input shapes.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        # Whitespace-only strings are treated as empty and never registered.
        if not value.strip():
            return ""
        return t(value)
    if hasattr(value, "model_dump"):
        value = value.model_dump()
    if isinstance(value, dict):
        if not value:
            return ""
        lang = _CURRENT_LANGUAGE.get()
        text = value.get(lang) or value.get("xx")
        if text:
            return str(text)
        # Neither the requested language nor "xx" has content: fall back to
        # the first non-empty value in dict order.
        first = next((v for v in value.values() if v), None)
        return str(first) if first else ""
    return str(value)


def apiRouteContext(routeModuleName: str):
    """Return a callable that registers + translates HTTPException details.

    The key is registered eagerly in ``_REGISTRY`` the moment ``_apiMsg(key)``
    is evaluated (module-level ``detail=routeApiMsg("…")`` runs at import time).
    At runtime ``t()`` returns the cached translation for the current language.

    Args:
        routeModuleName: Name used to build the registry context
            ``api.{routeModuleName}``.

    Returns:
        A function ``_apiMsg(key, value="")`` returning the translated text.
    """
    _ctx = f"api.{routeModuleName}"

    def _apiMsg(key: str, value: str = "") -> str:
        # t() registers the key under _ctx if it is not registered yet, so no
        # separate registry write is needed here.
        return t(key, _ctx, value)

    return _apiMsg


# ---------------------------------------------------------------------------
# @i18nModel -- class decorator for Pydantic models
# ---------------------------------------------------------------------------

def _registerFieldLabels(
    className: str,
    fieldName: str,
    extra: Dict[str, Any],
    description: Optional[str],
) -> Optional[Any]:
    """Register one field's label and render-hint tokens; return its label.

    Registers ``extra["label"]`` (when truthy) under
    ``table.{className}.{fieldName}`` with ``description`` as the value, then
    every non-blank string in ``extra["frontend_format_labels"]`` (when it is
    a list) under ``table.{className}.{fieldName}.format``.

    Returns the label, or ``None`` when the field has no truthy label.
    """
    label = extra.get("label")
    if label:
        t(label, f"table.{className}.{fieldName}", description or "")

    # Render-hint label tokens (frontend_format_labels) are user-visible
    # strings that appear in tables/forms (e.g. boolean labels
    # ["Ja","-","Nein"], unit suffixes ["KB","MB","GB",...]). Register
    # each non-empty token under a per-field context so they appear in
    # the xx base set and get AI-translated like every other UI string.
    formatLabels = extra.get("frontend_format_labels")
    if isinstance(formatLabels, list):
        fmtCtx = f"table.{className}.{fieldName}.format"
        for token in formatLabels:
            if isinstance(token, str) and token.strip():
                t(token, fmtCtx, "")

    return label or None


def i18nModel(modelLabel: str, aiContext: str = ""):
    """Class decorator: registers model and field labels for i18n.

    1. Registers t(modelLabel, "table.{ClassName}", aiContext or docstring)
    2. For each Field with json_schema_extra["label"]:
       Registers t(label, "table.{ClassName}.{fieldName}", field.description)
    3. Populates MODEL_LABELS for getModelLabels()/getModelLabel() in attributeUtils

    Args:
        modelLabel: User-visible label of the model.
        aiContext: Description stored with the model label; when empty, the
            first line of the class docstring is used instead.

    Returns:
        A decorator that performs the registration and returns the class
        unchanged.
    """
    def _decorator(cls: Type[BaseModel]) -> Type[BaseModel]:
        className = cls.__name__
        ctx = aiContext or _extractDocstringFirstLine(cls)
        t(modelLabel, f"table.{className}", ctx)

        attributes: Dict[str, str] = {}
        for fieldName, fieldInfo in cls.model_fields.items():
            extra = fieldInfo.json_schema_extra
            if not isinstance(extra, dict):
                # NOTE(review): such fields get no entry in ``attributes``,
                # whereas computed fields below default to their own name --
                # confirm this asymmetry is intended.
                continue
            label = _registerFieldLabels(className, fieldName, extra, fieldInfo.description)
            attributes[fieldName] = label or fieldName

        # Pydantic v2 computed fields (@computed_field) — same handling as
        # regular model_fields so labels and frontend_format_labels are
        # registered for i18n and appear in MODEL_LABELS.
        computedFields = getattr(cls, "model_computed_fields", {}) or {}
        for fieldName, computedInfo in computedFields.items():
            extra = getattr(computedInfo, "json_schema_extra", None)
            if callable(extra) or not isinstance(extra, dict):
                attributes.setdefault(fieldName, fieldName)
                continue
            label = _registerFieldLabels(
                className, fieldName, extra, getattr(computedInfo, "description", "")
            )
            if label:
                attributes[fieldName] = label
            else:
                # Never overwrite a label already recorded for this name.
                attributes.setdefault(fieldName, fieldName)

        MODEL_LABELS[className] = {
            "model": modelLabel,
            "attributes": attributes,
        }
        return cls

    return _decorator


def _extractDocstringFirstLine(cls: type) -> str:
    """Return the stripped first line of ``cls``'s docstring, or ``""``."""
    doc = cls.__doc__
    if not doc:
        return ""
    return doc.strip().split("\n")[0].strip()


# ---------------------------------------------------------------------------
# Language setter (called by middleware)
# ---------------------------------------------------------------------------

def setLanguage(lang: str) -> None:
    """Set the language for the current request context."""
    _CURRENT_LANGUAGE.set(lang)


def getCurrentLanguage() -> str:
    """Get the language for the current request context."""
    return _CURRENT_LANGUAGE.get()


def normalizePrimaryLanguageTag(tag: str, fallback: str = "de") -> str:
    """Primary language subtag from ``Accept-Language`` or a single BCP47 tag.

    Only the first entry of a comma-separated list is considered and any
    ``;q=`` parameter is dropped. Supports 2-letter (ISO 639-1) and 3-letter
    (ISO 639-2/3) primaries such as ``gsw``; any purely alphabetic ASCII
    primary of 2 to 8 letters is accepted. Strips region/variant:
    ``de-CH`` → ``de``, ``zh-Hans-CN`` → ``zh``.

    Args:
        tag: Header value or single language tag.
        fallback: Returned when no valid primary subtag can be extracted.

    Returns:
        The lower-cased primary subtag, or ``fallback``.
    """
    if not tag or not isinstance(tag, str):
        return fallback
    first = tag.split(",")[0].split(";")[0].strip()
    if not first:
        return fallback
    primary = first.split("-")[0].split("_")[0].lower()
    # Language subtags are ASCII letters only; isalpha() alone would also
    # accept non-ASCII alphabetic characters.
    if primary.isascii() and primary.isalpha() and 2 <= len(primary) <= 8:
        return primary
    return fallback