platform-core/modules/shared/i18nRegistry.py
ValueOn AG 4a60086c80
Some checks failed
Deploy Plattform-Core (Int) / test (push) Failing after 15s
Deploy Plattform-Core (Int) / deploy (push) Has been skipped
cp adapted to 2026 poweron
2026-06-09 09:53:31 +02:00

341 lines
13 KiB
Python

# Copyright (c) 2026 PowerOn AG
# All rights reserved.
"""
Gateway i18n registry: t(), @i18nModel, runtime translation cache.
All UI-visible texts in the gateway (HTTPException details, model labels,
API messages) are tagged with t() and registered at import time.
At runtime, t() returns the cached translation for the current request language.
Boot-time DB sync and label discovery live in i18nBootSync.py (called by app.py).
This module has ZERO dependencies on other platform-core modules outside shared/.
"""
from __future__ import annotations
import logging
import re
from contextvars import ContextVar
from dataclasses import dataclass, field as dataclass_field
from typing import Any, Dict, List, Optional, Tuple, Type
from pydantic import BaseModel
logger = logging.getLogger(__name__)
# Matches {placeholderName} tokens used by t(...) param substitution in the
# frontend (LanguageContext._applyParams) and the gateway. Allows ASCII
# identifiers and digits, no spaces.
_PLACEHOLDER_PATTERN = re.compile(r"\{[A-Za-z_][A-Za-z0-9_]*\}")


def _enforceSourcePlaceholders(sourceKey: str, translatedValue: str) -> Tuple[str, bool]:
    """Repair a translated value so its placeholder tokens match the source key.

    Background: AI translators occasionally translate the *names* of
    placeholders even when instructed not to (e.g. ``{konten}`` -> ``{accounts}``).
    The frontend then cannot substitute params and the user sees raw
    ``{accounts}`` in the UI.

    Strategy (positional, conservative):
      - if either string is empty, or either has no placeholders
        -> nothing to do
      - if counts differ -> leave the translation untouched (too risky to
        guess; surfaced as a logger.warning by the caller if desired)
      - if source and translation contain the same tokens, in *any* order
        -> nothing to do (word order legitimately differs between languages,
        so ``{b} of {a}`` is a valid translation of ``{a} von {b}``)
      - otherwise (same count, different names) -> swap each translation
        token with the source token at the same position

    Args:
        sourceKey: Source text whose placeholder names are authoritative.
        translatedValue: Translated text that may carry renamed placeholders.

    Returns:
        ``(repairedValue, wasChanged)``.
    """
    if not sourceKey or not translatedValue:
        return translatedValue, False
    sourceTokens = _PLACEHOLDER_PATTERN.findall(sourceKey)
    valueTokens = _PLACEHOLDER_PATTERN.findall(translatedValue)
    if not sourceTokens or not valueTokens:
        return translatedValue, False
    if len(sourceTokens) != len(valueTokens):
        return translatedValue, False
    # Multiset comparison: a translation that merely reorders the correct
    # placeholders must not be "repaired", otherwise the positional swap
    # below would silently exchange the meaning of the parameters.
    if sorted(sourceTokens) == sorted(valueTokens):
        return translatedValue, False
    # The pattern has no capture groups, so split() yields exactly
    # len(valueTokens) + 1 literal segments surrounding the tokens.
    segments = _PLACEHOLDER_PATTERN.split(translatedValue)
    rebuilt = [segments[0]]
    for srcTok, tail in zip(sourceTokens, segments[1:]):
        rebuilt.append(srcTok)
        rebuilt.append(tail)
    return "".join(rebuilt), True
def _extractRegistrySourceText(obj: Any) -> str:
    """Reduce a plain string or a multilingual dict to one registry key string.

    - str: returned unchanged.
    - dict: the ``"xx"`` entry when it is truthy; otherwise the first value
      in the dict (only the first one is considered), or ``""`` when that
      value is falsy or the dict is empty.
    - anything else: ``""``.
    """
    if isinstance(obj, dict):
        baseText = obj.get("xx")
        if baseText:
            return baseText
        allValues = list(obj.values())
        firstValue = allValues[0] if allValues else ""
        return firstValue or ""
    return obj if isinstance(obj, str) else ""
# ---------------------------------------------------------------------------
# Registry (populated at import time by t() and @i18nModel)
# ---------------------------------------------------------------------------
@dataclass
class _I18nRegistryEntry:
    """Metadata stored in ``_REGISTRY`` for one registered source text."""

    # Context label supplied at registration, e.g. "api", "api.<routeModule>",
    # "table.<ClassName>" or "table.<ClassName>.<fieldName>".
    context: str
    # Free-text description passed as ``value`` to t() (model docstring line,
    # field description, ...); may be an empty string.
    value: str
# Source key -> registration metadata. t() and the apiRouteContext() callable
# only insert a key when it is absent, so the first registration wins.
_REGISTRY: Dict[str, _I18nRegistryEntry] = {}
# ---------------------------------------------------------------------------
# Translation cache (populated at boot by loadCache)
# ---------------------------------------------------------------------------
# Language code -> {source key -> translated text}. Read by t() for every
# language other than "de".
_CACHE: Dict[str, Dict[str, str]] = {}
# ---------------------------------------------------------------------------
# Per-request language (set by middleware)
# ---------------------------------------------------------------------------
# Defaults to "de"; for that language t() returns the key itself untranslated.
_CURRENT_LANGUAGE: ContextVar[str] = ContextVar("i18n_lang", default="de")
# ---------------------------------------------------------------------------
# Model labels (backwards-compatible with getModelLabels / getModelLabel)
# ---------------------------------------------------------------------------
# Class name -> {"model": <model label>, "attributes": {<field name>: <label>}}.
# Written by the @i18nModel decorator.
MODEL_LABELS: Dict[str, Dict[str, Any]] = {}
# ---------------------------------------------------------------------------
# t() -- tag and translate
# ---------------------------------------------------------------------------
def t(key: str, context: str = "api", value: str = "") -> str:
    """Tag a UI-visible text for i18n and return its translation.

    Registration: the first call for a given ``key`` stores ``context`` and
    ``value`` (the AI description) in ``_REGISTRY``; later calls never
    overwrite an existing entry.

    Translation: for the language held in ``_CURRENT_LANGUAGE``
      - ``"de"``: the key itself is returned unchanged;
      - any other language: the cached translation, or ``[key]`` when none
        is cached, so missing translations are visible in the UI.
    """
    if key not in _REGISTRY:
        _REGISTRY[key] = _I18nRegistryEntry(context=context, value=value)

    activeLang = _CURRENT_LANGUAGE.get()
    if activeLang == "de":
        return key

    translations = _CACHE.get(activeLang, {})
    missingMarker = f"[{key}]"
    return translations.get(key, missingMarker)
def resolveJobMessage(messageData: Optional[Dict[str, Any]], lang: Optional[str] = None) -> Optional[str]:
    """Translate a structured BackgroundJob progress payload.

    ``messageData`` shape (written by ``JobProgressCallback`` when callers
    pass ``messageKey`` / ``messageParams``)::

        {"key": "{n} Dateien verarbeitet, {indexed} indexiert",
         "params": {"n": 145, "indexed": 106}}

    The walker call sites use a string-literal ``messageKey=``; the matching
    ``t(...)`` literal lives in the feature's progress-key registration
    module (e.g. ``serviceKnowledge/_progressMessages.py``,
    ``features/trustee/mainTrustee.py``) so the boot sync picks it up.

    This helper is the **server-side** translation hop so route handlers can
    deliver a fully rendered ``progressMessage`` string to the frontend --
    the frontend never calls ``t()`` on backend-supplied keys.

    Args:
        messageData: Payload dict with a non-empty string ``"key"`` and an
            optional ``"params"`` dict.
        lang: Optional language that temporarily overrides the context
            language for this call only.

    Returns:
        The translated template with params substituted; the unsubstituted
        template when there are no params or substitution fails for any
        reason; ``None`` when ``messageData`` is not a dict or has no
        usable key.
    """
    if not messageData or not isinstance(messageData, dict):
        return None
    key = messageData.get("key")
    if not isinstance(key, str) or not key:
        return None
    params = messageData.get("params") or {}

    if lang is None:
        template = t(key)
    else:
        # Override the context language only for this lookup and always
        # restore the previous value, even if t() raises.
        token = _CURRENT_LANGUAGE.set(lang)
        try:
            template = t(key)
        finally:
            _CURRENT_LANGUAGE.reset(token)

    if not isinstance(params, dict) or not params:
        return template
    try:
        return template.format(**params)
    except (KeyError, IndexError, ValueError, AttributeError, TypeError):
        # Best effort: a malformed template or params must never break the
        # caller. Besides missing names (KeyError), positional fields
        # (IndexError) and bad format specs (ValueError), str.format raises
        # AttributeError / TypeError for "{n.foo}" / "{n[0]}" style fields,
        # and ``**params`` raises TypeError for non-string keys.
        return template
def resolveText(value: Any, lang: Optional[str] = None) -> str:
    """Resolve any value to a translated string for the request language.

    Accepted inputs:
      - str: translated via t() (treated as i18n key / German plaintext key);
        missing translations use t()'s ``[key]`` fallback
      - dict: multilingual user content -- the entry for the active language,
        then ``xx``, then the first non-empty value
      - object with ``model_dump()``: converted to a dict first
        (TextMultilingual)
      - None / empty: ``""``

    When ``lang`` is given it temporarily replaces the context language for
    this call only (used by schedulers that have an explicit user language);
    the previous language is restored afterwards.
    """
    if lang is None:
        return _resolveTextImpl(value)

    overrideToken = _CURRENT_LANGUAGE.set(lang)
    try:
        return _resolveTextImpl(value)
    finally:
        _CURRENT_LANGUAGE.reset(overrideToken)
def _resolveTextImpl(value: Any) -> str:
    """Resolve ``value`` to text using the language currently in context.

    See ``resolveText`` for the accepted input kinds; this function performs
    the actual resolution without touching the context language.
    """
    if value is None:
        return ""

    if isinstance(value, str):
        # Blank strings are not i18n keys; do not register or translate them.
        return t(value) if value.strip() else ""

    # Pydantic-style objects are flattened to a plain dict first.
    if hasattr(value, "model_dump"):
        value = value.model_dump()

    if not isinstance(value, dict):
        return str(value)
    if not value:
        return ""

    activeLang = _CURRENT_LANGUAGE.get()
    preferred = value.get(activeLang) or value.get("xx")
    if preferred:
        return str(preferred)

    # Neither the active language nor "xx" has content: take the first
    # non-empty entry in dict order.
    for candidate in value.values():
        if candidate:
            return str(candidate)
    return ""
def apiRouteContext(routeModuleName: str):
    """Build a per-route-module helper that registers and translates texts.

    The returned callable ``_apiMsg(key, value="")`` stores ``key`` in
    ``_REGISTRY`` under the context ``api.<routeModuleName>`` as soon as it
    is evaluated (unless the key is already registered), then returns
    ``t()``'s translation for the current language.
    """
    routeContext = f"api.{routeModuleName}"

    def _apiMsg(key: str, value: str = "") -> str:
        # setdefault keeps an existing entry, so the first registration wins.
        _REGISTRY.setdefault(key, _I18nRegistryEntry(context=routeContext, value=value))
        return t(key, routeContext, value)

    return _apiMsg
# ---------------------------------------------------------------------------
# @i18nModel -- class decorator for Pydantic models
# ---------------------------------------------------------------------------
def i18nModel(modelLabel: str, aiContext: str = ""):
    """Class decorator: registers model and field labels for i18n.

    1. Registers t(modelLabel, "table.<ClassName>", aiContext or docstring)
    2. For each regular or computed field whose ``json_schema_extra`` is a
       dict:
         - a truthy ``"label"`` is registered as
           t(label, "table.<ClassName>.<fieldName>", field description)
         - every non-blank string in ``"frontend_format_labels"`` is
           registered under "table.<ClassName>.<fieldName>.format"
    3. Populates MODEL_LABELS for getModelLabels()/getModelLabel() in
       attributeUtils

    Attribute mapping rules (unchanged from the original implementation):
      - regular field without a dict ``json_schema_extra``: not listed
      - regular field with a dict extra: label, or the field name if no label
      - computed field: label if present, otherwise the field name unless an
        entry for that name already exists
    """

    def _registerFieldTexts(className: str, fieldName: str, extra: Dict[str, Any], description: str) -> Any:
        """Register one field's label and format tokens; return the raw label."""
        label = extra.get("label")
        if label:
            t(label, f"table.{className}.{fieldName}", description)
        # Render-hint label tokens (frontend_format_labels) are user-visible
        # strings that appear in tables/forms (e.g. boolean labels
        # ["Ja","-","Nein"], unit suffixes ["KB","MB","GB",...]). Register
        # each non-empty token under a per-field context so they appear in
        # the xx base set and get AI-translated like every other UI string.
        formatLabels = extra.get("frontend_format_labels")
        if isinstance(formatLabels, list):
            fmtCtx = f"table.{className}.{fieldName}.format"
            for token in formatLabels:
                if isinstance(token, str) and token.strip():
                    t(token, fmtCtx, "")
        return label

    def _decorator(cls: Type[BaseModel]) -> Type[BaseModel]:
        className = cls.__name__
        ctx = aiContext or _extractDocstringFirstLine(cls)
        t(modelLabel, f"table.{className}", ctx)

        attributes: Dict[str, str] = {}
        for fieldName, fieldInfo in cls.model_fields.items():
            extra = fieldInfo.json_schema_extra
            if not isinstance(extra, dict):
                continue
            label = _registerFieldTexts(className, fieldName, extra, fieldInfo.description or "")
            attributes[fieldName] = label if label else fieldName

        # Pydantic v2 computed fields (@computed_field) -- same handling as
        # regular model_fields so labels and frontend_format_labels are
        # registered for i18n and appear in MODEL_LABELS.
        computedFields = getattr(cls, "model_computed_fields", {}) or {}
        for fieldName, computedInfo in computedFields.items():
            extra = getattr(computedInfo, "json_schema_extra", None)
            if callable(extra) or not isinstance(extra, dict):
                attributes.setdefault(fieldName, fieldName)
                continue
            desc = getattr(computedInfo, "description", "") or ""
            label = _registerFieldTexts(className, fieldName, extra, desc)
            if label:
                attributes[fieldName] = label
            else:
                attributes.setdefault(fieldName, fieldName)

        MODEL_LABELS[className] = {
            "model": modelLabel,
            "attributes": attributes,
        }
        return cls

    return _decorator
def _extractDocstringFirstLine(cls: type) -> str:
    """Return the first line of ``cls``'s docstring, stripped; ``""`` if none."""
    rawDoc = cls.__doc__
    if rawDoc:
        # Leading blank lines are removed by the outer strip(), so the text
        # before the first newline is the first non-empty line.
        firstLine, _, _ = rawDoc.strip().partition("\n")
        return firstLine.strip()
    return ""
# ---------------------------------------------------------------------------
# Language setter (called by middleware)
# ---------------------------------------------------------------------------
def setLanguage(lang: str):
    """Store ``lang`` as the active language in the current context.

    Subsequent t() / resolveText() calls in the same context read this value
    from ``_CURRENT_LANGUAGE``. The value is stored as given, without
    validation or normalisation.
    """
    _CURRENT_LANGUAGE.set(lang)
def getCurrentLanguage() -> str:
    """Return the active language of the current context (``"de"`` if never set)."""
    activeLang = _CURRENT_LANGUAGE.get()
    return activeLang
def normalizePrimaryLanguageTag(tag: str, fallback: str = "de") -> str:
    """Primary language subtag from ``Accept-Language`` or a single BCP47 tag.

    Only the first entry of a comma-separated list is considered and any
    ``;q=`` parameter is dropped. Region/variant subtags are stripped:
    ``de-CH`` -> ``de``, ``zh-Hans-CN`` -> ``zh``, ``en_US`` -> ``en``.

    The primary subtag must consist of 2 to 8 ASCII letters (covers ISO 639-1
    two-letter and ISO 639-2/3 three-letter codes such as ``gsw``) and is
    returned in lower case.

    Args:
        tag: Raw header value or language tag; ``None``, non-strings and
            empty strings are tolerated.
        fallback: Value returned when no valid primary subtag can be
            extracted (e.g. ``*``, digits, non-ASCII letters).

    Returns:
        The lower-cased primary subtag, or ``fallback``.
    """
    if not tag or not isinstance(tag, str):
        return fallback
    firstRange = tag.split(",", 1)[0].split(";", 1)[0].strip()
    if not firstRange:
        return fallback
    # Treat "_" like "-" and tolerate stray whitespace around the separator.
    primary = firstRange.replace("_", "-").split("-", 1)[0].strip()
    # str.isalpha() alone is Unicode-aware; language subtags are ASCII-only.
    # Check before lower-casing so characters that case-fold into ASCII
    # (e.g. the Kelvin sign) are rejected too.
    if primary.isascii() and primary.isalpha() and 2 <= len(primary) <= 8:
        return primary.lower()
    return fallback