341 lines
13 KiB
Python
341 lines
13 KiB
Python
# Copyright (c) 2026 PowerOn AG
|
|
# All rights reserved.
|
|
"""
|
|
Gateway i18n registry: t(), @i18nModel, runtime translation cache.

All UI-visible texts in the gateway (HTTPException details, model labels,
API messages) are tagged with t() and registered at import time.
At runtime, t() returns the cached translation for the current request language.

Boot-time DB sync and label discovery live in i18nBootSync.py (called by app.py).
This module has ZERO dependencies on other platform-core modules outside shared/.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
from contextvars import ContextVar
|
|
from dataclasses import dataclass, field as dataclass_field
|
|
from typing import Any, Dict, List, Optional, Tuple, Type
|
|
|
|
from pydantic import BaseModel
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Matches {placeholderName} tokens used by t(...) param substitution in the
|
|
# frontend (LanguageContext._applyParams) and the gateway. Allows ASCII
|
|
# identifiers and digits, no spaces.
|
|
_PLACEHOLDER_PATTERN = re.compile(r"\{[A-Za-z_][A-Za-z0-9_]*\}")
|
|
|
|
|
|
def _enforceSourcePlaceholders(sourceKey: str, translatedValue: str) -> Tuple[str, bool]:
|
|
"""Repair a translated value so its placeholder tokens match the source key.
|
|
|
|
Background: AI translators occasionally translate the *names* of
|
|
placeholders even when instructed not to (e.g. ``{konten}`` -> ``{accounts}``).
|
|
The frontend then cannot substitute params and the user sees raw
|
|
``{accounts}`` in the UI.
|
|
|
|
Strategy (positional, conservative):
|
|
- if the source has no placeholders -> nothing to do
|
|
- if source and translation have the same set of tokens -> nothing to do
|
|
- if both have the *same number* of tokens but different names -> swap
|
|
each translation token with the source token at the same position
|
|
- if counts differ -> leave the translation untouched (too risky to
|
|
guess; surfaced as a logger.warning by the caller if desired)
|
|
|
|
Returns ``(repairedValue, wasChanged)``.
|
|
"""
|
|
if not sourceKey or not translatedValue:
|
|
return translatedValue, False
|
|
sourceTokens = _PLACEHOLDER_PATTERN.findall(sourceKey)
|
|
if not sourceTokens:
|
|
return translatedValue, False
|
|
valueTokens = _PLACEHOLDER_PATTERN.findall(translatedValue)
|
|
if not valueTokens:
|
|
return translatedValue, False
|
|
if sourceTokens == valueTokens:
|
|
return translatedValue, False
|
|
if len(sourceTokens) != len(valueTokens):
|
|
return translatedValue, False
|
|
parts = _PLACEHOLDER_PATTERN.split(translatedValue)
|
|
rebuilt = parts[0]
|
|
for idx, srcTok in enumerate(sourceTokens):
|
|
rebuilt += srcTok + parts[idx + 1]
|
|
return rebuilt, True
|
|
|
|
|
|
def _extractRegistrySourceText(obj: Any) -> str:
|
|
"""Resolve a str or multilingual dict to one canonical registry key string."""
|
|
if isinstance(obj, str):
|
|
return obj
|
|
if isinstance(obj, dict):
|
|
return obj.get("xx") or next(iter(obj.values()), "") or ""
|
|
return ""
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Registry (populated at import time by t() and @i18nModel)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@dataclass
class _I18nRegistryEntry:
    """Metadata stored in ``_REGISTRY`` for one registered i18n key.

    Entries are created by ``t()`` and by the helper returned from
    ``apiRouteContext`` the first time a key is seen.
    """

    # Registration context, e.g. "api", "api.<routeModule>",
    # "table.<ClassName>" or "table.<ClassName>.<fieldName>".
    context: str
    # Free-text description passed as ``value`` to t(): the model's aiContext
    # or docstring first line, a field description, or "" when none is given.
    value: str
|
|
|
|
|
|
# Maps each registered key (the source text itself) to its registration
# metadata. Written by t() and by the helper returned from apiRouteContext();
# the first registration of a key wins, later ones do not overwrite it.
_REGISTRY: Dict[str, _I18nRegistryEntry] = {}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Translation cache (populated at boot by loadCache)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Two-level lookup read by t(): language code -> (key -> translated text).
# Nothing in this part of the file writes to it.
_CACHE: Dict[str, Dict[str, str]] = {}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Per-request language (set by middleware)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Context-local language code. Defaults to "de", which t() treats as the
# source language (keys are returned untranslated for "de").
_CURRENT_LANGUAGE: ContextVar[str] = ContextVar("i18n_lang", default="de")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Model labels (backwards-compatible with getModelLabels / getModelLabel)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Filled by the @i18nModel decorator: class name ->
# {"model": <model label>, "attributes": {<field name>: <label or field name>}}.
MODEL_LABELS: Dict[str, Dict[str, Any]] = {}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# t() -- tag and translate
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def t(key: str, context: str = "api", value: str = "") -> str:
    """Register a UI-visible text for i18n and return its translation.

    The first call for a given ``key`` stores ``context`` and ``value`` (the
    description for translators) in ``_REGISTRY``; later calls with the same
    key leave the existing entry untouched.

    The return value depends on the current context language:
    - ``"de"``: the key itself (keys are the German source texts).
    - any other language: the cached translation, or ``[key]`` when none is
      cached, so missing translations are visible in the UI.
    """
    if key not in _REGISTRY:
        _REGISTRY[key] = _I18nRegistryEntry(context=context, value=value)

    activeLanguage = _CURRENT_LANGUAGE.get()
    if activeLanguage != "de":
        translations = _CACHE.get(activeLanguage, {})
        return translations.get(key, f"[{key}]")
    return key
|
|
|
|
|
|
def resolveJobMessage(messageData: Optional[Dict[str, Any]], lang: Optional[str] = None) -> Optional[str]:
    """Translate a structured BackgroundJob progress payload.

    ``messageData`` shape (written by ``JobProgressCallback`` when callers
    pass ``messageKey`` / ``messageParams``)::

        {"key": "{n} Dateien verarbeitet, {indexed} indexiert",
         "params": {"n": 145, "indexed": 106}}

    The walker call sites use a string-literal ``messageKey=``; the matching
    ``t("…")`` literal lives in the feature's progress-key registration
    module (e.g. ``serviceKnowledge/_progressMessages.py``,
    ``features/trustee/mainTrustee.py``) so the boot sync picks it up.

    This helper is the **server-side** translation hop so route handlers can
    deliver a fully rendered ``progressMessage`` string to the frontend --
    the frontend never calls ``t()`` on backend-supplied keys.

    Args:
        messageData: Payload dict with a non-empty string ``"key"`` and an
            optional ``"params"`` dict.
        lang: Language to translate into. When given, it overrides the
            context language for the duration of the lookup only.

    Returns:
        The translated template with ``params`` substituted, the bare
        translated template when there are no params or substitution fails,
        or ``None`` when ``messageData`` is not a dict with a usable key.
    """
    if not messageData or not isinstance(messageData, dict):
        return None
    key = messageData.get("key")
    if not isinstance(key, str) or not key:
        return None
    params = messageData.get("params") or {}

    if lang is not None:
        # Temporarily switch the context language; always restore it.
        token = _CURRENT_LANGUAGE.set(lang)
        try:
            template = t(key)
        finally:
            _CURRENT_LANGUAGE.reset(token)
    else:
        template = t(key)

    if not isinstance(params, dict) or not params:
        return template
    try:
        return template.format(**params)
    except (KeyError, IndexError, ValueError, AttributeError, TypeError):
        # Best effort: a translation with stray braces, an unknown or
        # attribute/index-style placeholder ("{n.x}", "{n[0]}"), or params
        # with non-string keys must not break the caller -- return the
        # unsubstituted template instead.
        return template
|
|
|
|
|
|
def resolveText(value: Any, lang: Optional[str] = None) -> str:
    """Turn any supported value into display text for the request language.

    Supported inputs:
    - ``str``: treated as an i18n key (German plaintext) and passed to t();
      missing translations therefore show up as ``[key]``.
    - ``dict``: multilingual user content -- the entry for the active
      language, then ``xx``, then the first non-empty value.
    - object exposing ``model_dump()`` (TextMultilingual): dumped to a dict
      first, then handled like a dict.
    - ``None`` / empty: ``""``.

    When ``lang`` is given it overrides the context language for this call
    only (used by schedulers that know the user's language explicitly); the
    previous context language is restored afterwards.
    """
    if lang is None:
        return _resolveTextImpl(value)

    overrideToken = _CURRENT_LANGUAGE.set(lang)
    try:
        return _resolveTextImpl(value)
    finally:
        _CURRENT_LANGUAGE.reset(overrideToken)
|
|
|
|
|
|
def _resolveTextImpl(value: Any) -> str:
|
|
if value is None:
|
|
return ""
|
|
if isinstance(value, str):
|
|
if not value.strip():
|
|
return ""
|
|
return t(value)
|
|
if hasattr(value, "model_dump"):
|
|
value = value.model_dump()
|
|
if isinstance(value, dict):
|
|
if not value:
|
|
return ""
|
|
lang = _CURRENT_LANGUAGE.get()
|
|
text = value.get(lang) or value.get("xx")
|
|
if text:
|
|
return str(text)
|
|
first = next((v for v in value.values() if v), None)
|
|
return str(first) if first else ""
|
|
return str(value)
|
|
|
|
|
|
def apiRouteContext(routeModuleName: str):
    """Build a ``t()`` wrapper bound to the context ``api.<routeModuleName>``.

    The returned callable takes ``(key, value="")``. Calling it registers the
    key in ``_REGISTRY`` under that context if it is not registered yet (so a
    module-level ``detail=routeApiMsg("…")`` registers at import time) and
    returns the translation for the current language, exactly like ``t()``.
    """
    routeContext = f"api.{routeModuleName}"

    def _apiMsg(key: str, value: str = "") -> str:
        # t() performs the "register if not yet known" step itself, using the
        # context and value passed here.
        return t(key, routeContext, value)

    return _apiMsg
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# @i18nModel -- class decorator for Pydantic models
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def i18nModel(modelLabel: str, aiContext: str = ""):
    """Class decorator that registers a Pydantic model's labels for i18n.

    For the decorated class it:

    1. registers ``modelLabel`` under ``table.<ClassName>``, described by
       ``aiContext`` or, if that is empty, the first line of the class
       docstring;
    2. registers every field label found in ``json_schema_extra["label"]``
       under ``table.<ClassName>.<fieldName>``, described by the field's
       description;
    3. registers every non-blank string in
       ``json_schema_extra["frontend_format_labels"]`` under
       ``table.<ClassName>.<fieldName>.format``;
    4. stores the result in ``MODEL_LABELS[<ClassName>]`` for
       getModelLabels()/getModelLabel() in attributeUtils.

    Regular fields and Pydantic v2 computed fields are both processed.
    """

    def _registerFormatTokens(className: str, fieldName: str, extra: Dict[str, Any]) -> None:
        # Render-hint label tokens (frontend_format_labels) are user-visible
        # strings shown in tables/forms (e.g. boolean labels ["Ja","-","Nein"],
        # unit suffixes ["KB","MB","GB",...]). Each non-blank token is
        # registered under a per-field context so it ends up in the xx base
        # set and gets AI-translated like every other UI string.
        formatLabels = extra.get("frontend_format_labels")
        if not isinstance(formatLabels, list):
            return
        fmtCtx = f"table.{className}.{fieldName}.format"
        for token in formatLabels:
            if isinstance(token, str) and token.strip():
                t(token, fmtCtx, "")

    def _decorator(cls: Type[BaseModel]) -> Type[BaseModel]:
        className = cls.__name__
        t(modelLabel, f"table.{className}", aiContext or _extractDocstringFirstLine(cls))

        labels: Dict[str, str] = {}

        # Regular fields.
        # NOTE(review): fields whose json_schema_extra is not a dict are left
        # out of the attribute map entirely, whereas computed fields in the
        # same situation are listed under their own name -- confirm whether
        # that asymmetry is intended.
        for name, info in cls.model_fields.items():
            schemaExtra = info.json_schema_extra
            if not isinstance(schemaExtra, dict):
                continue
            fieldLabel = schemaExtra.get("label")
            if fieldLabel:
                t(fieldLabel, f"table.{className}.{name}", info.description or "")
                labels[name] = fieldLabel
            else:
                labels[name] = name
            _registerFormatTokens(className, name, schemaExtra)

        # Pydantic v2 computed fields (@computed_field): labels and
        # frontend_format_labels are registered the same way. An existing
        # entry is only overwritten when the computed field has a label.
        computed = getattr(cls, "model_computed_fields", {}) or {}
        for name, info in computed.items():
            schemaExtra = getattr(info, "json_schema_extra", None)
            usableExtra = isinstance(schemaExtra, dict) and not callable(schemaExtra)
            if not usableExtra:
                labels.setdefault(name, name)
                continue
            fieldLabel = schemaExtra.get("label")
            if fieldLabel:
                t(fieldLabel, f"table.{className}.{name}", getattr(info, "description", "") or "")
                labels[name] = fieldLabel
            else:
                labels.setdefault(name, name)
            _registerFormatTokens(className, name, schemaExtra)

        MODEL_LABELS[className] = {"model": modelLabel, "attributes": labels}
        return cls

    return _decorator
|
|
|
|
|
|
def _extractDocstringFirstLine(cls: type) -> str:
|
|
doc = cls.__doc__
|
|
if not doc:
|
|
return ""
|
|
return doc.strip().split("\n")[0].strip()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Language setter (called by middleware)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def setLanguage(lang: str) -> None:
    """Set the language for the current request context.

    Stores ``lang`` in the ``_CURRENT_LANGUAGE`` context variable. The token
    returned by ``ContextVar.set`` is discarded, so this function does not
    offer a way to restore the previous value.
    """
    _CURRENT_LANGUAGE.set(lang)
|
|
|
|
|
|
def getCurrentLanguage() -> str:
    """Get the language for the current request context.

    Returns the value of the ``_CURRENT_LANGUAGE`` context variable, which
    is ``"de"`` when nothing has been set in the current context.
    """
    return _CURRENT_LANGUAGE.get()
|
|
|
|
|
|
def normalizePrimaryLanguageTag(tag: str, fallback: str = "de") -> str:
    """Primary language subtag from ``Accept-Language`` or a single BCP47 tag.

    Only the first entry of a comma-separated ``Accept-Language`` value is
    considered and its ``;q=`` weight is ignored. Region, script and variant
    are stripped (``-`` and ``_`` are both accepted as separators) and the
    result is lower-cased: ``de-CH`` → ``de``, ``zh-Hans-CN`` → ``zh``.

    A primary subtag is accepted when it consists of 2 to 8 ASCII letters,
    which covers 2-letter (ISO 639-1) and 3-letter (ISO 639-2/3) codes such
    as ``gsw``.

    Args:
        tag: Header value or language tag; may be empty or not a string.
        fallback: Value returned when no valid primary subtag can be found.

    Returns:
        The lower-cased primary subtag, or ``fallback``.
    """
    if not tag or not isinstance(tag, str):
        return fallback
    first = tag.split(",")[0].split(";")[0].strip()
    if not first:
        return fallback
    primary = first.split("-")[0].split("_")[0].lower()
    # isalpha() alone also accepts non-ASCII letters (e.g. CJK or Cyrillic),
    # which can never be a valid language subtag, so require ASCII as well.
    if primary.isascii() and primary.isalpha() and 2 <= len(primary) <= 8:
        return primary
    return fallback
|
|
|