gateway/modules/shared/voiceCatalog.py
2026-04-19 00:36:45 +02:00

136 lines
6.5 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Voice / Language Catalog — Single Source of Truth.
Every voice-related component (TTS connector, AI tools, REST routes, frontend
language pickers) consumes this catalog. Hard-coded language lists or ad-hoc
ISO→BCP-47 maps elsewhere are forbidden — extend the catalog instead.
Schema per entry:
    bcp47         BCP-47 locale code, e.g. "de-DE", "ru-RU"
    iso           ISO-639-1 short code, e.g. "de", "ru"
    label         Native display label ("Deutsch", "Русский")
    flag          Emoji flag (or empty string for region-neutral codes)
    defaultVoice  Curated Google TTS voice name; None means "let Google
                  pick automatically based on bcp47 + ssml_gender".
"""
from __future__ import annotations
from dataclasses import asdict, dataclass
from typing import Dict, List, Optional
@dataclass(frozen=True)
class VoiceLanguage:
    """One immutable catalog entry describing a supported voice language.

    Instances are frozen, so attributes cannot be reassigned after creation.
    Field order is significant: catalog entries are constructed positionally.
    """

    # BCP-47 locale code, e.g. "de-DE".
    bcp47: str
    # ISO-639-1 short code, e.g. "de"; several locales may share one code.
    iso: str
    # Native display label, e.g. "Deutsch".
    label: str
    # Emoji flag, or an empty string for region-neutral codes.
    flag: str
    # Curated Google TTS voice name, or None to leave voice selection to Google.
    defaultVoice: Optional[str]
# Order matters for UI: most common first, then alphabetical groups.
# Order also matters for ISO lookups: the ISO index below is filled with
# setdefault, so the FIRST entry listed for an ISO code is the one that code
# resolves to (e.g. "de" -> de-DE, "en" -> en-US).
# Positional fields: bcp47, iso, label, flag, defaultVoice.
VOICE_LANGUAGES: List[VoiceLanguage] = [
    VoiceLanguage("de-DE", "de", "Deutsch", "🇩🇪", "de-DE-Wavenet-A"),
    # The Swiss and Austrian locales reuse the de-DE voice.
    VoiceLanguage("de-CH", "de", "Deutsch (Schweiz)", "🇨🇭", "de-DE-Wavenet-A"),
    VoiceLanguage("de-AT", "de", "Deutsch (Österreich)", "🇦🇹", "de-DE-Wavenet-A"),
    VoiceLanguage("en-US", "en", "English (US)", "🇺🇸", "en-US-Wavenet-C"),
    VoiceLanguage("en-GB", "en", "English (UK)", "🇬🇧", "en-GB-Wavenet-A"),
    VoiceLanguage("en-AU", "en", "English (Australia)", "🇦🇺", "en-AU-Wavenet-A"),
    VoiceLanguage("fr-FR", "fr", "Français", "🇫🇷", "fr-FR-Wavenet-A"),
    VoiceLanguage("fr-CA", "fr", "Français (Canada)", "🇨🇦", "fr-CA-Wavenet-A"),
    VoiceLanguage("it-IT", "it", "Italiano", "🇮🇹", "it-IT-Wavenet-A"),
    VoiceLanguage("es-ES", "es", "Español", "🇪🇸", "es-ES-Wavenet-B"),
    VoiceLanguage("es-US", "es", "Español (US)", "🇺🇸", "es-US-Wavenet-A"),
    VoiceLanguage("pt-BR", "pt", "Português (Brasil)", "🇧🇷", "pt-BR-Wavenet-A"),
    VoiceLanguage("pt-PT", "pt", "Português (Portugal)", "🇵🇹", "pt-PT-Wavenet-A"),
    VoiceLanguage("nl-NL", "nl", "Nederlands", "🇳🇱", "nl-NL-Wavenet-A"),
    VoiceLanguage("pl-PL", "pl", "Polski", "🇵🇱", "pl-PL-Wavenet-A"),
    VoiceLanguage("ru-RU", "ru", "Русский", "🇷🇺", "ru-RU-Wavenet-A"),
    VoiceLanguage("uk-UA", "uk", "Українська", "🇺🇦", "uk-UA-Wavenet-A"),
    VoiceLanguage("cs-CZ", "cs", "Čeština", "🇨🇿", "cs-CZ-Wavenet-A"),
    VoiceLanguage("sk-SK", "sk", "Slovenčina", "🇸🇰", "sk-SK-Wavenet-A"),
    VoiceLanguage("hu-HU", "hu", "Magyar", "🇭🇺", "hu-HU-Wavenet-A"),
    VoiceLanguage("ro-RO", "ro", "Română", "🇷🇴", "ro-RO-Wavenet-A"),
    VoiceLanguage("el-GR", "el", "Ελληνικά", "🇬🇷", "el-GR-Wavenet-A"),
    VoiceLanguage("sv-SE", "sv", "Svenska", "🇸🇪", "sv-SE-Wavenet-A"),
    VoiceLanguage("da-DK", "da", "Dansk", "🇩🇰", "da-DK-Wavenet-A"),
    VoiceLanguage("nb-NO", "nb", "Norsk", "🇳🇴", "nb-NO-Wavenet-A"),
    VoiceLanguage("fi-FI", "fi", "Suomi", "🇫🇮", "fi-FI-Wavenet-A"),
    VoiceLanguage("tr-TR", "tr", "Türkçe", "🇹🇷", "tr-TR-Wavenet-A"),
    # Empty flag: the module docstring reserves "" for region-neutral codes.
    VoiceLanguage("ar-XA", "ar", "العربية", "", "ar-XA-Wavenet-A"),
    VoiceLanguage("hi-IN", "hi", "हिन्दी", "🇮🇳", "hi-IN-Wavenet-A"),
    VoiceLanguage("ja-JP", "ja", "日本語", "🇯🇵", "ja-JP-Wavenet-A"),
    VoiceLanguage("ko-KR", "ko", "한국어", "🇰🇷", "ko-KR-Wavenet-A"),
    # Note: the voice name is prefixed "cmn-CN", not the entry's bcp47 "zh-CN".
    VoiceLanguage("zh-CN", "zh", "中文 (简体)", "🇨🇳", "cmn-CN-Wavenet-A"),
    VoiceLanguage("vi-VN", "vi", "Tiếng Việt", "🇻🇳", "vi-VN-Wavenet-A"),
    # Only entry whose default is a "Standard" rather than a "Wavenet" voice.
    VoiceLanguage("th-TH", "th", "ไทย", "🇹🇭", "th-TH-Standard-A"),
    VoiceLanguage("id-ID", "id", "Bahasa Indonesia", "🇮🇩", "id-ID-Wavenet-A"),
]
# ---------------------------------------------------------------------------
# Lookup indexes (built once at import).
# Keys are lower-cased so lookups can be case-insensitive.
# ---------------------------------------------------------------------------
_BY_BCP47: Dict[str, VoiceLanguage] = {}
_BY_ISO: Dict[str, VoiceLanguage] = {}
for _v in VOICE_LANGUAGES:
    # BCP-47 index: plain assignment, so a later duplicate would replace an earlier one.
    _BY_BCP47[_v.bcp47.lower()] = _v
    # ISO index: the first locale listed for a short code stays the canonical one.
    if _v.iso.lower() not in _BY_ISO:
        _BY_ISO[_v.iso.lower()] = _v
def listVoiceLanguages() -> List[VoiceLanguage]:
    """Return the supported voice languages in catalog order.

    A fresh list is returned on every call, so callers may reorder or trim
    the result without affecting the module-level catalog.
    """
    return [*VOICE_LANGUAGES]
def getCatalogPayload() -> List[Dict[str, Optional[str]]]:
    """Return every catalog entry as a plain dict, in catalog order.

    Each dict is keyed by the dataclass field names, which makes the result
    ready for JSON serialization.
    """
    return list(map(asdict, VOICE_LANGUAGES))
def getByBcp47(code: Optional[str]) -> Optional[VoiceLanguage]:
    """Look up a catalog entry by BCP-47 code.

    Surrounding whitespace and letter case are ignored. Returns None when the
    code is None, empty, or not in the catalog.
    """
    if code:
        lookupKey = code.strip().lower()
        return _BY_BCP47.get(lookupKey)
    return None
def getByIso(code: Optional[str]) -> Optional[VoiceLanguage]:
    """Look up the canonical catalog entry for an ISO short code.

    Surrounding whitespace and letter case are ignored. Returns None when the
    code is None, empty, or has no catalog entry.
    """
    if code:
        lookupKey = code.strip().lower()
        return _BY_ISO.get(lookupKey)
    return None
def getDefaultVoice(bcp47: Optional[str]) -> Optional[str]:
    """Return the curated default Google TTS voice for a BCP-47 code, else None.

    None is returned both for codes missing from the catalog and for entries
    without a curated voice. In that case the caller must omit `name` in
    VoiceSelectionParams so Google auto-selects a voice for the language code.
    """
    match = getByBcp47(bcp47)
    if not match:
        return None
    return match.defaultVoice
def isoToBcp47(iso: Optional[str]) -> Optional[str]:
    """Map an ISO-639-1 short code to the canonical BCP-47 locale.

    Already-qualified BCP-47 inputs are passed through unchanged (canonicalised
    to the catalog form when known). Underscore-separated locales such as
    "de_DE" are treated like their hyphenated form, so they resolve as
    qualified locales instead of being mistaken for a short code. Unknown ISO
    codes fall back to ``<iso>-<ISO>`` (e.g. "fa" -> "fa-FA") so callers always
    get a parseable locale, but unknown codes carry no curated voice.

    Args:
        iso: A short code ("de"), a qualified locale ("de-DE", "de_de"), or None.

    Returns:
        The BCP-47 locale string, or None when the input is None, empty or
        whitespace only.
    """
    if not iso:
        return None
    # BCP-47 separates subtags with "-". Normalise "_" first; otherwise "de_DE"
    # would skip the qualified branch and come back as "de_de-DE_DE".
    normalized = iso.strip().replace("_", "-")
    if not normalized:
        return None
    if "-" in normalized:
        # Qualified input: prefer the catalog's spelling, else pass it through.
        canonical = getByBcp47(normalized)
        return canonical.bcp47 if canonical else normalized
    isoLower = normalized.lower()
    entry = _BY_ISO.get(isoLower)
    if entry:
        return entry.bcp47
    # Unknown short code: synthesise a region by upper-casing the code itself.
    return f"{isoLower}-{isoLower.upper()}"