From 3ea85fe57eaeed7a1657840b5e8f783992f2cf07 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sun, 19 Apr 2026 00:36:45 +0200
Subject: [PATCH] centralized language catalog
---
modules/connectors/connectorVoiceGoogle.py | 115 +++++----------
modules/interfaces/interfaceVoiceObjects.py | 35 +----
modules/routes/routeVoiceGoogle.py | 36 ++---
modules/routes/routeVoiceUser.py | 11 +-
.../serviceAgent/coreTools/_mediaTools.py | 12 +-
modules/shared/voiceCatalog.py | 136 ++++++++++++++++++
6 files changed, 197 insertions(+), 148 deletions(-)
create mode 100644 modules/shared/voiceCatalog.py
diff --git a/modules/connectors/connectorVoiceGoogle.py b/modules/connectors/connectorVoiceGoogle.py
index 1dc0912b..f875c72c 100644
--- a/modules/connectors/connectorVoiceGoogle.py
+++ b/modules/connectors/connectorVoiceGoogle.py
@@ -15,6 +15,7 @@ from google.cloud import speech
from google.cloud import translate_v2 as translate
from google.cloud import texttospeech
from modules.shared.configuration import APP_CONFIG
+from modules.shared.voiceCatalog import getDefaultVoice as _catalogDefaultVoice
logger = logging.getLogger(__name__)
@@ -940,33 +941,26 @@ class ConnectorGoogleSpeech:
stripped = voiceName.strip()
return bool(stripped) and "-" not in stripped
- async def textToSpeech(self, text: str, languageCode: str = "de-DE", voiceName: str = None) -> Dict[str, Any]:
+ async def textToSpeech(self, text: str, languageCode: str = "de-DE", voiceName: Optional[str] = None) -> Dict[str, Any]:
"""
Convert text to speech using Google Cloud Text-to-Speech.
-
+
Args:
text: Text to convert to speech
- language_code: Language code (e.g., 'de-DE', 'en-US')
- voice_name: Specific voice name (optional)
-
+ languageCode: BCP-47 language code (e.g., 'de-DE', 'en-US', 'ru-RU')
+ voiceName: Specific voice name (optional). If omitted, a curated
+ default is used; if no curated default exists for the language,
+ Google selects a default voice automatically based on
+ languageCode + ssml_gender (no hard failure).
+
Returns:
Dict with success status and audio data
"""
try:
logger.info(f"Converting text to speech: '{text[:50]}...' in {languageCode}")
-
- # Build the voice request
+
selectedVoice = voiceName or self._getDefaultVoice(languageCode)
-
- if not selectedVoice:
- return {
- "success": False,
- "error": f"No voice specified for language {languageCode}. Please select a voice."
- }
-
- logger.info(f"Using TTS voice: {selectedVoice} for language: {languageCode}")
-
- isGeminiVoice = self._isGeminiTtsSpeakerVoiceName(selectedVoice)
+ isGeminiVoice = self._isGeminiTtsSpeakerVoiceName(selectedVoice) if selectedVoice else False
if isGeminiVoice:
synthesisInput = texttospeech.SynthesisInput(
@@ -981,11 +975,23 @@ class ConnectorGoogleSpeech:
)
else:
synthesisInput = texttospeech.SynthesisInput(text=text)
- voice = texttospeech.VoiceSelectionParams(
- language_code=languageCode,
- name=selectedVoice,
- ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
- )
+ voiceKwargs: Dict[str, Any] = {
+ "language_code": languageCode,
+ "ssml_gender": texttospeech.SsmlVoiceGender.NEUTRAL,
+ }
+ if selectedVoice:
+ voiceKwargs["name"] = selectedVoice
+ else:
+ logger.info(
+ f"TTS: no curated voice for '{languageCode}', "
+ f"letting Google auto-select by language + gender"
+ )
+ voice = texttospeech.VoiceSelectionParams(**voiceKwargs)
+
+ logger.info(
+ f"Using TTS voice: {selectedVoice or ''} "
+ f"for language: {languageCode}"
+ )
audioConfig = texttospeech.AudioConfig(
audio_encoding=texttospeech.AudioEncoding.MP3
@@ -994,16 +1000,15 @@ class ConnectorGoogleSpeech:
response = self.tts_client.synthesize_speech(
input=synthesisInput,
voice=voice,
- audio_config=audioConfig
+ audio_config=audioConfig,
)
- # Return the audio content
return {
"success": True,
"audio_content": response.audio_content,
"audio_format": "mp3",
"language_code": languageCode,
- "voice_name": voice.name
+ "voice_name": selectedVoice or "",
}
except Exception as e:
@@ -1018,59 +1023,15 @@ class ConnectorGoogleSpeech:
"error": f"Text-to-Speech failed: {detail}{extra}",
}
- def _getDefaultVoice(self, languageCode: str) -> str:
+ def _getDefaultVoice(self, languageCode: str) -> Optional[str]:
+ """Return the curated default Google TTS voice for `languageCode`.
+
+ Delegates to the central voice catalog; returns None when no curated
+ voice exists, in which case the caller omits `name` and Google
+ auto-selects based on languageCode + ssml_gender.
"""
- Get default voice name for a language code.
- Falls back to a Wavenet voice for common languages.
- """
- _defaults = {
- "de-DE": "de-DE-Wavenet-A",
- "de-CH": "de-DE-Wavenet-A",
- "en-US": "en-US-Wavenet-C",
- "en-GB": "en-GB-Wavenet-A",
- "fr-FR": "fr-FR-Wavenet-A",
- "it-IT": "it-IT-Wavenet-A",
- }
- return _defaults.get(languageCode)
-
- async def getAvailableLanguages(self) -> Dict[str, Any]:
- """
- Get available languages from Google Cloud Text-to-Speech.
-
- Returns:
- Dict containing success status and list of available languages
- """
- try:
- logger.info("🌐 Getting available languages from Google Cloud TTS")
-
- # List voices from Google Cloud TTS
- response = self.tts_client.list_voices()
-
- # Extract unique language codes
- # Note: Google TTS API doesn't provide language descriptions, only codes
- language_codes = set()
- for voice in response.voices:
- if voice.language_codes:
- language_codes.update(voice.language_codes)
-
- # Convert to sorted list of language codes
- available_languages = sorted(list(language_codes))
-
- logger.info(f"✅ Found {len(available_languages)} available languages")
-
- return {
- "success": True,
- "languages": available_languages
- }
-
- except Exception as e:
- logger.error(f"❌ Failed to get available languages: {e}")
- return {
- "success": False,
- "error": str(e),
- "languages": []
- }
-
+ return _catalogDefaultVoice(languageCode)
+
async def getAvailableVoices(self, languageCode: Optional[str] = None) -> Dict[str, Any]:
"""
Get available voices from Google Cloud Text-to-Speech.
diff --git a/modules/interfaces/interfaceVoiceObjects.py b/modules/interfaces/interfaceVoiceObjects.py
index 69962259..d0b6f461 100644
--- a/modules/interfaces/interfaceVoiceObjects.py
+++ b/modules/interfaces/interfaceVoiceObjects.py
@@ -338,36 +338,11 @@ class VoiceObjects:
"error": str(e)
}
- # Language and Voice Information
-
- async def getAvailableLanguages(self) -> Dict[str, Any]:
- """
- Get available languages from Google Cloud Text-to-Speech.
-
- Returns:
- Dict containing success status and list of available languages
- """
- try:
- logger.info("🌐 Getting available languages from Google Cloud TTS")
-
- connector = self._getGoogleSpeechConnector()
- result = await connector.getAvailableLanguages()
-
- if result["success"]:
- logger.info(f"✅ Found {len(result['languages'])} available languages")
- else:
- logger.warning(f"⚠️ Failed to get languages: {result.get('error', 'Unknown error')}")
-
- return result
-
- except Exception as e:
- logger.error(f"❌ Error getting available languages: {e}")
- return {
- "success": False,
- "error": str(e),
- "languages": []
- }
-
+ # Voice Information
+ # Note: Available languages live in the central voice catalog
+ # (modules.shared.voiceCatalog); voice picks per language stay live from
+ # Google so users can see all available speakers per locale.
+
async def getAvailableVoices(self, languageCode: Optional[str] = None) -> Dict[str, Any]:
"""
Get available voices from Google Cloud Text-to-Speech.
diff --git a/modules/routes/routeVoiceGoogle.py b/modules/routes/routeVoiceGoogle.py
index dfa1a15e..8987e73f 100644
--- a/modules/routes/routeVoiceGoogle.py
+++ b/modules/routes/routeVoiceGoogle.py
@@ -17,6 +17,7 @@ from typing import Optional, Dict, Any, List
from modules.auth import getCurrentUser, getRequestContext, RequestContext, limiter
from modules.datamodels.datamodelUam import User
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface, VoiceObjects
+from modules.shared.voiceCatalog import getCatalogPayload
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/voice-google", tags=["Voice Google"])
@@ -61,32 +62,15 @@ def _getVoiceInterface(currentUser: User) -> VoiceObjects:
@router.get("/languages")
async def get_available_languages(currentUser: User = Depends(getCurrentUser)):
- """Get available languages from Google Cloud Text-to-Speech."""
- try:
- logger.info("🌐 Getting available languages from Google Cloud TTS")
-
- voiceInterface = _getVoiceInterface(currentUser)
- result = await voiceInterface.getAvailableLanguages()
-
- if result["success"]:
- return {
- "success": True,
- "languages": result["languages"]
- }
- else:
- raise HTTPException(
- status_code=400,
- detail=f"Failed to get languages: {result.get('error', 'Unknown error')}"
- )
-
- except HTTPException:
- raise
- except Exception as e:
- logger.error(f"❌ Get languages error: {e}")
- raise HTTPException(
- status_code=500,
- detail=f"Failed to get available languages: {str(e)}"
- )
+ """Return the curated voice/language catalog (single source of truth).
+
+ Each entry: {bcp47, iso, label, flag, defaultVoice}. Same payload as
+ /api/voice/languages — both endpoints back the same catalog.
+ """
+ return {
+ "success": True,
+ "languages": getCatalogPayload(),
+ }
@router.get("/voices")
async def get_available_voices(
diff --git a/modules/routes/routeVoiceUser.py b/modules/routes/routeVoiceUser.py
index a3c3fda7..4edbdf0d 100644
--- a/modules/routes/routeVoiceUser.py
+++ b/modules/routes/routeVoiceUser.py
@@ -18,6 +18,7 @@ from modules.datamodels.datamodelUam import User, UserVoicePreferences, _normali
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
from modules.shared.i18nRegistry import apiRouteContext
+from modules.shared.voiceCatalog import getCatalogPayload
routeApiMsg = apiRouteContext("routeVoiceUser")
logger = logging.getLogger(__name__)
@@ -101,11 +102,11 @@ async def getVoiceLanguages(
request: Request,
currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
- """Return available TTS languages (user-level, no instance context needed)."""
- voiceInterface = getVoiceInterface(currentUser)
- languagesResult = await voiceInterface.getAvailableLanguages()
- languageList = languagesResult.get("languages", []) if isinstance(languagesResult, dict) else languagesResult
- return {"languages": languageList}
+ """Return the curated voice/language catalog (single source of truth).
+
+ Each entry: {bcp47, iso, label, flag, defaultVoice}.
+ """
+ return {"languages": getCatalogPayload()}
@router.get("/voices")
diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py
index da6e616c..83f6e990 100644
--- a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py
+++ b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py
@@ -395,25 +395,17 @@ def _registerMediaTools(registry: ToolRegistry, services):
try:
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
+ from modules.shared.voiceCatalog import isoToBcp47
mandateId = context.get("mandateId", "")
voiceInterface = getVoiceInterface(currentUser=None, mandateId=mandateId)
- _ISO_TO_BCP47 = {
- "de": "de-DE", "en": "en-US", "fr": "fr-FR", "it": "it-IT",
- "es": "es-ES", "pt": "pt-BR", "nl": "nl-NL", "pl": "pl-PL",
- "ru": "ru-RU", "ja": "ja-JP", "zh": "zh-CN", "ko": "ko-KR",
- "ar": "ar-XA", "hi": "hi-IN", "tr": "tr-TR", "sv": "sv-SE",
- }
-
if language == "auto":
try:
snippet = cleanText[:500]
detectResult = await voiceInterface.detectLanguage(snippet)
if detectResult and detectResult.get("success"):
detected = detectResult.get("language", "de")
- language = _ISO_TO_BCP47.get(detected, detected)
- if "-" not in language:
- language = _ISO_TO_BCP47.get(language, f"{language}-{language.upper()}")
+ language = isoToBcp47(detected) or "de-DE"
logger.info(f"textToSpeech: auto-detected language '{detected}' -> '{language}'")
else:
language = "de-DE"
diff --git a/modules/shared/voiceCatalog.py b/modules/shared/voiceCatalog.py
new file mode 100644
index 00000000..2e98902e
--- /dev/null
+++ b/modules/shared/voiceCatalog.py
@@ -0,0 +1,136 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Voice / Language Catalog — Single Source of Truth.
+
+Every voice-related component (TTS connector, AI tools, REST routes, frontend
+language pickers) consumes this catalog. Hard-coded language lists or ad-hoc
+ISO→BCP-47 maps elsewhere are forbidden — extend the catalog instead.
+
+Schema per entry:
+ bcp47 BCP-47 locale code, e.g. "de-DE", "ru-RU"
+ iso ISO-639-1 short code, e.g. "de", "ru"
+ label Native display label ("Deutsch", "Русский")
+ flag Emoji flag (or empty string for region-neutral codes)
+ defaultVoice Curated Google TTS voice name; None means "let Google
+ pick automatically based on bcp47 + ssml_gender".
+"""
+
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+from typing import Dict, List, Optional
+
+
+@dataclass(frozen=True)
+class VoiceLanguage:  # One immutable catalog row; VOICE_LANGUAGES fills the fields positionally in this order.
+    bcp47: str  # BCP-47 locale code, e.g. "de-DE"
+    iso: str  # ISO-639-1 short code, e.g. "de"
+    label: str  # Native display label, e.g. "Deutsch"
+    flag: str  # Emoji flag; empty string for region-neutral codes
+    defaultVoice: Optional[str]  # Curated Google TTS voice name; None means "let Google pick"
+
+
+# Order matters: UI order (most common first, then alphabetical groups), and the first entry per iso is what isoToBcp47() returns.
+VOICE_LANGUAGES: List[VoiceLanguage] = [
+    VoiceLanguage("de-DE", "de", "Deutsch", "🇩🇪", "de-DE-Wavenet-A"),
+    VoiceLanguage("de-CH", "de", "Deutsch (Schweiz)", "🇨🇭", "de-DE-Wavenet-A"),
+    VoiceLanguage("de-AT", "de", "Deutsch (Österreich)", "🇦🇹", "de-DE-Wavenet-A"),
+    VoiceLanguage("en-US", "en", "English (US)", "🇺🇸", "en-US-Wavenet-C"),
+    VoiceLanguage("en-GB", "en", "English (UK)", "🇬🇧", "en-GB-Wavenet-A"),
+    VoiceLanguage("en-AU", "en", "English (Australia)", "🇦🇺", "en-AU-Wavenet-A"),
+    VoiceLanguage("fr-FR", "fr", "Français", "🇫🇷", "fr-FR-Wavenet-A"),
+    VoiceLanguage("fr-CA", "fr", "Français (Canada)", "🇨🇦", "fr-CA-Wavenet-A"),
+    VoiceLanguage("it-IT", "it", "Italiano", "🇮🇹", "it-IT-Wavenet-A"),
+    VoiceLanguage("es-ES", "es", "Español", "🇪🇸", "es-ES-Wavenet-B"),
+    VoiceLanguage("es-US", "es", "Español (US)", "🇺🇸", "es-US-Wavenet-A"),
+    VoiceLanguage("pt-BR", "pt", "Português (Brasil)", "🇧🇷", "pt-BR-Wavenet-A"),
+    VoiceLanguage("pt-PT", "pt", "Português (Portugal)", "🇵🇹", "pt-PT-Wavenet-A"),
+    VoiceLanguage("nl-NL", "nl", "Nederlands", "🇳🇱", "nl-NL-Wavenet-A"),
+    VoiceLanguage("pl-PL", "pl", "Polski", "🇵🇱", "pl-PL-Wavenet-A"),
+    VoiceLanguage("ru-RU", "ru", "Русский", "🇷🇺", "ru-RU-Wavenet-A"),
+    VoiceLanguage("uk-UA", "uk", "Українська", "🇺🇦", "uk-UA-Wavenet-A"),
+    VoiceLanguage("cs-CZ", "cs", "Čeština", "🇨🇿", "cs-CZ-Wavenet-A"),
+    VoiceLanguage("sk-SK", "sk", "Slovenčina", "🇸🇰", "sk-SK-Wavenet-A"),
+    VoiceLanguage("hu-HU", "hu", "Magyar", "🇭🇺", "hu-HU-Wavenet-A"),
+    VoiceLanguage("ro-RO", "ro", "Română", "🇷🇴", "ro-RO-Wavenet-A"),
+    VoiceLanguage("el-GR", "el", "Ελληνικά", "🇬🇷", "el-GR-Wavenet-A"),
+    VoiceLanguage("sv-SE", "sv", "Svenska", "🇸🇪", "sv-SE-Wavenet-A"),
+    VoiceLanguage("da-DK", "da", "Dansk", "🇩🇰", "da-DK-Wavenet-A"),
+    VoiceLanguage("nb-NO", "nb", "Norsk", "🇳🇴", "nb-NO-Wavenet-A"),
+    VoiceLanguage("fi-FI", "fi", "Suomi", "🇫🇮", "fi-FI-Wavenet-A"),
+    VoiceLanguage("tr-TR", "tr", "Türkçe", "🇹🇷", "tr-TR-Wavenet-A"),
+    VoiceLanguage("ar-XA", "ar", "العربية", "", "ar-XA-Wavenet-A"),
+    VoiceLanguage("hi-IN", "hi", "हिन्दी", "🇮🇳", "hi-IN-Wavenet-A"),
+    VoiceLanguage("ja-JP", "ja", "日本語", "🇯🇵", "ja-JP-Wavenet-A"),
+    VoiceLanguage("ko-KR", "ko", "한국어", "🇰🇷", "ko-KR-Wavenet-A"),
+    VoiceLanguage("zh-CN", "zh", "中文 (简体)", "🇨🇳", "cmn-CN-Wavenet-A"),
+    VoiceLanguage("vi-VN", "vi", "Tiếng Việt", "🇻🇳", "vi-VN-Wavenet-A"),
+    VoiceLanguage("th-TH", "th", "ไทย", "🇹🇭", "th-TH-Standard-A"),
+    VoiceLanguage("id-ID", "id", "Bahasa Indonesia", "🇮🇩", "id-ID-Wavenet-A"),
+]
+
+
+# ---------------------------------------------------------------------------
+# Lookup indexes, keyed by lower-cased code; _BY_ISO keeps the first entry per ISO.
+# ---------------------------------------------------------------------------
+_BY_BCP47: Dict[str, VoiceLanguage] = {}
+_BY_ISO: Dict[str, VoiceLanguage] = {}
+for _v in VOICE_LANGUAGES:
+    _BY_BCP47[_v.bcp47.lower()] = _v
+    _BY_ISO.setdefault(_v.iso.lower(), _v)
+
+
+def listVoiceLanguages() -> List[VoiceLanguage]:
+    """Return a fresh list of the supported voice languages, in catalog order."""
+    return [*VOICE_LANGUAGES]
+
+
+def getCatalogPayload() -> List[Dict[str, Optional[str]]]:
+    """Convert every catalog entry, in catalog order, to a plain JSON-ready dict."""
+    return list(map(asdict, VOICE_LANGUAGES))
+
+
+def getByBcp47(code: Optional[str]) -> Optional[VoiceLanguage]:
+    """Find the catalog entry for a BCP-47 code (case-insensitive); None if absent or empty."""
+    key = (code or "").strip().lower()
+    return _BY_BCP47.get(key)
+
+
+def getByIso(code: Optional[str]) -> Optional[VoiceLanguage]:
+    """Find the first catalog entry for an ISO-639-1 code (case-insensitive); None if absent or empty."""
+    key = (code or "").strip().lower()
+    return _BY_ISO.get(key)
+
+
+def getDefaultVoice(bcp47: Optional[str]) -> Optional[str]:
+    """Look up the curated default Google TTS voice name for a BCP-47 code.
+
+    Returns None when the code is not in the catalog; the caller must then
+    omit `name` in VoiceSelectionParams so Google auto-selects a voice.
+    """
+    match = getByBcp47(bcp47)
+    return None if not match else match.defaultVoice
+
+
+def isoToBcp47(iso: Optional[str]) -> Optional[str]:
+    """Map an ISO-639-1 short code to the canonical BCP-47 locale.
+
+    Already-qualified locales ("de-ch", "de_CH") are canonicalised to the
+    catalog form when known, else returned with "-" separators. Unknown ISO
+    codes fall back to ``<iso>-<ISO>`` (e.g. "fa" → "fa-FA") so callers get a
+    parseable locale without a curated voice. Empty/None input returns None.
+    """
+    if not iso:
+        return None
+    # Accept POSIX-style separators ("de_DE"); left as-is, the input would
+    # fall through to the ISO fallback and come back as "de_de-DE_DE".
+    normalized = iso.strip().replace("_", "-")
+    if not normalized:
+        return None
+    if "-" in normalized:
+        canonical = getByBcp47(normalized)
+        return canonical.bcp47 if canonical else normalized
+    isoLower = normalized.lower()
+    entry = _BY_ISO.get(isoLower)
+    return entry.bcp47 if entry else f"{isoLower}-{isoLower.upper()}"