centralized language catalog

This commit is contained in:
ValueOn AG 2026-04-19 00:36:45 +02:00
parent 24ff6058d5
commit 3ea85fe57e
6 changed files with 197 additions and 148 deletions

View file

@ -15,6 +15,7 @@ from google.cloud import speech
from google.cloud import translate_v2 as translate from google.cloud import translate_v2 as translate
from google.cloud import texttospeech from google.cloud import texttospeech
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
from modules.shared.voiceCatalog import getDefaultVoice as _catalogDefaultVoice
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -940,14 +941,17 @@ class ConnectorGoogleSpeech:
stripped = voiceName.strip() stripped = voiceName.strip()
return bool(stripped) and "-" not in stripped return bool(stripped) and "-" not in stripped
async def textToSpeech(self, text: str, languageCode: str = "de-DE", voiceName: str = None) -> Dict[str, Any]: async def textToSpeech(self, text: str, languageCode: str = "de-DE", voiceName: Optional[str] = None) -> Dict[str, Any]:
""" """
Convert text to speech using Google Cloud Text-to-Speech. Convert text to speech using Google Cloud Text-to-Speech.
Args: Args:
text: Text to convert to speech text: Text to convert to speech
language_code: Language code (e.g., 'de-DE', 'en-US') languageCode: BCP-47 language code (e.g., 'de-DE', 'en-US', 'ru-RU')
voice_name: Specific voice name (optional) voiceName: Specific voice name (optional). If omitted, a curated
default is used; if no curated default exists for the language,
Google selects a default voice automatically based on
languageCode + ssml_gender (no hard failure).
Returns: Returns:
Dict with success status and audio data Dict with success status and audio data
@ -955,18 +959,8 @@ class ConnectorGoogleSpeech:
try: try:
logger.info(f"Converting text to speech: '{text[:50]}...' in {languageCode}") logger.info(f"Converting text to speech: '{text[:50]}...' in {languageCode}")
# Build the voice request
selectedVoice = voiceName or self._getDefaultVoice(languageCode) selectedVoice = voiceName or self._getDefaultVoice(languageCode)
isGeminiVoice = self._isGeminiTtsSpeakerVoiceName(selectedVoice) if selectedVoice else False
if not selectedVoice:
return {
"success": False,
"error": f"No voice specified for language {languageCode}. Please select a voice."
}
logger.info(f"Using TTS voice: {selectedVoice} for language: {languageCode}")
isGeminiVoice = self._isGeminiTtsSpeakerVoiceName(selectedVoice)
if isGeminiVoice: if isGeminiVoice:
synthesisInput = texttospeech.SynthesisInput( synthesisInput = texttospeech.SynthesisInput(
@ -981,11 +975,23 @@ class ConnectorGoogleSpeech:
) )
else: else:
synthesisInput = texttospeech.SynthesisInput(text=text) synthesisInput = texttospeech.SynthesisInput(text=text)
voice = texttospeech.VoiceSelectionParams( voiceKwargs: Dict[str, Any] = {
language_code=languageCode, "language_code": languageCode,
name=selectedVoice, "ssml_gender": texttospeech.SsmlVoiceGender.NEUTRAL,
ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL, }
) if selectedVoice:
voiceKwargs["name"] = selectedVoice
else:
logger.info(
f"TTS: no curated voice for '{languageCode}', "
f"letting Google auto-select by language + gender"
)
voice = texttospeech.VoiceSelectionParams(**voiceKwargs)
logger.info(
f"Using TTS voice: {selectedVoice or '<google-auto>'} "
f"for language: {languageCode}"
)
audioConfig = texttospeech.AudioConfig( audioConfig = texttospeech.AudioConfig(
audio_encoding=texttospeech.AudioEncoding.MP3 audio_encoding=texttospeech.AudioEncoding.MP3
@ -994,16 +1000,15 @@ class ConnectorGoogleSpeech:
response = self.tts_client.synthesize_speech( response = self.tts_client.synthesize_speech(
input=synthesisInput, input=synthesisInput,
voice=voice, voice=voice,
audio_config=audioConfig audio_config=audioConfig,
) )
# Return the audio content
return { return {
"success": True, "success": True,
"audio_content": response.audio_content, "audio_content": response.audio_content,
"audio_format": "mp3", "audio_format": "mp3",
"language_code": languageCode, "language_code": languageCode,
"voice_name": voice.name "voice_name": selectedVoice or "<google-auto>",
} }
except Exception as e: except Exception as e:
@ -1018,58 +1023,14 @@ class ConnectorGoogleSpeech:
"error": f"Text-to-Speech failed: {detail}{extra}", "error": f"Text-to-Speech failed: {detail}{extra}",
} }
def _getDefaultVoice(self, languageCode: str) -> str: def _getDefaultVoice(self, languageCode: str) -> Optional[str]:
"""Return the curated default Google TTS voice for `languageCode`.
Delegates to the central voice catalog; returns None when no curated
voice exists, in which case the caller omits `name` and Google
auto-selects based on languageCode + ssml_gender.
""" """
Get default voice name for a language code. return _catalogDefaultVoice(languageCode)
Falls back to a Wavenet voice for common languages.
"""
_defaults = {
"de-DE": "de-DE-Wavenet-A",
"de-CH": "de-DE-Wavenet-A",
"en-US": "en-US-Wavenet-C",
"en-GB": "en-GB-Wavenet-A",
"fr-FR": "fr-FR-Wavenet-A",
"it-IT": "it-IT-Wavenet-A",
}
return _defaults.get(languageCode)
async def getAvailableLanguages(self) -> Dict[str, Any]:
"""
Get available languages from Google Cloud Text-to-Speech.
Returns:
Dict containing success status and list of available languages
"""
try:
logger.info("🌐 Getting available languages from Google Cloud TTS")
# List voices from Google Cloud TTS
response = self.tts_client.list_voices()
# Extract unique language codes
# Note: Google TTS API doesn't provide language descriptions, only codes
language_codes = set()
for voice in response.voices:
if voice.language_codes:
language_codes.update(voice.language_codes)
# Convert to sorted list of language codes
available_languages = sorted(list(language_codes))
logger.info(f"✅ Found {len(available_languages)} available languages")
return {
"success": True,
"languages": available_languages
}
except Exception as e:
logger.error(f"❌ Failed to get available languages: {e}")
return {
"success": False,
"error": str(e),
"languages": []
}
async def getAvailableVoices(self, languageCode: Optional[str] = None) -> Dict[str, Any]: async def getAvailableVoices(self, languageCode: Optional[str] = None) -> Dict[str, Any]:
""" """

View file

@ -338,35 +338,10 @@ class VoiceObjects:
"error": str(e) "error": str(e)
} }
# Language and Voice Information # Voice Information
# Note: Available languages live in the central voice catalog
async def getAvailableLanguages(self) -> Dict[str, Any]: # (modules.shared.voiceCatalog); voice picks per language stay live from
""" # Google so users can see all available speakers per locale.
Get available languages from Google Cloud Text-to-Speech.
Returns:
Dict containing success status and list of available languages
"""
try:
logger.info("🌐 Getting available languages from Google Cloud TTS")
connector = self._getGoogleSpeechConnector()
result = await connector.getAvailableLanguages()
if result["success"]:
logger.info(f"✅ Found {len(result['languages'])} available languages")
else:
logger.warning(f"⚠️ Failed to get languages: {result.get('error', 'Unknown error')}")
return result
except Exception as e:
logger.error(f"❌ Error getting available languages: {e}")
return {
"success": False,
"error": str(e),
"languages": []
}
async def getAvailableVoices(self, languageCode: Optional[str] = None) -> Dict[str, Any]: async def getAvailableVoices(self, languageCode: Optional[str] = None) -> Dict[str, Any]:
""" """

View file

@ -17,6 +17,7 @@ from typing import Optional, Dict, Any, List
from modules.auth import getCurrentUser, getRequestContext, RequestContext, limiter from modules.auth import getCurrentUser, getRequestContext, RequestContext, limiter
from modules.datamodels.datamodelUam import User from modules.datamodels.datamodelUam import User
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface, VoiceObjects from modules.interfaces.interfaceVoiceObjects import getVoiceInterface, VoiceObjects
from modules.shared.voiceCatalog import getCatalogPayload
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
router = APIRouter(prefix="/voice-google", tags=["Voice Google"]) router = APIRouter(prefix="/voice-google", tags=["Voice Google"])
@ -61,32 +62,15 @@ def _getVoiceInterface(currentUser: User) -> VoiceObjects:
@router.get("/languages") @router.get("/languages")
async def get_available_languages(currentUser: User = Depends(getCurrentUser)): async def get_available_languages(currentUser: User = Depends(getCurrentUser)):
"""Get available languages from Google Cloud Text-to-Speech.""" """Return the curated voice/language catalog (single source of truth).
try:
logger.info("🌐 Getting available languages from Google Cloud TTS")
voiceInterface = _getVoiceInterface(currentUser) Each entry: {bcp47, iso, label, flag, defaultVoice}. Same payload as
result = await voiceInterface.getAvailableLanguages() /api/voice/languages both endpoints back the same catalog.
"""
if result["success"]: return {
return { "success": True,
"success": True, "languages": getCatalogPayload(),
"languages": result["languages"] }
}
else:
raise HTTPException(
status_code=400,
detail=f"Failed to get languages: {result.get('error', 'Unknown error')}"
)
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Get languages error: {e}")
raise HTTPException(
status_code=500,
detail=f"Failed to get available languages: {str(e)}"
)
@router.get("/voices") @router.get("/voices")
async def get_available_voices( async def get_available_voices(

View file

@ -18,6 +18,7 @@ from modules.datamodels.datamodelUam import User, UserVoicePreferences, _normali
from modules.interfaces.interfaceDbApp import getRootInterface from modules.interfaces.interfaceDbApp import getRootInterface
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
from modules.shared.i18nRegistry import apiRouteContext from modules.shared.i18nRegistry import apiRouteContext
from modules.shared.voiceCatalog import getCatalogPayload
routeApiMsg = apiRouteContext("routeVoiceUser") routeApiMsg = apiRouteContext("routeVoiceUser")
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -101,11 +102,11 @@ async def getVoiceLanguages(
request: Request, request: Request,
currentUser: User = Depends(getCurrentUser), currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Return available TTS languages (user-level, no instance context needed).""" """Return the curated voice/language catalog (single source of truth).
voiceInterface = getVoiceInterface(currentUser)
languagesResult = await voiceInterface.getAvailableLanguages() Each entry: {bcp47, iso, label, flag, defaultVoice}.
languageList = languagesResult.get("languages", []) if isinstance(languagesResult, dict) else languagesResult """
return {"languages": languageList} return {"languages": getCatalogPayload()}
@router.get("/voices") @router.get("/voices")

View file

@ -395,25 +395,17 @@ def _registerMediaTools(registry: ToolRegistry, services):
try: try:
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
from modules.shared.voiceCatalog import isoToBcp47
mandateId = context.get("mandateId", "") mandateId = context.get("mandateId", "")
voiceInterface = getVoiceInterface(currentUser=None, mandateId=mandateId) voiceInterface = getVoiceInterface(currentUser=None, mandateId=mandateId)
_ISO_TO_BCP47 = {
"de": "de-DE", "en": "en-US", "fr": "fr-FR", "it": "it-IT",
"es": "es-ES", "pt": "pt-BR", "nl": "nl-NL", "pl": "pl-PL",
"ru": "ru-RU", "ja": "ja-JP", "zh": "zh-CN", "ko": "ko-KR",
"ar": "ar-XA", "hi": "hi-IN", "tr": "tr-TR", "sv": "sv-SE",
}
if language == "auto": if language == "auto":
try: try:
snippet = cleanText[:500] snippet = cleanText[:500]
detectResult = await voiceInterface.detectLanguage(snippet) detectResult = await voiceInterface.detectLanguage(snippet)
if detectResult and detectResult.get("success"): if detectResult and detectResult.get("success"):
detected = detectResult.get("language", "de") detected = detectResult.get("language", "de")
language = _ISO_TO_BCP47.get(detected, detected) language = isoToBcp47(detected) or "de-DE"
if "-" not in language:
language = _ISO_TO_BCP47.get(language, f"{language}-{language.upper()}")
logger.info(f"textToSpeech: auto-detected language '{detected}' -> '{language}'") logger.info(f"textToSpeech: auto-detected language '{detected}' -> '{language}'")
else: else:
language = "de-DE" language = "de-DE"

View file

@ -0,0 +1,136 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Voice / Language Catalog — Single Source of Truth.
Every voice-related component (TTS connector, AI tools, REST routes, frontend
language pickers) consumes this catalog. Hard-coded language lists or ad-hoc
ISO↔BCP-47 maps elsewhere are forbidden — extend the catalog instead.
Schema per entry:
bcp47 BCP-47 locale code, e.g. "de-DE", "ru-RU"
iso ISO-639-1 short code, e.g. "de", "ru"
label Native display label ("Deutsch", "Русский")
flag Emoji flag (or empty string for region-neutral codes)
defaultVoice Curated Google TTS voice name; None means "let Google
pick automatically based on bcp47 + ssml_gender".
"""
from __future__ import annotations
from dataclasses import asdict, dataclass
from typing import Dict, List, Optional
@dataclass(frozen=True)
class VoiceLanguage:
    """One immutable catalog row describing a supported voice language.

    Fields are positional; the catalog table constructs rows in the order
    (bcp47, iso, label, flag, defaultVoice), so the order must not change.
    """

    # BCP-47 locale code, e.g. "de-DE".
    bcp47: str
    # ISO-639-1 short code, e.g. "de".
    iso: str
    # Native display label, e.g. "Deutsch".
    label: str
    # Emoji flag, or "" for region-neutral codes.
    flag: str
    # Curated Google TTS voice name; None means "no curated voice".
    defaultVoice: Optional[str]
# Curated table of supported voice languages; one VoiceLanguage row per locale,
# given positionally as (bcp47, iso, label, flag, defaultVoice).
# Order matters for UI: most common first, then alphabetical groups.
# Order also matters for ISO lookups: the ISO index below keeps the FIRST row
# listed for each ISO code, so "de" resolves to de-DE, "en" to en-US, etc.
# Regional variants may reuse another locale's voice (de-CH and de-AT both
# point at "de-DE-Wavenet-A").
VOICE_LANGUAGES: List[VoiceLanguage] = [
    VoiceLanguage("de-DE", "de", "Deutsch", "🇩🇪", "de-DE-Wavenet-A"),
    VoiceLanguage("de-CH", "de", "Deutsch (Schweiz)", "🇨🇭", "de-DE-Wavenet-A"),
    VoiceLanguage("de-AT", "de", "Deutsch (Österreich)", "🇦🇹", "de-DE-Wavenet-A"),
    VoiceLanguage("en-US", "en", "English (US)", "🇺🇸", "en-US-Wavenet-C"),
    VoiceLanguage("en-GB", "en", "English (UK)", "🇬🇧", "en-GB-Wavenet-A"),
    VoiceLanguage("en-AU", "en", "English (Australia)", "🇦🇺", "en-AU-Wavenet-A"),
    VoiceLanguage("fr-FR", "fr", "Français", "🇫🇷", "fr-FR-Wavenet-A"),
    VoiceLanguage("fr-CA", "fr", "Français (Canada)", "🇨🇦", "fr-CA-Wavenet-A"),
    VoiceLanguage("it-IT", "it", "Italiano", "🇮🇹", "it-IT-Wavenet-A"),
    VoiceLanguage("es-ES", "es", "Español", "🇪🇸", "es-ES-Wavenet-B"),
    VoiceLanguage("es-US", "es", "Español (US)", "🇺🇸", "es-US-Wavenet-A"),
    VoiceLanguage("pt-BR", "pt", "Português (Brasil)", "🇧🇷", "pt-BR-Wavenet-A"),
    VoiceLanguage("pt-PT", "pt", "Português (Portugal)", "🇵🇹", "pt-PT-Wavenet-A"),
    VoiceLanguage("nl-NL", "nl", "Nederlands", "🇳🇱", "nl-NL-Wavenet-A"),
    VoiceLanguage("pl-PL", "pl", "Polski", "🇵🇱", "pl-PL-Wavenet-A"),
    VoiceLanguage("ru-RU", "ru", "Русский", "🇷🇺", "ru-RU-Wavenet-A"),
    VoiceLanguage("uk-UA", "uk", "Українська", "🇺🇦", "uk-UA-Wavenet-A"),
    VoiceLanguage("cs-CZ", "cs", "Čeština", "🇨🇿", "cs-CZ-Wavenet-A"),
    VoiceLanguage("sk-SK", "sk", "Slovenčina", "🇸🇰", "sk-SK-Wavenet-A"),
    VoiceLanguage("hu-HU", "hu", "Magyar", "🇭🇺", "hu-HU-Wavenet-A"),
    VoiceLanguage("ro-RO", "ro", "Română", "🇷🇴", "ro-RO-Wavenet-A"),
    VoiceLanguage("el-GR", "el", "Ελληνικά", "🇬🇷", "el-GR-Wavenet-A"),
    VoiceLanguage("sv-SE", "sv", "Svenska", "🇸🇪", "sv-SE-Wavenet-A"),
    VoiceLanguage("da-DK", "da", "Dansk", "🇩🇰", "da-DK-Wavenet-A"),
    VoiceLanguage("nb-NO", "nb", "Norsk", "🇳🇴", "nb-NO-Wavenet-A"),
    VoiceLanguage("fi-FI", "fi", "Suomi", "🇫🇮", "fi-FI-Wavenet-A"),
    VoiceLanguage("tr-TR", "tr", "Türkçe", "🇹🇷", "tr-TR-Wavenet-A"),
    # Region-neutral code: deliberately carries an empty flag.
    VoiceLanguage("ar-XA", "ar", "العربية", "", "ar-XA-Wavenet-A"),
    VoiceLanguage("hi-IN", "hi", "हिन्दी", "🇮🇳", "hi-IN-Wavenet-A"),
    VoiceLanguage("ja-JP", "ja", "日本語", "🇯🇵", "ja-JP-Wavenet-A"),
    VoiceLanguage("ko-KR", "ko", "한국어", "🇰🇷", "ko-KR-Wavenet-A"),
    # Voice name uses the "cmn-CN" prefix rather than the row's "zh-CN" locale.
    VoiceLanguage("zh-CN", "zh", "中文 (简体)", "🇨🇳", "cmn-CN-Wavenet-A"),
    VoiceLanguage("vi-VN", "vi", "Tiếng Việt", "🇻🇳", "vi-VN-Wavenet-A"),
    # Only row whose curated voice is a "Standard" rather than "Wavenet" name.
    VoiceLanguage("th-TH", "th", "ไทย", "🇹🇭", "th-TH-Standard-A"),
    VoiceLanguage("id-ID", "id", "Bahasa Indonesia", "🇮🇩", "id-ID-Wavenet-A"),
]
# ---------------------------------------------------------------------------
# Lookup indexes, populated a single time when the module is imported.
# Keys are lower-cased so lookups can be case-insensitive.
# ---------------------------------------------------------------------------
_BY_BCP47: Dict[str, VoiceLanguage] = {}
_BY_ISO: Dict[str, VoiceLanguage] = {}
for _v in VOICE_LANGUAGES:
    # BCP-47 index: a later row with the same code replaces the earlier one.
    _BY_BCP47[_v.bcp47.lower()] = _v
    # ISO index: the first row listed for a language stays the ISO default.
    if _v.iso.lower() not in _BY_ISO:
        _BY_ISO[_v.iso.lower()] = _v
def listVoiceLanguages() -> List[VoiceLanguage]:
    """Return a shallow copy of the ordered catalog.

    A new list is returned on every call, so callers may reorder or filter
    the result without touching the module-level catalog.
    """
    return VOICE_LANGUAGES[:]
def getCatalogPayload() -> List[Dict[str, Optional[str]]]:
    """Return the catalog as a list of plain dicts, in catalog order.

    Each dict holds the dataclass fields (bcp47, iso, label, flag,
    defaultVoice), which makes the result directly JSON-serializable.
    """
    return list(map(asdict, VOICE_LANGUAGES))
def getByBcp47(code: Optional[str]) -> Optional[VoiceLanguage]:
    """Look up a catalog entry by BCP-47 code.

    The match ignores case and surrounding whitespace. Returns None for
    None/empty input and for codes that are not in the catalog.
    """
    return _BY_BCP47.get(code.strip().lower()) if code else None
def getByIso(code: Optional[str]) -> Optional[VoiceLanguage]:
    """Look up the catalog entry registered for an ISO short code.

    The match ignores case and surrounding whitespace. Returns None for
    None/empty input and for codes that are not in the ISO index.
    """
    return _BY_ISO.get(code.strip().lower()) if code else None
def getDefaultVoice(bcp47: Optional[str]) -> Optional[str]:
    """Return the curated default Google TTS voice for a BCP-47 code.

    Returns None when the code is not in the catalog or the entry has no
    curated voice. In that case the caller must omit `name` in
    VoiceSelectionParams so Google auto-selects a voice for the language code.
    """
    match = getByBcp47(bcp47)
    if match is None:
        return None
    return match.defaultVoice
def isoToBcp47(iso: Optional[str]) -> Optional[str]:
    """Map an ISO-639-1 short code to the canonical BCP-47 locale.

    Args:
        iso: A short language code ("de"), or an already-qualified locale
            ("de-CH"). Underscore separators ("de_CH") are accepted and
            treated as hyphens. Case and surrounding whitespace are ignored
            for matching.

    Returns:
        None for None, empty or blank input.
        For qualified input: the catalog's canonical spelling when the locale
        is known (e.g. "DE-ch" -> "de-CH"), otherwise the hyphenated input
        as given.
        For short codes: the catalog locale registered for that language
        (e.g. "de" -> "de-DE"). Unknown short codes fall back to
        ``<iso>-<ISO>`` (e.g. "fa" -> "fa-FA") so callers always get a
        parseable locale, but unknown codes carry no curated voice.
    """
    if not iso:
        return None
    # Locale strings often arrive POSIX/Java style ("en_US"). Without this,
    # the "-" check below misses them and the fallback would fabricate
    # something like "en_us-EN_US".
    normalized = iso.strip().replace("_", "-")
    if not normalized:
        return None

    if "-" in normalized:
        canonical = getByBcp47(normalized)
        return canonical.bcp47 if canonical else normalized

    isoLower = normalized.lower()
    # The catalog files Norwegian under Bokmal ("nb"), while the ISO 639-1
    # macrolanguage code is "no". Fold it here so it resolves to the catalog
    # locale instead of the fabricated "no-NO".
    # NOTE(review): Cloud Translation detection is expected to report
    # Norwegian as "no" - confirm against its language-support list.
    isoAliases = {"no": "nb"}
    isoLower = isoAliases.get(isoLower, isoLower)

    entry = _BY_ISO.get(isoLower)
    if entry:
        return entry.bcp47
    return f"{isoLower}-{isoLower.upper()}"