centralized language catalog

This commit is contained in:
ValueOn AG 2026-04-19 00:36:45 +02:00
parent 24ff6058d5
commit 3ea85fe57e
6 changed files with 197 additions and 148 deletions

View file

@ -15,6 +15,7 @@ from google.cloud import speech
from google.cloud import translate_v2 as translate
from google.cloud import texttospeech
from modules.shared.configuration import APP_CONFIG
from modules.shared.voiceCatalog import getDefaultVoice as _catalogDefaultVoice
logger = logging.getLogger(__name__)
@ -940,33 +941,26 @@ class ConnectorGoogleSpeech:
stripped = voiceName.strip()
return bool(stripped) and "-" not in stripped
async def textToSpeech(self, text: str, languageCode: str = "de-DE", voiceName: str = None) -> Dict[str, Any]:
async def textToSpeech(self, text: str, languageCode: str = "de-DE", voiceName: Optional[str] = None) -> Dict[str, Any]:
"""
Convert text to speech using Google Cloud Text-to-Speech.
Args:
text: Text to convert to speech
language_code: Language code (e.g., 'de-DE', 'en-US')
voice_name: Specific voice name (optional)
languageCode: BCP-47 language code (e.g., 'de-DE', 'en-US', 'ru-RU')
voiceName: Specific voice name (optional). If omitted, a curated
default is used; if no curated default exists for the language,
Google selects a default voice automatically based on
languageCode + ssml_gender (no hard failure).
Returns:
Dict with success status and audio data
"""
try:
logger.info(f"Converting text to speech: '{text[:50]}...' in {languageCode}")
# Build the voice request
selectedVoice = voiceName or self._getDefaultVoice(languageCode)
if not selectedVoice:
return {
"success": False,
"error": f"No voice specified for language {languageCode}. Please select a voice."
}
logger.info(f"Using TTS voice: {selectedVoice} for language: {languageCode}")
isGeminiVoice = self._isGeminiTtsSpeakerVoiceName(selectedVoice)
isGeminiVoice = self._isGeminiTtsSpeakerVoiceName(selectedVoice) if selectedVoice else False
if isGeminiVoice:
synthesisInput = texttospeech.SynthesisInput(
@ -981,11 +975,23 @@ class ConnectorGoogleSpeech:
)
else:
synthesisInput = texttospeech.SynthesisInput(text=text)
voice = texttospeech.VoiceSelectionParams(
language_code=languageCode,
name=selectedVoice,
ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
)
voiceKwargs: Dict[str, Any] = {
"language_code": languageCode,
"ssml_gender": texttospeech.SsmlVoiceGender.NEUTRAL,
}
if selectedVoice:
voiceKwargs["name"] = selectedVoice
else:
logger.info(
f"TTS: no curated voice for '{languageCode}', "
f"letting Google auto-select by language + gender"
)
voice = texttospeech.VoiceSelectionParams(**voiceKwargs)
logger.info(
f"Using TTS voice: {selectedVoice or '<google-auto>'} "
f"for language: {languageCode}"
)
audioConfig = texttospeech.AudioConfig(
audio_encoding=texttospeech.AudioEncoding.MP3
@ -994,16 +1000,15 @@ class ConnectorGoogleSpeech:
response = self.tts_client.synthesize_speech(
input=synthesisInput,
voice=voice,
audio_config=audioConfig
audio_config=audioConfig,
)
# Return the audio content
return {
"success": True,
"audio_content": response.audio_content,
"audio_format": "mp3",
"language_code": languageCode,
"voice_name": voice.name
"voice_name": selectedVoice or "<google-auto>",
}
except Exception as e:
@ -1018,59 +1023,15 @@ class ConnectorGoogleSpeech:
"error": f"Text-to-Speech failed: {detail}{extra}",
}
def _getDefaultVoice(self, languageCode: str) -> str:
def _getDefaultVoice(self, languageCode: str) -> Optional[str]:
"""Return the curated default Google TTS voice for `languageCode`.
Delegates to the central voice catalog; returns None when no curated
voice exists, in which case the caller omits `name` and Google
auto-selects based on languageCode + ssml_gender.
"""
Get default voice name for a language code.
Falls back to a Wavenet voice for common languages.
"""
_defaults = {
"de-DE": "de-DE-Wavenet-A",
"de-CH": "de-DE-Wavenet-A",
"en-US": "en-US-Wavenet-C",
"en-GB": "en-GB-Wavenet-A",
"fr-FR": "fr-FR-Wavenet-A",
"it-IT": "it-IT-Wavenet-A",
}
return _defaults.get(languageCode)
async def getAvailableLanguages(self) -> Dict[str, Any]:
"""
Get available languages from Google Cloud Text-to-Speech.
Returns:
Dict containing success status and list of available languages
"""
try:
logger.info("🌐 Getting available languages from Google Cloud TTS")
# List voices from Google Cloud TTS
response = self.tts_client.list_voices()
# Extract unique language codes
# Note: Google TTS API doesn't provide language descriptions, only codes
language_codes = set()
for voice in response.voices:
if voice.language_codes:
language_codes.update(voice.language_codes)
# Convert to sorted list of language codes
available_languages = sorted(list(language_codes))
logger.info(f"✅ Found {len(available_languages)} available languages")
return {
"success": True,
"languages": available_languages
}
except Exception as e:
logger.error(f"❌ Failed to get available languages: {e}")
return {
"success": False,
"error": str(e),
"languages": []
}
return _catalogDefaultVoice(languageCode)
async def getAvailableVoices(self, languageCode: Optional[str] = None) -> Dict[str, Any]:
"""
Get available voices from Google Cloud Text-to-Speech.

View file

@ -338,36 +338,11 @@ class VoiceObjects:
"error": str(e)
}
# Language and Voice Information
async def getAvailableLanguages(self) -> Dict[str, Any]:
"""
Get available languages from Google Cloud Text-to-Speech.
Returns:
Dict containing success status and list of available languages
"""
try:
logger.info("🌐 Getting available languages from Google Cloud TTS")
connector = self._getGoogleSpeechConnector()
result = await connector.getAvailableLanguages()
if result["success"]:
logger.info(f"✅ Found {len(result['languages'])} available languages")
else:
logger.warning(f"⚠️ Failed to get languages: {result.get('error', 'Unknown error')}")
return result
except Exception as e:
logger.error(f"❌ Error getting available languages: {e}")
return {
"success": False,
"error": str(e),
"languages": []
}
# Voice Information
# Note: Available languages live in the central voice catalog
# (modules.shared.voiceCatalog); voice picks per language stay live from
# Google so users can see all available speakers per locale.
async def getAvailableVoices(self, languageCode: Optional[str] = None) -> Dict[str, Any]:
"""
Get available voices from Google Cloud Text-to-Speech.

View file

@ -17,6 +17,7 @@ from typing import Optional, Dict, Any, List
from modules.auth import getCurrentUser, getRequestContext, RequestContext, limiter
from modules.datamodels.datamodelUam import User
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface, VoiceObjects
from modules.shared.voiceCatalog import getCatalogPayload
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/voice-google", tags=["Voice Google"])
@ -61,32 +62,15 @@ def _getVoiceInterface(currentUser: User) -> VoiceObjects:
@router.get("/languages")
async def get_available_languages(currentUser: User = Depends(getCurrentUser)):
"""Get available languages from Google Cloud Text-to-Speech."""
try:
logger.info("🌐 Getting available languages from Google Cloud TTS")
voiceInterface = _getVoiceInterface(currentUser)
result = await voiceInterface.getAvailableLanguages()
if result["success"]:
return {
"success": True,
"languages": result["languages"]
}
else:
raise HTTPException(
status_code=400,
detail=f"Failed to get languages: {result.get('error', 'Unknown error')}"
)
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Get languages error: {e}")
raise HTTPException(
status_code=500,
detail=f"Failed to get available languages: {str(e)}"
)
"""Return the curated voice/language catalog (single source of truth).
Each entry: {bcp47, iso, label, flag, defaultVoice}. Same payload as
/api/voice/languages; both endpoints back the same catalog.
"""
return {
"success": True,
"languages": getCatalogPayload(),
}
@router.get("/voices")
async def get_available_voices(

View file

@ -18,6 +18,7 @@ from modules.datamodels.datamodelUam import User, UserVoicePreferences, _normali
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
from modules.shared.i18nRegistry import apiRouteContext
from modules.shared.voiceCatalog import getCatalogPayload
routeApiMsg = apiRouteContext("routeVoiceUser")
logger = logging.getLogger(__name__)
@ -101,11 +102,11 @@ async def getVoiceLanguages(
request: Request,
currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
"""Return available TTS languages (user-level, no instance context needed)."""
voiceInterface = getVoiceInterface(currentUser)
languagesResult = await voiceInterface.getAvailableLanguages()
languageList = languagesResult.get("languages", []) if isinstance(languagesResult, dict) else languagesResult
return {"languages": languageList}
"""Return the curated voice/language catalog (single source of truth).
Each entry: {bcp47, iso, label, flag, defaultVoice}.
"""
return {"languages": getCatalogPayload()}
@router.get("/voices")

View file

@ -395,25 +395,17 @@ def _registerMediaTools(registry: ToolRegistry, services):
try:
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
from modules.shared.voiceCatalog import isoToBcp47
mandateId = context.get("mandateId", "")
voiceInterface = getVoiceInterface(currentUser=None, mandateId=mandateId)
_ISO_TO_BCP47 = {
"de": "de-DE", "en": "en-US", "fr": "fr-FR", "it": "it-IT",
"es": "es-ES", "pt": "pt-BR", "nl": "nl-NL", "pl": "pl-PL",
"ru": "ru-RU", "ja": "ja-JP", "zh": "zh-CN", "ko": "ko-KR",
"ar": "ar-XA", "hi": "hi-IN", "tr": "tr-TR", "sv": "sv-SE",
}
if language == "auto":
try:
snippet = cleanText[:500]
detectResult = await voiceInterface.detectLanguage(snippet)
if detectResult and detectResult.get("success"):
detected = detectResult.get("language", "de")
language = _ISO_TO_BCP47.get(detected, detected)
if "-" not in language:
language = _ISO_TO_BCP47.get(language, f"{language}-{language.upper()}")
language = isoToBcp47(detected) or "de-DE"
logger.info(f"textToSpeech: auto-detected language '{detected}' -> '{language}'")
else:
language = "de-DE"

View file

@ -0,0 +1,136 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Voice / Language Catalog — Single Source of Truth.
Every voice-related component (TTS connector, AI tools, REST routes, frontend
language pickers) consumes this catalog. Hard-coded language lists or ad-hoc
ISO-to-BCP-47 maps elsewhere are forbidden; extend the catalog instead.
Schema per entry:
bcp47 BCP-47 locale code, e.g. "de-DE", "ru-RU"
iso ISO-639-1 short code, e.g. "de", "ru"
label Native display label ("Deutsch", "Русский")
flag Emoji flag (or empty string for region-neutral codes)
defaultVoice Curated Google TTS voice name; None means "let Google
pick automatically based on bcp47 + ssml_gender".
"""
from __future__ import annotations
from dataclasses import asdict, dataclass
from typing import Dict, List, Optional
@dataclass(frozen=True)
class VoiceLanguage:
    """One immutable catalog row describing a supported voice language.

    Instances are frozen, so they are hashable and can be shared freely
    between the lookup indexes built from the catalog.
    """

    # BCP-47 locale code, e.g. "de-DE", "ru-RU".
    bcp47: str
    # ISO-639-1 short code, e.g. "de", "ru".
    iso: str
    # Native display label, e.g. "Deutsch".
    label: str
    # Emoji flag, or an empty string for region-neutral codes.
    flag: str
    # Curated Google TTS voice name; None means "let Google pick a voice".
    defaultVoice: Optional[str]
# Canonical catalog of supported voice languages.
#
# Order matters twice:
#   * UI: entries are listed in this order (most common first).
#   * Lookup: the ISO index built below keeps the FIRST entry per `iso` code,
#     so the first row of each language group (e.g. "de-DE" for "de") is the
#     locale that a bare ISO code resolves to.
#
# Positional fields per row: bcp47, iso, label, flag, defaultVoice.
# NOTE(review): voice names are not validated against Google's live voice
# list anywhere in this module — confirm a voice exists before adding a row.
VOICE_LANGUAGES: List[VoiceLanguage] = [
    # German: the regional variants reuse the de-DE voice.
    VoiceLanguage("de-DE", "de", "Deutsch", "🇩🇪", "de-DE-Wavenet-A"),
    VoiceLanguage("de-CH", "de", "Deutsch (Schweiz)", "🇨🇭", "de-DE-Wavenet-A"),
    VoiceLanguage("de-AT", "de", "Deutsch (Österreich)", "🇦🇹", "de-DE-Wavenet-A"),
    VoiceLanguage("en-US", "en", "English (US)", "🇺🇸", "en-US-Wavenet-C"),
    VoiceLanguage("en-GB", "en", "English (UK)", "🇬🇧", "en-GB-Wavenet-A"),
    VoiceLanguage("en-AU", "en", "English (Australia)", "🇦🇺", "en-AU-Wavenet-A"),
    VoiceLanguage("fr-FR", "fr", "Français", "🇫🇷", "fr-FR-Wavenet-A"),
    VoiceLanguage("fr-CA", "fr", "Français (Canada)", "🇨🇦", "fr-CA-Wavenet-A"),
    VoiceLanguage("it-IT", "it", "Italiano", "🇮🇹", "it-IT-Wavenet-A"),
    VoiceLanguage("es-ES", "es", "Español", "🇪🇸", "es-ES-Wavenet-B"),
    VoiceLanguage("es-US", "es", "Español (US)", "🇺🇸", "es-US-Wavenet-A"),
    VoiceLanguage("pt-BR", "pt", "Português (Brasil)", "🇧🇷", "pt-BR-Wavenet-A"),
    VoiceLanguage("pt-PT", "pt", "Português (Portugal)", "🇵🇹", "pt-PT-Wavenet-A"),
    VoiceLanguage("nl-NL", "nl", "Nederlands", "🇳🇱", "nl-NL-Wavenet-A"),
    VoiceLanguage("pl-PL", "pl", "Polski", "🇵🇱", "pl-PL-Wavenet-A"),
    VoiceLanguage("ru-RU", "ru", "Русский", "🇷🇺", "ru-RU-Wavenet-A"),
    VoiceLanguage("uk-UA", "uk", "Українська", "🇺🇦", "uk-UA-Wavenet-A"),
    VoiceLanguage("cs-CZ", "cs", "Čeština", "🇨🇿", "cs-CZ-Wavenet-A"),
    VoiceLanguage("sk-SK", "sk", "Slovenčina", "🇸🇰", "sk-SK-Wavenet-A"),
    VoiceLanguage("hu-HU", "hu", "Magyar", "🇭🇺", "hu-HU-Wavenet-A"),
    VoiceLanguage("ro-RO", "ro", "Română", "🇷🇴", "ro-RO-Wavenet-A"),
    VoiceLanguage("el-GR", "el", "Ελληνικά", "🇬🇷", "el-GR-Wavenet-A"),
    VoiceLanguage("sv-SE", "sv", "Svenska", "🇸🇪", "sv-SE-Wavenet-A"),
    VoiceLanguage("da-DK", "da", "Dansk", "🇩🇰", "da-DK-Wavenet-A"),
    VoiceLanguage("nb-NO", "nb", "Norsk", "🇳🇴", "nb-NO-Wavenet-A"),
    VoiceLanguage("fi-FI", "fi", "Suomi", "🇫🇮", "fi-FI-Wavenet-A"),
    VoiceLanguage("tr-TR", "tr", "Türkçe", "🇹🇷", "tr-TR-Wavenet-A"),
    # Region-neutral code: no flag.
    VoiceLanguage("ar-XA", "ar", "العربية", "", "ar-XA-Wavenet-A"),
    VoiceLanguage("hi-IN", "hi", "हिन्दी", "🇮🇳", "hi-IN-Wavenet-A"),
    VoiceLanguage("ja-JP", "ja", "日本語", "🇯🇵", "ja-JP-Wavenet-A"),
    VoiceLanguage("ko-KR", "ko", "한국어", "🇰🇷", "ko-KR-Wavenet-A"),
    # The voice name uses the "cmn-CN" prefix while the catalog key is "zh-CN".
    VoiceLanguage("zh-CN", "zh", "中文 (简体)", "🇨🇳", "cmn-CN-Wavenet-A"),
    VoiceLanguage("vi-VN", "vi", "Tiếng Việt", "🇻🇳", "vi-VN-Wavenet-A"),
    # Only row that uses a "Standard" voice instead of a "Wavenet" one.
    VoiceLanguage("th-TH", "th", "ไทย", "🇹🇭", "th-TH-Standard-A"),
    VoiceLanguage("id-ID", "id", "Bahasa Indonesia", "🇮🇩", "id-ID-Wavenet-A"),
]
# ---------------------------------------------------------------------------
# Lookup indexes (built once at import).
#
# Keys are lower-cased so lookups are case-insensitive.
#   _BY_BCP47: one entry per locale; a later duplicate would overwrite.
#   _BY_ISO:   the FIRST catalog row per ISO code wins, which makes catalog
#              order decide the canonical locale of a bare ISO code.
# ---------------------------------------------------------------------------
_BY_BCP47: Dict[str, VoiceLanguage] = {}
_BY_ISO: Dict[str, VoiceLanguage] = {}
for _v in VOICE_LANGUAGES:
    _BY_BCP47[_v.bcp47.lower()] = _v
    if _v.iso.lower() not in _BY_ISO:
        _BY_ISO[_v.iso.lower()] = _v
def listVoiceLanguages() -> List[VoiceLanguage]:
    """Return the supported voice languages in canonical catalog order.

    A new list is returned on every call, so callers may reorder or filter
    the result without touching the module-level catalog.
    """
    return [*VOICE_LANGUAGES]
def getCatalogPayload() -> List[Dict[str, Optional[str]]]:
    """Return the catalog as a list of plain dicts, in catalog order.

    Each dict carries the dataclass fields (bcp47, iso, label, flag,
    defaultVoice) and is ready for JSON serialization.
    """
    return list(map(asdict, VOICE_LANGUAGES))
def getByBcp47(code: Optional[str]) -> Optional[VoiceLanguage]:
    """Look up a catalog entry by BCP-47 code.

    The lookup ignores surrounding whitespace and letter case. Returns None
    for a missing/empty code or when the locale is not in the catalog.
    """
    if code:
        return _BY_BCP47.get(code.strip().lower())
    return None
def getByIso(code: Optional[str]) -> Optional[VoiceLanguage]:
    """Look up the canonical catalog entry for an ISO-639-1 short code.

    The lookup ignores surrounding whitespace and letter case. When several
    locales share the ISO code, the first one in catalog order is returned.
    Returns None for a missing/empty code or an unknown language.
    """
    return _BY_ISO.get(code.strip().lower()) if code else None
def getDefaultVoice(bcp47: Optional[str]) -> Optional[str]:
    """Return the curated default Google TTS voice for a BCP-47 code.

    Returns None when the locale is not in the catalog (or the entry has no
    curated voice). In that case the caller must omit `name` in
    VoiceSelectionParams so Google auto-selects a voice for the language.
    """
    catalogEntry = getByBcp47(bcp47)
    if not catalogEntry:
        return None
    return catalogEntry.defaultVoice
def isoToBcp47(iso: Optional[str]) -> Optional[str]:
    """Map an ISO-639-1 short code to the canonical BCP-47 locale.

    Resolution rules, in order:
      1. None, empty or whitespace-only input returns None.
      2. Input that already contains "-" is treated as BCP-47: a known locale
         is returned in its catalog spelling, an unknown one is passed
         through unchanged (apart from stripped whitespace).
      3. A known ISO code returns the first matching catalog locale.
      4. An unknown ISO code falls back to ``<iso>-<ISO>`` (e.g. "fa" ->
         "fa-FA") so callers always get a parseable locale. The region part
         is synthesized from the language code, and such locales carry no
         curated voice.
    """
    cleaned = iso.strip() if iso else ""
    if not cleaned:
        return None

    # Already region-qualified: canonicalise when known, else pass through.
    if "-" in cleaned:
        knownLocale = getByBcp47(cleaned)
        return knownLocale.bcp47 if knownLocale else cleaned

    catalogEntry = getByIso(cleaned)
    if catalogEntry:
        return catalogEntry.bcp47

    # Unknown language: build a syntactically valid locale from the code.
    shortCode = cleaned.lower()
    return f"{shortCode}-{shortCode.upper()}"