centralized language catalog
This commit is contained in:
parent
24ff6058d5
commit
3ea85fe57e
6 changed files with 197 additions and 148 deletions
|
|
@ -15,6 +15,7 @@ from google.cloud import speech
|
|||
from google.cloud import translate_v2 as translate
|
||||
from google.cloud import texttospeech
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
from modules.shared.voiceCatalog import getDefaultVoice as _catalogDefaultVoice
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -940,33 +941,26 @@ class ConnectorGoogleSpeech:
|
|||
stripped = voiceName.strip()
|
||||
return bool(stripped) and "-" not in stripped
|
||||
|
||||
async def textToSpeech(self, text: str, languageCode: str = "de-DE", voiceName: str = None) -> Dict[str, Any]:
|
||||
async def textToSpeech(self, text: str, languageCode: str = "de-DE", voiceName: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Convert text to speech using Google Cloud Text-to-Speech.
|
||||
|
||||
|
||||
Args:
|
||||
text: Text to convert to speech
|
||||
language_code: Language code (e.g., 'de-DE', 'en-US')
|
||||
voice_name: Specific voice name (optional)
|
||||
|
||||
languageCode: BCP-47 language code (e.g., 'de-DE', 'en-US', 'ru-RU')
|
||||
voiceName: Specific voice name (optional). If omitted, a curated
|
||||
default is used; if no curated default exists for the language,
|
||||
Google selects a default voice automatically based on
|
||||
languageCode + ssml_gender (no hard failure).
|
||||
|
||||
Returns:
|
||||
Dict with success status and audio data
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Converting text to speech: '{text[:50]}...' in {languageCode}")
|
||||
|
||||
# Build the voice request
|
||||
|
||||
selectedVoice = voiceName or self._getDefaultVoice(languageCode)
|
||||
|
||||
if not selectedVoice:
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"No voice specified for language {languageCode}. Please select a voice."
|
||||
}
|
||||
|
||||
logger.info(f"Using TTS voice: {selectedVoice} for language: {languageCode}")
|
||||
|
||||
isGeminiVoice = self._isGeminiTtsSpeakerVoiceName(selectedVoice)
|
||||
isGeminiVoice = self._isGeminiTtsSpeakerVoiceName(selectedVoice) if selectedVoice else False
|
||||
|
||||
if isGeminiVoice:
|
||||
synthesisInput = texttospeech.SynthesisInput(
|
||||
|
|
@ -981,11 +975,23 @@ class ConnectorGoogleSpeech:
|
|||
)
|
||||
else:
|
||||
synthesisInput = texttospeech.SynthesisInput(text=text)
|
||||
voice = texttospeech.VoiceSelectionParams(
|
||||
language_code=languageCode,
|
||||
name=selectedVoice,
|
||||
ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
|
||||
)
|
||||
voiceKwargs: Dict[str, Any] = {
|
||||
"language_code": languageCode,
|
||||
"ssml_gender": texttospeech.SsmlVoiceGender.NEUTRAL,
|
||||
}
|
||||
if selectedVoice:
|
||||
voiceKwargs["name"] = selectedVoice
|
||||
else:
|
||||
logger.info(
|
||||
f"TTS: no curated voice for '{languageCode}', "
|
||||
f"letting Google auto-select by language + gender"
|
||||
)
|
||||
voice = texttospeech.VoiceSelectionParams(**voiceKwargs)
|
||||
|
||||
logger.info(
|
||||
f"Using TTS voice: {selectedVoice or '<google-auto>'} "
|
||||
f"for language: {languageCode}"
|
||||
)
|
||||
|
||||
audioConfig = texttospeech.AudioConfig(
|
||||
audio_encoding=texttospeech.AudioEncoding.MP3
|
||||
|
|
@ -994,16 +1000,15 @@ class ConnectorGoogleSpeech:
|
|||
response = self.tts_client.synthesize_speech(
|
||||
input=synthesisInput,
|
||||
voice=voice,
|
||||
audio_config=audioConfig
|
||||
audio_config=audioConfig,
|
||||
)
|
||||
|
||||
# Return the audio content
|
||||
return {
|
||||
"success": True,
|
||||
"audio_content": response.audio_content,
|
||||
"audio_format": "mp3",
|
||||
"language_code": languageCode,
|
||||
"voice_name": voice.name
|
||||
"voice_name": selectedVoice or "<google-auto>",
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -1018,59 +1023,15 @@ class ConnectorGoogleSpeech:
|
|||
"error": f"Text-to-Speech failed: {detail}{extra}",
|
||||
}
|
||||
|
||||
def _getDefaultVoice(self, languageCode: str) -> str:
|
||||
def _getDefaultVoice(self, languageCode: str) -> Optional[str]:
|
||||
"""Return the curated default Google TTS voice for `languageCode`.
|
||||
|
||||
Delegates to the central voice catalog; returns None when no curated
|
||||
voice exists, in which case the caller omits `name` and Google
|
||||
auto-selects based on languageCode + ssml_gender.
|
||||
"""
|
||||
Get default voice name for a language code.
|
||||
Falls back to a Wavenet voice for common languages.
|
||||
"""
|
||||
_defaults = {
|
||||
"de-DE": "de-DE-Wavenet-A",
|
||||
"de-CH": "de-DE-Wavenet-A",
|
||||
"en-US": "en-US-Wavenet-C",
|
||||
"en-GB": "en-GB-Wavenet-A",
|
||||
"fr-FR": "fr-FR-Wavenet-A",
|
||||
"it-IT": "it-IT-Wavenet-A",
|
||||
}
|
||||
return _defaults.get(languageCode)
|
||||
|
||||
async def getAvailableLanguages(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Get available languages from Google Cloud Text-to-Speech.
|
||||
|
||||
Returns:
|
||||
Dict containing success status and list of available languages
|
||||
"""
|
||||
try:
|
||||
logger.info("🌐 Getting available languages from Google Cloud TTS")
|
||||
|
||||
# List voices from Google Cloud TTS
|
||||
response = self.tts_client.list_voices()
|
||||
|
||||
# Extract unique language codes
|
||||
# Note: Google TTS API doesn't provide language descriptions, only codes
|
||||
language_codes = set()
|
||||
for voice in response.voices:
|
||||
if voice.language_codes:
|
||||
language_codes.update(voice.language_codes)
|
||||
|
||||
# Convert to sorted list of language codes
|
||||
available_languages = sorted(list(language_codes))
|
||||
|
||||
logger.info(f"✅ Found {len(available_languages)} available languages")
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"languages": available_languages
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Failed to get available languages: {e}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(e),
|
||||
"languages": []
|
||||
}
|
||||
|
||||
return _catalogDefaultVoice(languageCode)
|
||||
|
||||
async def getAvailableVoices(self, languageCode: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Get available voices from Google Cloud Text-to-Speech.
|
||||
|
|
|
|||
|
|
@ -338,36 +338,11 @@ class VoiceObjects:
|
|||
"error": str(e)
|
||||
}
|
||||
|
||||
# Language and Voice Information
|
||||
|
||||
async def getAvailableLanguages(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Get available languages from Google Cloud Text-to-Speech.
|
||||
|
||||
Returns:
|
||||
Dict containing success status and list of available languages
|
||||
"""
|
||||
try:
|
||||
logger.info("🌐 Getting available languages from Google Cloud TTS")
|
||||
|
||||
connector = self._getGoogleSpeechConnector()
|
||||
result = await connector.getAvailableLanguages()
|
||||
|
||||
if result["success"]:
|
||||
logger.info(f"✅ Found {len(result['languages'])} available languages")
|
||||
else:
|
||||
logger.warning(f"⚠️ Failed to get languages: {result.get('error', 'Unknown error')}")
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Error getting available languages: {e}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(e),
|
||||
"languages": []
|
||||
}
|
||||
|
||||
# Voice Information
|
||||
# Note: Available languages live in the central voice catalog
|
||||
# (modules.shared.voiceCatalog); voice picks per language stay live from
|
||||
# Google so users can see all available speakers per locale.
|
||||
|
||||
async def getAvailableVoices(self, languageCode: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Get available voices from Google Cloud Text-to-Speech.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ from typing import Optional, Dict, Any, List
|
|||
from modules.auth import getCurrentUser, getRequestContext, RequestContext, limiter
|
||||
from modules.datamodels.datamodelUam import User
|
||||
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface, VoiceObjects
|
||||
from modules.shared.voiceCatalog import getCatalogPayload
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/voice-google", tags=["Voice Google"])
|
||||
|
||||
|
|
@ -61,32 +62,15 @@ def _getVoiceInterface(currentUser: User) -> VoiceObjects:
|
|||
|
||||
@router.get("/languages")
|
||||
async def get_available_languages(currentUser: User = Depends(getCurrentUser)):
|
||||
"""Get available languages from Google Cloud Text-to-Speech."""
|
||||
try:
|
||||
logger.info("🌐 Getting available languages from Google Cloud TTS")
|
||||
|
||||
voiceInterface = _getVoiceInterface(currentUser)
|
||||
result = await voiceInterface.getAvailableLanguages()
|
||||
|
||||
if result["success"]:
|
||||
return {
|
||||
"success": True,
|
||||
"languages": result["languages"]
|
||||
}
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Failed to get languages: {result.get('error', 'Unknown error')}"
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Get languages error: {e}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to get available languages: {str(e)}"
|
||||
)
|
||||
"""Return the curated voice/language catalog (single source of truth).
|
||||
|
||||
Each entry: {bcp47, iso, label, flag, defaultVoice}. Same payload as
|
||||
/api/voice/languages — both endpoints back the same catalog.
|
||||
"""
|
||||
return {
|
||||
"success": True,
|
||||
"languages": getCatalogPayload(),
|
||||
}
|
||||
|
||||
@router.get("/voices")
|
||||
async def get_available_voices(
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ from modules.datamodels.datamodelUam import User, UserVoicePreferences, _normali
|
|||
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
|
||||
from modules.shared.i18nRegistry import apiRouteContext
|
||||
from modules.shared.voiceCatalog import getCatalogPayload
|
||||
routeApiMsg = apiRouteContext("routeVoiceUser")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -101,11 +102,11 @@ async def getVoiceLanguages(
|
|||
request: Request,
|
||||
currentUser: User = Depends(getCurrentUser),
|
||||
) -> Dict[str, Any]:
|
||||
"""Return available TTS languages (user-level, no instance context needed)."""
|
||||
voiceInterface = getVoiceInterface(currentUser)
|
||||
languagesResult = await voiceInterface.getAvailableLanguages()
|
||||
languageList = languagesResult.get("languages", []) if isinstance(languagesResult, dict) else languagesResult
|
||||
return {"languages": languageList}
|
||||
"""Return the curated voice/language catalog (single source of truth).
|
||||
|
||||
Each entry: {bcp47, iso, label, flag, defaultVoice}.
|
||||
"""
|
||||
return {"languages": getCatalogPayload()}
|
||||
|
||||
|
||||
@router.get("/voices")
|
||||
|
|
|
|||
|
|
@ -395,25 +395,17 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
|||
|
||||
try:
|
||||
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
|
||||
from modules.shared.voiceCatalog import isoToBcp47
|
||||
mandateId = context.get("mandateId", "")
|
||||
voiceInterface = getVoiceInterface(currentUser=None, mandateId=mandateId)
|
||||
|
||||
_ISO_TO_BCP47 = {
|
||||
"de": "de-DE", "en": "en-US", "fr": "fr-FR", "it": "it-IT",
|
||||
"es": "es-ES", "pt": "pt-BR", "nl": "nl-NL", "pl": "pl-PL",
|
||||
"ru": "ru-RU", "ja": "ja-JP", "zh": "zh-CN", "ko": "ko-KR",
|
||||
"ar": "ar-XA", "hi": "hi-IN", "tr": "tr-TR", "sv": "sv-SE",
|
||||
}
|
||||
|
||||
if language == "auto":
|
||||
try:
|
||||
snippet = cleanText[:500]
|
||||
detectResult = await voiceInterface.detectLanguage(snippet)
|
||||
if detectResult and detectResult.get("success"):
|
||||
detected = detectResult.get("language", "de")
|
||||
language = _ISO_TO_BCP47.get(detected, detected)
|
||||
if "-" not in language:
|
||||
language = _ISO_TO_BCP47.get(language, f"{language}-{language.upper()}")
|
||||
language = isoToBcp47(detected) or "de-DE"
|
||||
logger.info(f"textToSpeech: auto-detected language '{detected}' -> '{language}'")
|
||||
else:
|
||||
language = "de-DE"
|
||||
|
|
|
|||
136
modules/shared/voiceCatalog.py
Normal file
136
modules/shared/voiceCatalog.py
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""
|
||||
Voice / Language Catalog — Single Source of Truth.
|
||||
|
||||
Every voice-related component (TTS connector, AI tools, REST routes, frontend
|
||||
language pickers) consumes this catalog. Hard-coded language lists or ad-hoc
|
||||
ISO→BCP-47 maps elsewhere are forbidden — extend the catalog instead.
|
||||
|
||||
Schema per entry:
|
||||
bcp47 BCP-47 locale code, e.g. "de-DE", "ru-RU"
|
||||
iso ISO-639-1 short code, e.g. "de", "ru"
|
||||
label Native display label ("Deutsch", "Русский")
|
||||
flag Emoji flag (or empty string for region-neutral codes)
|
||||
defaultVoice Curated Google TTS voice name; None means "let Google
|
||||
pick automatically based on bcp47 + ssml_gender".
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict, dataclass
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class VoiceLanguage:
    """One immutable entry of the voice/language catalog.

    The dataclass is frozen, so a single instance can safely be shared by the
    ordered catalog list and by every lookup index derived from it.
    """

    # BCP-47 locale code, e.g. "de-DE", "ru-RU".
    bcp47: str
    # ISO-639-1 short code, e.g. "de", "ru".
    iso: str
    # Native display label, e.g. "Deutsch".
    label: str
    # Emoji flag, or an empty string for region-neutral codes.
    flag: str
    # Curated Google TTS voice name; None means "let Google pick a voice".
    defaultVoice: Optional[str]
|
||||
|
||||
|
||||
# Order matters for UI: most common first, then alphabetical groups.
# Order also decides which locale represents a shared ISO code: the first
# entry carrying a given `iso` is the one the ISO lookup index keeps.
# NOTE(review): de-CH and de-AT reuse a de-DE voice and zh-CN uses a cmn-CN
# voice — confirm Google TTS accepts a voice whose own locale differs from
# the requested language_code.
VOICE_LANGUAGES: List[VoiceLanguage] = [
    VoiceLanguage("de-DE", "de", "Deutsch", "🇩🇪", "de-DE-Wavenet-A"),
    VoiceLanguage("de-CH", "de", "Deutsch (Schweiz)", "🇨🇭", "de-DE-Wavenet-A"),
    VoiceLanguage("de-AT", "de", "Deutsch (Österreich)", "🇦🇹", "de-DE-Wavenet-A"),
    VoiceLanguage("en-US", "en", "English (US)", "🇺🇸", "en-US-Wavenet-C"),
    VoiceLanguage("en-GB", "en", "English (UK)", "🇬🇧", "en-GB-Wavenet-A"),
    VoiceLanguage("en-AU", "en", "English (Australia)", "🇦🇺", "en-AU-Wavenet-A"),
    VoiceLanguage("fr-FR", "fr", "Français", "🇫🇷", "fr-FR-Wavenet-A"),
    VoiceLanguage("fr-CA", "fr", "Français (Canada)", "🇨🇦", "fr-CA-Wavenet-A"),
    VoiceLanguage("it-IT", "it", "Italiano", "🇮🇹", "it-IT-Wavenet-A"),
    VoiceLanguage("es-ES", "es", "Español", "🇪🇸", "es-ES-Wavenet-B"),
    VoiceLanguage("es-US", "es", "Español (US)", "🇺🇸", "es-US-Wavenet-A"),
    VoiceLanguage("pt-BR", "pt", "Português (Brasil)", "🇧🇷", "pt-BR-Wavenet-A"),
    VoiceLanguage("pt-PT", "pt", "Português (Portugal)", "🇵🇹", "pt-PT-Wavenet-A"),
    VoiceLanguage("nl-NL", "nl", "Nederlands", "🇳🇱", "nl-NL-Wavenet-A"),
    VoiceLanguage("pl-PL", "pl", "Polski", "🇵🇱", "pl-PL-Wavenet-A"),
    VoiceLanguage("ru-RU", "ru", "Русский", "🇷🇺", "ru-RU-Wavenet-A"),
    VoiceLanguage("uk-UA", "uk", "Українська", "🇺🇦", "uk-UA-Wavenet-A"),
    VoiceLanguage("cs-CZ", "cs", "Čeština", "🇨🇿", "cs-CZ-Wavenet-A"),
    VoiceLanguage("sk-SK", "sk", "Slovenčina", "🇸🇰", "sk-SK-Wavenet-A"),
    VoiceLanguage("hu-HU", "hu", "Magyar", "🇭🇺", "hu-HU-Wavenet-A"),
    VoiceLanguage("ro-RO", "ro", "Română", "🇷🇴", "ro-RO-Wavenet-A"),
    VoiceLanguage("el-GR", "el", "Ελληνικά", "🇬🇷", "el-GR-Wavenet-A"),
    VoiceLanguage("sv-SE", "sv", "Svenska", "🇸🇪", "sv-SE-Wavenet-A"),
    VoiceLanguage("da-DK", "da", "Dansk", "🇩🇰", "da-DK-Wavenet-A"),
    VoiceLanguage("nb-NO", "nb", "Norsk", "🇳🇴", "nb-NO-Wavenet-A"),
    VoiceLanguage("fi-FI", "fi", "Suomi", "🇫🇮", "fi-FI-Wavenet-A"),
    VoiceLanguage("tr-TR", "tr", "Türkçe", "🇹🇷", "tr-TR-Wavenet-A"),
    VoiceLanguage("ar-XA", "ar", "العربية", "", "ar-XA-Wavenet-A"),
    VoiceLanguage("hi-IN", "hi", "हिन्दी", "🇮🇳", "hi-IN-Wavenet-A"),
    VoiceLanguage("ja-JP", "ja", "日本語", "🇯🇵", "ja-JP-Wavenet-A"),
    VoiceLanguage("ko-KR", "ko", "한국어", "🇰🇷", "ko-KR-Wavenet-A"),
    VoiceLanguage("zh-CN", "zh", "中文 (简体)", "🇨🇳", "cmn-CN-Wavenet-A"),
    VoiceLanguage("vi-VN", "vi", "Tiếng Việt", "🇻🇳", "vi-VN-Wavenet-A"),
    VoiceLanguage("th-TH", "th", "ไทย", "🇹🇭", "th-TH-Standard-A"),
    VoiceLanguage("id-ID", "id", "Bahasa Indonesia", "🇮🇩", "id-ID-Wavenet-A"),
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Lookup indexes (built once at import).
# ---------------------------------------------------------------------------

# Keys are lower-cased so lookups can ignore the caller's casing.
_BY_BCP47: Dict[str, VoiceLanguage] = {v.bcp47.lower(): v for v in VOICE_LANGUAGES}

# Several locales share one ISO code (de-DE / de-CH / de-AT, ...). setdefault
# keeps the first catalog entry, so catalog order picks the canonical locale.
_BY_ISO: Dict[str, VoiceLanguage] = {}
for _v in VOICE_LANGUAGES:
    _BY_ISO.setdefault(_v.iso.lower(), _v)
|
||||
|
||||
|
||||
def listVoiceLanguages() -> List[VoiceLanguage]:
    """Return the canonical, ordered list of supported voice languages.

    Returns:
        A new list holding the catalog entries in catalog order. Only the
        list is copied; the entries themselves are the shared frozen
        instances.
    """
    return list(VOICE_LANGUAGES)
|
||||
|
||||
|
||||
def getCatalogPayload() -> List[Dict[str, Optional[str]]]:
    """Return the catalog as plain dicts — ready for JSON serialization.

    Returns:
        One dict per catalog entry, in catalog order, with the keys
        ``bcp47``, ``iso``, ``label``, ``flag`` and ``defaultVoice``. The
        dicts are built fresh on every call.
    """
    return [asdict(v) for v in VOICE_LANGUAGES]
|
||||
|
||||
|
||||
def getByBcp47(code: Optional[str]) -> Optional[VoiceLanguage]:
    """Return the catalog entry for a BCP-47 locale code, else None.

    The lookup ignores surrounding whitespace and letter case, and accepts
    an underscore as separator ("de_DE" is treated like "de-DE").

    Args:
        code: Locale code such as "de-DE"; None or an empty string is allowed.

    Returns:
        The matching VoiceLanguage, or None when `code` is empty or the
        locale is not part of the catalog.
    """
    if not code:
        return None
    # Index keys are lower-case and hyphen-separated.
    key = code.strip().replace("_", "-").lower()
    return _BY_BCP47.get(key)
|
||||
|
||||
|
||||
def getByIso(code: Optional[str]) -> Optional[VoiceLanguage]:
    """Return the canonical catalog entry for an ISO-639-1 code, else None.

    When several locales share the ISO code, the entry registered in the ISO
    index for that code is returned. Surrounding whitespace and letter case
    are ignored.

    Args:
        code: Short language code such as "de"; None or an empty string is
            allowed.

    Returns:
        The matching VoiceLanguage, or None when `code` is empty or unknown.
    """
    if not code:
        return None
    return _BY_ISO.get(code.strip().lower())
|
||||
|
||||
|
||||
def getDefaultVoice(bcp47: Optional[str]) -> Optional[str]:
    """Return the curated default Google TTS voice for a BCP-47 code, else None.

    None means: caller must omit `name` in VoiceSelectionParams so Google
    auto-selects a voice for the language code.

    Args:
        bcp47: Locale code such as "de-DE"; None or an empty string is allowed.

    Returns:
        The catalog entry's `defaultVoice`, or None when the locale is not in
        the catalog or the entry has no curated voice.
    """
    entry = getByBcp47(bcp47)
    return entry.defaultVoice if entry else None
|
||||
|
||||
|
||||
def isoToBcp47(iso: Optional[str]) -> Optional[str]:
    """Map an ISO-639-1 short code to the canonical BCP-47 locale.

    Already-qualified inputs (anything containing a separator) are
    canonicalised to the catalog form when known and otherwise returned as
    given, apart from trimming and turning "_" into "-". Unknown ISO codes
    fall back to ``<iso>-<ISO>`` (e.g. "fa" → "fa-FA") so callers always get
    a parseable locale; such a fallback carries no curated voice and its
    region part is only the upper-cased language code, not a verified region.

    Args:
        iso: Short code ("de"), qualified locale ("en-gb", "de_DE"), None or
            an empty/blank string.

    Returns:
        A BCP-47 style locale string, or None for empty/blank input.
    """
    if not iso:
        return None
    # POSIX-style "de_DE" must be handled as a qualified locale; without this
    # it would fall through to the ISO branch and yield "de_de-DE_DE".
    normalized = iso.strip().replace("_", "-")
    if not normalized:
        return None
    if "-" in normalized:
        canonical = getByBcp47(normalized)
        return canonical.bcp47 if canonical else normalized
    isoLower = normalized.lower()
    entry = getByIso(isoLower)
    if entry:
        return entry.bcp47
    return f"{isoLower}-{isoLower.upper()}"
|
||||
Loading…
Reference in a new issue