diff --git a/modules/connectors/connectorVoiceGoogle.py b/modules/connectors/connectorVoiceGoogle.py index 1dc0912b..f875c72c 100644 --- a/modules/connectors/connectorVoiceGoogle.py +++ b/modules/connectors/connectorVoiceGoogle.py @@ -15,6 +15,7 @@ from google.cloud import speech from google.cloud import translate_v2 as translate from google.cloud import texttospeech from modules.shared.configuration import APP_CONFIG +from modules.shared.voiceCatalog import getDefaultVoice as _catalogDefaultVoice logger = logging.getLogger(__name__) @@ -940,33 +941,26 @@ class ConnectorGoogleSpeech: stripped = voiceName.strip() return bool(stripped) and "-" not in stripped - async def textToSpeech(self, text: str, languageCode: str = "de-DE", voiceName: str = None) -> Dict[str, Any]: + async def textToSpeech(self, text: str, languageCode: str = "de-DE", voiceName: Optional[str] = None) -> Dict[str, Any]: """ Convert text to speech using Google Cloud Text-to-Speech. - + Args: text: Text to convert to speech - language_code: Language code (e.g., 'de-DE', 'en-US') - voice_name: Specific voice name (optional) - + languageCode: BCP-47 language code (e.g., 'de-DE', 'en-US', 'ru-RU') + voiceName: Specific voice name (optional). If omitted, a curated + default is used; if no curated default exists for the language, + Google selects a default voice automatically based on + languageCode + ssml_gender (no hard failure). + Returns: Dict with success status and audio data """ try: logger.info(f"Converting text to speech: '{text[:50]}...' in {languageCode}") - - # Build the voice request + selectedVoice = voiceName or self._getDefaultVoice(languageCode) - - if not selectedVoice: - return { - "success": False, - "error": f"No voice specified for language {languageCode}. Please select a voice." 
- } - - logger.info(f"Using TTS voice: {selectedVoice} for language: {languageCode}") - - isGeminiVoice = self._isGeminiTtsSpeakerVoiceName(selectedVoice) + isGeminiVoice = self._isGeminiTtsSpeakerVoiceName(selectedVoice) if selectedVoice else False if isGeminiVoice: synthesisInput = texttospeech.SynthesisInput( @@ -981,11 +975,23 @@ class ConnectorGoogleSpeech: ) else: synthesisInput = texttospeech.SynthesisInput(text=text) - voice = texttospeech.VoiceSelectionParams( - language_code=languageCode, - name=selectedVoice, - ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL, - ) + voiceKwargs: Dict[str, Any] = { + "language_code": languageCode, + "ssml_gender": texttospeech.SsmlVoiceGender.NEUTRAL, + } + if selectedVoice: + voiceKwargs["name"] = selectedVoice + else: + logger.info( + f"TTS: no curated voice for '{languageCode}', " + f"letting Google auto-select by language + gender" + ) + voice = texttospeech.VoiceSelectionParams(**voiceKwargs) + + logger.info( + f"Using TTS voice: {selectedVoice or ''} " + f"for language: {languageCode}" + ) audioConfig = texttospeech.AudioConfig( audio_encoding=texttospeech.AudioEncoding.MP3 @@ -994,16 +1000,15 @@ class ConnectorGoogleSpeech: response = self.tts_client.synthesize_speech( input=synthesisInput, voice=voice, - audio_config=audioConfig + audio_config=audioConfig, ) - # Return the audio content return { "success": True, "audio_content": response.audio_content, "audio_format": "mp3", "language_code": languageCode, - "voice_name": voice.name + "voice_name": selectedVoice or "", } except Exception as e: @@ -1018,59 +1023,15 @@ class ConnectorGoogleSpeech: "error": f"Text-to-Speech failed: {detail}{extra}", } - def _getDefaultVoice(self, languageCode: str) -> str: + def _getDefaultVoice(self, languageCode: str) -> Optional[str]: + """Return the curated default Google TTS voice for `languageCode`. 
+ + Delegates to the central voice catalog; returns None when no curated + voice exists, in which case the caller omits `name` and Google + auto-selects based on languageCode + ssml_gender. """ - Get default voice name for a language code. - Falls back to a Wavenet voice for common languages. - """ - _defaults = { - "de-DE": "de-DE-Wavenet-A", - "de-CH": "de-DE-Wavenet-A", - "en-US": "en-US-Wavenet-C", - "en-GB": "en-GB-Wavenet-A", - "fr-FR": "fr-FR-Wavenet-A", - "it-IT": "it-IT-Wavenet-A", - } - return _defaults.get(languageCode) - - async def getAvailableLanguages(self) -> Dict[str, Any]: - """ - Get available languages from Google Cloud Text-to-Speech. - - Returns: - Dict containing success status and list of available languages - """ - try: - logger.info("🌐 Getting available languages from Google Cloud TTS") - - # List voices from Google Cloud TTS - response = self.tts_client.list_voices() - - # Extract unique language codes - # Note: Google TTS API doesn't provide language descriptions, only codes - language_codes = set() - for voice in response.voices: - if voice.language_codes: - language_codes.update(voice.language_codes) - - # Convert to sorted list of language codes - available_languages = sorted(list(language_codes)) - - logger.info(f"✅ Found {len(available_languages)} available languages") - - return { - "success": True, - "languages": available_languages - } - - except Exception as e: - logger.error(f"❌ Failed to get available languages: {e}") - return { - "success": False, - "error": str(e), - "languages": [] - } - + return _catalogDefaultVoice(languageCode) + async def getAvailableVoices(self, languageCode: Optional[str] = None) -> Dict[str, Any]: """ Get available voices from Google Cloud Text-to-Speech. 
diff --git a/modules/interfaces/interfaceVoiceObjects.py b/modules/interfaces/interfaceVoiceObjects.py index 69962259..d0b6f461 100644 --- a/modules/interfaces/interfaceVoiceObjects.py +++ b/modules/interfaces/interfaceVoiceObjects.py @@ -338,36 +338,11 @@ class VoiceObjects: "error": str(e) } - # Language and Voice Information - - async def getAvailableLanguages(self) -> Dict[str, Any]: - """ - Get available languages from Google Cloud Text-to-Speech. - - Returns: - Dict containing success status and list of available languages - """ - try: - logger.info("🌐 Getting available languages from Google Cloud TTS") - - connector = self._getGoogleSpeechConnector() - result = await connector.getAvailableLanguages() - - if result["success"]: - logger.info(f"✅ Found {len(result['languages'])} available languages") - else: - logger.warning(f"⚠️ Failed to get languages: {result.get('error', 'Unknown error')}") - - return result - - except Exception as e: - logger.error(f"❌ Error getting available languages: {e}") - return { - "success": False, - "error": str(e), - "languages": [] - } - + # Voice Information + # Note: Available languages live in the central voice catalog + # (modules.shared.voiceCatalog); voice picks per language stay live from + # Google so users can see all available speakers per locale. + async def getAvailableVoices(self, languageCode: Optional[str] = None) -> Dict[str, Any]: """ Get available voices from Google Cloud Text-to-Speech. 
diff --git a/modules/routes/routeVoiceGoogle.py b/modules/routes/routeVoiceGoogle.py index dfa1a15e..8987e73f 100644 --- a/modules/routes/routeVoiceGoogle.py +++ b/modules/routes/routeVoiceGoogle.py @@ -17,6 +17,7 @@ from typing import Optional, Dict, Any, List from modules.auth import getCurrentUser, getRequestContext, RequestContext, limiter from modules.datamodels.datamodelUam import User from modules.interfaces.interfaceVoiceObjects import getVoiceInterface, VoiceObjects +from modules.shared.voiceCatalog import getCatalogPayload logger = logging.getLogger(__name__) router = APIRouter(prefix="/voice-google", tags=["Voice Google"]) @@ -61,32 +62,15 @@ def _getVoiceInterface(currentUser: User) -> VoiceObjects: @router.get("/languages") async def get_available_languages(currentUser: User = Depends(getCurrentUser)): - """Get available languages from Google Cloud Text-to-Speech.""" - try: - logger.info("🌐 Getting available languages from Google Cloud TTS") - - voiceInterface = _getVoiceInterface(currentUser) - result = await voiceInterface.getAvailableLanguages() - - if result["success"]: - return { - "success": True, - "languages": result["languages"] - } - else: - raise HTTPException( - status_code=400, - detail=f"Failed to get languages: {result.get('error', 'Unknown error')}" - ) - - except HTTPException: - raise - except Exception as e: - logger.error(f"❌ Get languages error: {e}") - raise HTTPException( - status_code=500, - detail=f"Failed to get available languages: {str(e)}" - ) + """Return the curated voice/language catalog (single source of truth). + + Each entry: {bcp47, iso, label, flag, defaultVoice}. Same payload as + /api/voice/languages — both endpoints back the same catalog. 
+ """ + return { + "success": True, + "languages": getCatalogPayload(), + } @router.get("/voices") async def get_available_voices( diff --git a/modules/routes/routeVoiceUser.py b/modules/routes/routeVoiceUser.py index a3c3fda7..4edbdf0d 100644 --- a/modules/routes/routeVoiceUser.py +++ b/modules/routes/routeVoiceUser.py @@ -18,6 +18,7 @@ from modules.datamodels.datamodelUam import User, UserVoicePreferences, _normali from modules.interfaces.interfaceDbApp import getRootInterface from modules.interfaces.interfaceVoiceObjects import getVoiceInterface from modules.shared.i18nRegistry import apiRouteContext +from modules.shared.voiceCatalog import getCatalogPayload routeApiMsg = apiRouteContext("routeVoiceUser") logger = logging.getLogger(__name__) @@ -101,11 +102,11 @@ async def getVoiceLanguages( request: Request, currentUser: User = Depends(getCurrentUser), ) -> Dict[str, Any]: - """Return available TTS languages (user-level, no instance context needed).""" - voiceInterface = getVoiceInterface(currentUser) - languagesResult = await voiceInterface.getAvailableLanguages() - languageList = languagesResult.get("languages", []) if isinstance(languagesResult, dict) else languagesResult - return {"languages": languageList} + """Return the curated voice/language catalog (single source of truth). + + Each entry: {bcp47, iso, label, flag, defaultVoice}. 
+ """ + return {"languages": getCatalogPayload()} @router.get("/voices") diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py index da6e616c..83f6e990 100644 --- a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py +++ b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py @@ -395,25 +395,17 @@ def _registerMediaTools(registry: ToolRegistry, services): try: from modules.interfaces.interfaceVoiceObjects import getVoiceInterface + from modules.shared.voiceCatalog import isoToBcp47 mandateId = context.get("mandateId", "") voiceInterface = getVoiceInterface(currentUser=None, mandateId=mandateId) - _ISO_TO_BCP47 = { - "de": "de-DE", "en": "en-US", "fr": "fr-FR", "it": "it-IT", - "es": "es-ES", "pt": "pt-BR", "nl": "nl-NL", "pl": "pl-PL", - "ru": "ru-RU", "ja": "ja-JP", "zh": "zh-CN", "ko": "ko-KR", - "ar": "ar-XA", "hi": "hi-IN", "tr": "tr-TR", "sv": "sv-SE", - } - if language == "auto": try: snippet = cleanText[:500] detectResult = await voiceInterface.detectLanguage(snippet) if detectResult and detectResult.get("success"): detected = detectResult.get("language", "de") - language = _ISO_TO_BCP47.get(detected, detected) - if "-" not in language: - language = _ISO_TO_BCP47.get(language, f"{language}-{language.upper()}") + language = isoToBcp47(detected) or "de-DE" logger.info(f"textToSpeech: auto-detected language '{detected}' -> '{language}'") else: language = "de-DE" diff --git a/modules/shared/voiceCatalog.py b/modules/shared/voiceCatalog.py new file mode 100644 index 00000000..2e98902e --- /dev/null +++ b/modules/shared/voiceCatalog.py @@ -0,0 +1,136 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Voice / Language Catalog — Single Source of Truth. + +Every voice-related component (TTS connector, AI tools, REST routes, frontend +language pickers) consumes this catalog. 
Hard-coded language lists or ad-hoc +ISO→BCP-47 maps elsewhere are forbidden — extend the catalog instead. + +Schema per entry: + bcp47 BCP-47 locale code, e.g. "de-DE", "ru-RU" + iso ISO-639-1 short code, e.g. "de", "ru" + label Native display label ("Deutsch", "Русский") + flag Emoji flag (or empty string for region-neutral codes) + defaultVoice Curated Google TTS voice name; None means "let Google + pick automatically based on bcp47 + ssml_gender". +""" + +from __future__ import annotations + +from dataclasses import asdict, dataclass +from typing import Dict, List, Optional + + +@dataclass(frozen=True) +class VoiceLanguage: + bcp47: str + iso: str + label: str + flag: str + defaultVoice: Optional[str] + + +# Order matters for UI: most common first, then alphabetical groups. +VOICE_LANGUAGES: List[VoiceLanguage] = [ + VoiceLanguage("de-DE", "de", "Deutsch", "🇩🇪", "de-DE-Wavenet-A"), + VoiceLanguage("de-CH", "de", "Deutsch (Schweiz)", "🇨🇭", "de-DE-Wavenet-A"), + VoiceLanguage("de-AT", "de", "Deutsch (Österreich)", "🇦🇹", "de-DE-Wavenet-A"), + VoiceLanguage("en-US", "en", "English (US)", "🇺🇸", "en-US-Wavenet-C"), + VoiceLanguage("en-GB", "en", "English (UK)", "🇬🇧", "en-GB-Wavenet-A"), + VoiceLanguage("en-AU", "en", "English (Australia)", "🇦🇺", "en-AU-Wavenet-A"), + VoiceLanguage("fr-FR", "fr", "Français", "🇫🇷", "fr-FR-Wavenet-A"), + VoiceLanguage("fr-CA", "fr", "Français (Canada)", "🇨🇦", "fr-CA-Wavenet-A"), + VoiceLanguage("it-IT", "it", "Italiano", "🇮🇹", "it-IT-Wavenet-A"), + VoiceLanguage("es-ES", "es", "Español", "🇪🇸", "es-ES-Wavenet-B"), + VoiceLanguage("es-US", "es", "Español (US)", "🇺🇸", "es-US-Wavenet-A"), + VoiceLanguage("pt-BR", "pt", "Português (Brasil)", "🇧🇷", "pt-BR-Wavenet-A"), + VoiceLanguage("pt-PT", "pt", "Português (Portugal)", "🇵🇹", "pt-PT-Wavenet-A"), + VoiceLanguage("nl-NL", "nl", "Nederlands", "🇳🇱", "nl-NL-Wavenet-A"), + VoiceLanguage("pl-PL", "pl", "Polski", "🇵🇱", "pl-PL-Wavenet-A"), + VoiceLanguage("ru-RU", "ru", "Русский", "🇷🇺", 
"ru-RU-Wavenet-A"), + VoiceLanguage("uk-UA", "uk", "Українська", "🇺🇦", "uk-UA-Wavenet-A"), + VoiceLanguage("cs-CZ", "cs", "Čeština", "🇨🇿", "cs-CZ-Wavenet-A"), + VoiceLanguage("sk-SK", "sk", "Slovenčina", "🇸🇰", "sk-SK-Wavenet-A"), + VoiceLanguage("hu-HU", "hu", "Magyar", "🇭🇺", "hu-HU-Wavenet-A"), + VoiceLanguage("ro-RO", "ro", "Română", "🇷🇴", "ro-RO-Wavenet-A"), + VoiceLanguage("el-GR", "el", "Ελληνικά", "🇬🇷", "el-GR-Wavenet-A"), + VoiceLanguage("sv-SE", "sv", "Svenska", "🇸🇪", "sv-SE-Wavenet-A"), + VoiceLanguage("da-DK", "da", "Dansk", "🇩🇰", "da-DK-Wavenet-A"), + VoiceLanguage("nb-NO", "nb", "Norsk", "🇳🇴", "nb-NO-Wavenet-A"), + VoiceLanguage("fi-FI", "fi", "Suomi", "🇫🇮", "fi-FI-Wavenet-A"), + VoiceLanguage("tr-TR", "tr", "Türkçe", "🇹🇷", "tr-TR-Wavenet-A"), + VoiceLanguage("ar-XA", "ar", "العربية", "", "ar-XA-Wavenet-A"), + VoiceLanguage("hi-IN", "hi", "हिन्दी", "🇮🇳", "hi-IN-Wavenet-A"), + VoiceLanguage("ja-JP", "ja", "日本語", "🇯🇵", "ja-JP-Wavenet-A"), + VoiceLanguage("ko-KR", "ko", "한국어", "🇰🇷", "ko-KR-Wavenet-A"), + VoiceLanguage("zh-CN", "zh", "中文 (简体)", "🇨🇳", "cmn-CN-Wavenet-A"), + VoiceLanguage("vi-VN", "vi", "Tiếng Việt", "🇻🇳", "vi-VN-Wavenet-A"), + VoiceLanguage("th-TH", "th", "ไทย", "🇹🇭", "th-TH-Standard-A"), + VoiceLanguage("id-ID", "id", "Bahasa Indonesia", "🇮🇩", "id-ID-Wavenet-A"), +] + + +# --------------------------------------------------------------------------- +# Lookup indexes (built once at import). 
+# --------------------------------------------------------------------------- + +_BY_BCP47: Dict[str, VoiceLanguage] = {v.bcp47.lower(): v for v in VOICE_LANGUAGES} +_BY_ISO: Dict[str, VoiceLanguage] = {} +for _v in VOICE_LANGUAGES: + _BY_ISO.setdefault(_v.iso.lower(), _v) + + +def listVoiceLanguages() -> List[VoiceLanguage]: + """Return the canonical, ordered list of supported voice languages.""" + return list(VOICE_LANGUAGES) + + +def getCatalogPayload() -> List[Dict[str, Optional[str]]]: + """Return the catalog as plain dicts — ready for JSON serialization.""" + return [asdict(v) for v in VOICE_LANGUAGES] + + +def getByBcp47(code: Optional[str]) -> Optional[VoiceLanguage]: + if not code: + return None + return _BY_BCP47.get(code.strip().lower()) + + +def getByIso(code: Optional[str]) -> Optional[VoiceLanguage]: + if not code: + return None + return _BY_ISO.get(code.strip().lower()) + + +def getDefaultVoice(bcp47: Optional[str]) -> Optional[str]: + """Return the curated default Google TTS voice for a BCP-47 code, else None. + + None means: caller must omit `name` in VoiceSelectionParams so Google + auto-selects a voice for the language code. + """ + entry = getByBcp47(bcp47) + return entry.defaultVoice if entry else None + + +def isoToBcp47(iso: Optional[str]) -> Optional[str]: + """Map an ISO-639-1 short code to the canonical BCP-47 locale. + + Already-qualified BCP-47 inputs are passed through unchanged (canonicalised + to the catalog form when known). Unknown ISO codes fall back to + ``{iso}-{ISO}`` (e.g. "fa" → "fa-FA") so callers always get a parseable + locale, but unknown codes carry no curated voice. + """ + if not iso: + return None + normalized = iso.strip() + if not normalized: + return None + if "-" in normalized: + canonical = getByBcp47(normalized) + return canonical.bcp47 if canonical else normalized + isoLower = normalized.lower() + entry = _BY_ISO.get(isoLower) + if entry: + return entry.bcp47 + return f"{isoLower}-{isoLower.upper()}"