voice tts beta test version
This commit is contained in:
parent
7ed36283c5
commit
cf1b302008
35 changed files with 1032 additions and 83 deletions
5
app.py
5
app.py
|
|
@ -250,4 +250,7 @@ from modules.routes.routeSecurityGoogle import router as googleRouter
|
||||||
app.include_router(googleRouter)
|
app.include_router(googleRouter)
|
||||||
|
|
||||||
from modules.routes.routeVoiceGoogle import router as voiceGoogleRouter
|
from modules.routes.routeVoiceGoogle import router as voiceGoogleRouter
|
||||||
app.include_router(voiceGoogleRouter)
|
app.include_router(voiceGoogleRouter)
|
||||||
|
|
||||||
|
from modules.routes.routeVoiceStreaming import router as voiceStreamingRouter
|
||||||
|
app.include_router(voiceStreamingRouter)
|
||||||
14
config.ini
14
config.ini
|
|
@ -49,7 +49,19 @@ Service_GOOGLE_CLIENT_SECRET = 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
|
||||||
Connector_WebTavily_API_KEY = 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
|
Connector_WebTavily_API_KEY = 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
|
||||||
|
|
||||||
# Google Cloud Speech Services configuration
|
# Google Cloud Speech Services configuration
|
||||||
# Set GOOGLE_APPLICATION_CREDENTIALS environment variable or place credentials file in project directory
|
Connector_GoogleSpeech_API_KEY = {
|
||||||
|
"type": "service_account",
|
||||||
|
"project_id": "poweronid",
|
||||||
|
"private_key_id": "88db66e4248326e9baeac4231bc196fd46a9a441",
|
||||||
|
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDTnJuxA+xBL3LA\nPgFILYCsGuppkkdO6d153Q36f2jTj6zpH3OhKMVsaaTBknG2o2+D0Whlk6Yh5rOw\nkWzpMC3y81leRLm5kucERMkBUgd2GL4v16k6m+QGuC3BFlt/XeyuckJNW0V6v/Dy\n3+bSYM7/5o1ftPNWJeAIEWoE/V4wKCYde8RE4Vp1LO5YwhgcM4rRuPmF2OhekpA+\npteYwkY/8/gTTRpZIc8OTsBYRbaMwsjoDj5riuL3boVtkwZwKRb+ZLvupXeU7Ds7\n1305odTcZUwnImHiHfuq83ZJViQiLRNhUAFnQIXPrYLwEpCmzRBGzYHaRlb69ga/\nzqUbKnclAgMBAAECggEAH6W9qHehubioPMAJM7Y6bC2KU/JLNS4csBZd+idb52gG\nwBwIEFjR+H4ZjymhAA4+pe7c4h7MKyh0RI/l7eoFX98Cb+rEq/r1udm1BhGH3s2h\n2UiI8qRQh1YRjF2/nrN5VjhDBOFa6W9opaopZy/l8AzsT8f21zIgPen8z8o6GpFg\n64fJFcbqCGk2ykN2+x2pIOT04tmCszrfbXZP8LEs4xrUB/XwlHL1vT/M3EWIKbnj\njDaIMjw7q/KRgNUvmKS6SU9b3fnOLcQCz9f5cKdiWACKIU/UvuiWhWJ9ou6BWLWU\nva1A6Fi4XJjhW7s3po58/ioQfl0A9p/L92lGg4ST8QKBgQDx8LIM1g0dh9Ql6LmH\nBUGCOewNNXTs+y3ZznUfvVMoyyZK5w/pzeUvkmOwzbRGnZJ9WyCghq8aezyEpo2D\nPL7Odf988IeHmvhyZIM4PLJYgDvSwGXyf/gh6gJkf/4wpx+tx/yQYNBm3Rht7sA0\npSaLehK0E0kW1uyBzHGKgyQOhwKBgQDf6LiZ7hSQqh54vIU1XMDRth0UOo/s/HGi\nDoij29KjmHjLkm8vOlCo83e79X0WhcnyB5kM7nWFegwcM1PJ0Dl8gidUuTlOVDtM\n5u2AaxDoyXAUL457U5dGFAIW+R653ZDkzMfCglacP8HixXEyIpL1cTLqiCAgzszS\nLcSWwoAr8wKBgQC4CGm3X97sFpTmHSd6sCHLaDnJNl9xoAKZifUHpqCqCBVhpm8x\nXp+11vmj1GULzfJPDlE8Khbp4tH+6R39tOhC7fjgVaoSGWxgv1odHfZfYXOf9R/X\nHUZmrbUSM1XsNkPfkZ7pR+teQ1HA1Xo40WMHd1zgw0a2a9fNR/EZ9nUn4wKBgGaK\nUEgGNRrPHadTRnnaoV8o1IZYD2OLdIqvtzm7SOqsv90SkaKCRUAqR5InaYKwAHy7\nqAa5Cc73xqX/h4arujff7x0ouiq5/nJIa0ndPmAtKAvGf6zQ6j0ompBkxAKAioON\nmInmYL2roSI2I5G/LagDkDrB3lzH+Brk5NvZ9RKrAoGAGox462GGGb/NbGdDkahN\ndifzYYvq4FPiWFFo0ynKAulxCBWLXO/N45XNuAyen433d8eREcAYz1Dzax44+MdQ\nHo9dU7YcZvFyt6iZsYeQF8dluHui3vzMpUe0KbqpZC5KMOSw53ZdNIwzo8NTAK59\n+uv3dHGj7sS8fhDo3yCifzc=\n-----END PRIVATE KEY-----\n",
|
||||||
|
"client_email": "poweron-voice-services@poweronid.iam.gserviceaccount.com",
|
||||||
|
"client_id": "116641749406798186404",
|
||||||
|
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||||
|
"token_uri": "https://oauth2.googleapis.com/token",
|
||||||
|
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||||
|
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/poweron-voice-services%40poweronid.iam.gserviceaccount.com",
|
||||||
|
"universe_domain": "googleapis.com"
|
||||||
|
}
|
||||||
|
|
||||||
# Web Search configuration
|
# Web Search configuration
|
||||||
Web_Search_MAX_QUERY_LENGTH = 400
|
Web_Search_MAX_QUERY_LENGTH = 400
|
||||||
|
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
debug_audio/audio_google_conversation_sliding.webm
Normal file
BIN
debug_audio/audio_google_conversation_sliding.webm
Normal file
Binary file not shown.
BIN
debug_audio/audio_google_interpreter_recording.webm
Normal file
BIN
debug_audio/audio_google_interpreter_recording.webm
Normal file
Binary file not shown.
BIN
debug_audio/audio_google_recording.webm
Normal file
BIN
debug_audio/audio_google_recording.webm
Normal file
Binary file not shown.
|
|
@ -5,11 +5,15 @@ Replaces Azure Speech Services with Google Cloud APIs
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import io
|
import io
|
||||||
|
import json
|
||||||
|
import html
|
||||||
import logging
|
import logging
|
||||||
import asyncio
|
import asyncio
|
||||||
from typing import Dict, Optional, Any
|
from typing import Dict, Optional, Any
|
||||||
from google.cloud import speech
|
from google.cloud import speech
|
||||||
from google.cloud import translate_v2 as translate
|
from google.cloud import translate_v2 as translate
|
||||||
|
from google.cloud import texttospeech
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -19,21 +23,34 @@ class ConnectorGoogleSpeech:
|
||||||
Handles audio processing, speech recognition, and translation.
|
Handles audio processing, speech recognition, and translation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, credentials_path: Optional[str] = None):
|
def __init__(self):
|
||||||
"""
|
"""
|
||||||
Initialize Google Cloud Speech and Translation clients.
|
Initialize Google Cloud Speech and Translation clients using config.ini.
|
||||||
|
|
||||||
Args:
|
|
||||||
credentials_path: Path to Google Cloud service account JSON file
|
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Set up authentication
|
# Get JSON key from config.ini
|
||||||
if credentials_path:
|
api_key = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY")
|
||||||
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
|
|
||||||
|
|
||||||
# Initialize clients
|
if not api_key or api_key == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE":
|
||||||
self.speech_client = speech.SpeechClient()
|
raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY in config.ini with the full service account JSON key")
|
||||||
self.translate_client = translate.Client()
|
|
||||||
|
# Parse the JSON key and set up authentication
|
||||||
|
try:
|
||||||
|
credentials_info = json.loads(api_key)
|
||||||
|
|
||||||
|
# Create credentials object directly (no file needed!)
|
||||||
|
from google.oauth2 import service_account
|
||||||
|
credentials = service_account.Credentials.from_service_account_info(credentials_info)
|
||||||
|
|
||||||
|
logger.info("✅ Using Google Speech credentials from config.ini")
|
||||||
|
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
raise ValueError(f"Invalid JSON in Google Speech API key: {e}")
|
||||||
|
|
||||||
|
# Initialize clients with explicit credentials
|
||||||
|
self.speech_client = speech.SpeechClient(credentials=credentials)
|
||||||
|
self.translate_client = translate.Client(credentials=credentials)
|
||||||
|
self.tts_client = texttospeech.TextToSpeechClient(credentials=credentials)
|
||||||
|
|
||||||
logger.info("✅ Google Cloud Speech and Translation clients initialized successfully")
|
logger.info("✅ Google Cloud Speech and Translation clients initialized successfully")
|
||||||
|
|
||||||
|
|
@ -42,37 +59,134 @@ class ConnectorGoogleSpeech:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def speech_to_text(self, audio_content: bytes, language: str = "de-DE",
|
async def speech_to_text(self, audio_content: bytes, language: str = "de-DE",
|
||||||
sample_rate: int = 16000, channels: int = 1) -> Dict:
|
sample_rate: int = None, channels: int = None) -> Dict:
|
||||||
"""
|
"""
|
||||||
Convert speech to text using Google Cloud Speech-to-Text API.
|
Convert speech to text using Google Cloud Speech-to-Text API.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
audio_content: Raw audio data (PCM format)
|
audio_content: Raw audio data (various formats supported)
|
||||||
language: Language code (e.g., 'de-DE', 'en-US')
|
language: Language code (e.g., 'de-DE', 'en-US')
|
||||||
sample_rate: Audio sample rate (default: 16000 Hz)
|
sample_rate: Audio sample rate (auto-detected if None)
|
||||||
channels: Number of audio channels (default: 1)
|
channels: Number of audio channels (auto-detected if None)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict containing transcribed text, confidence, and metadata
|
Dict containing transcribed text, confidence, and metadata
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
logger.info(f"🎤 Processing audio with Google Cloud Speech-to-Text")
|
# Auto-detect audio format if not provided
|
||||||
logger.info(f"📊 Audio: {len(audio_content)} bytes, {sample_rate}Hz, {channels}ch")
|
if sample_rate is None or channels is None:
|
||||||
|
validation = self.validate_audio_format(audio_content)
|
||||||
|
if not validation["valid"]:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"text": "",
|
||||||
|
"confidence": 0.0,
|
||||||
|
"error": f"Invalid audio format: {validation.get('error', 'Unknown error')}"
|
||||||
|
}
|
||||||
|
sample_rate = validation["sample_rate"]
|
||||||
|
channels = validation["channels"]
|
||||||
|
audio_format = validation["format"]
|
||||||
|
logger.info(f"Auto-detected audio: {audio_format}, {sample_rate}Hz, {channels}ch")
|
||||||
|
|
||||||
|
logger.info(f"Processing audio with Google Cloud Speech-to-Text")
|
||||||
|
logger.info(f"Audio: {len(audio_content)} bytes, {sample_rate}Hz, {channels}ch")
|
||||||
|
|
||||||
# Configure audio settings
|
# Configure audio settings
|
||||||
audio = speech.RecognitionAudio(content=audio_content)
|
audio = speech.RecognitionAudio(content=audio_content)
|
||||||
config = speech.RecognitionConfig(
|
|
||||||
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
|
# Determine encoding based on detected format
|
||||||
sample_rate_hertz=sample_rate,
|
# Google Cloud Speech API has specific requirements for different formats
|
||||||
audio_channel_count=channels,
|
if audio_format == "webm_opus":
|
||||||
language_code=language,
|
# For WEBM OPUS, we need to ensure proper format
|
||||||
enable_automatic_punctuation=True,
|
encoding = speech.RecognitionConfig.AudioEncoding.WEBM_OPUS
|
||||||
model="latest_long" # Use the latest model
|
# WEBM_OPUS requires specific sample rate handling - must match header
|
||||||
)
|
if sample_rate != 48000:
|
||||||
|
logger.warning(f"WEBM_OPUS detected but sample rate is {sample_rate}, adjusting to 48000")
|
||||||
|
sample_rate = 48000
|
||||||
|
# For WEBM_OPUS, don't specify sample_rate_hertz in config
|
||||||
|
# Google Cloud will read it from the WEBM header
|
||||||
|
use_sample_rate = False
|
||||||
|
elif audio_format == "linear16":
|
||||||
|
# For LINEAR16 format (PCM)
|
||||||
|
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
|
||||||
|
# Ensure sample rate is reasonable
|
||||||
|
if sample_rate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]:
|
||||||
|
logger.warning(f"Unusual sample rate {sample_rate}, adjusting to 16000")
|
||||||
|
sample_rate = 16000
|
||||||
|
use_sample_rate = True
|
||||||
|
elif audio_format == "mp3":
|
||||||
|
# For MP3 format
|
||||||
|
encoding = speech.RecognitionConfig.AudioEncoding.MP3
|
||||||
|
use_sample_rate = True
|
||||||
|
elif audio_format == "flac":
|
||||||
|
# For FLAC format
|
||||||
|
encoding = speech.RecognitionConfig.AudioEncoding.FLAC
|
||||||
|
use_sample_rate = True
|
||||||
|
elif audio_format == "wav":
|
||||||
|
# For WAV format
|
||||||
|
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
|
||||||
|
use_sample_rate = True
|
||||||
|
else:
|
||||||
|
# For unknown formats, try LINEAR16 as fallback
|
||||||
|
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
|
||||||
|
sample_rate = 16000 # Use standard sample rate
|
||||||
|
channels = 1 # Use mono
|
||||||
|
use_sample_rate = True
|
||||||
|
logger.warning(f"Unknown audio format '{audio_format}', using LINEAR16 encoding with 16000Hz")
|
||||||
|
|
||||||
|
# Build config based on format requirements
|
||||||
|
config_params = {
|
||||||
|
"encoding": encoding,
|
||||||
|
"audio_channel_count": channels,
|
||||||
|
"language_code": language,
|
||||||
|
"enable_automatic_punctuation": True,
|
||||||
|
"model": "latest_long", # Try latest_long model for better recognition
|
||||||
|
"enable_word_time_offsets": True, # Enable word-level timing
|
||||||
|
"enable_word_confidence": True, # Enable word-level confidence
|
||||||
|
"max_alternatives": 3, # Try more alternatives
|
||||||
|
"use_enhanced": True # Use enhanced model if available
|
||||||
|
}
|
||||||
|
|
||||||
|
# Only add sample_rate_hertz if needed (not for WEBM_OPUS)
|
||||||
|
if use_sample_rate:
|
||||||
|
config_params["sample_rate_hertz"] = sample_rate
|
||||||
|
logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sample_rate}, channels={channels}, language={language}")
|
||||||
|
else:
|
||||||
|
logger.debug(f"Recognition config: encoding={encoding}, sample_rate=auto (from header), channels={channels}, language={language}")
|
||||||
|
|
||||||
|
config = speech.RecognitionConfig(**config_params)
|
||||||
|
|
||||||
# Perform speech recognition
|
# Perform speech recognition
|
||||||
logger.info("🔄 Sending audio to Google Cloud Speech-to-Text...")
|
logger.info("Sending audio to Google Cloud Speech-to-Text...")
|
||||||
response = self.speech_client.recognize(config=config, audio=audio)
|
|
||||||
|
try:
|
||||||
|
# Use regular recognition for single audio files (not streaming)
|
||||||
|
logger.info("Using regular recognition for single audio file...")
|
||||||
|
response = self.speech_client.recognize(config=config, audio=audio)
|
||||||
|
logger.debug(f"Google Cloud response: {response}")
|
||||||
|
|
||||||
|
except Exception as api_error:
|
||||||
|
logger.error(f"Google Cloud API error: {api_error}")
|
||||||
|
# Try with different encoding as fallback
|
||||||
|
if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
|
||||||
|
logger.info("Trying fallback with LINEAR16 encoding...")
|
||||||
|
fallback_config = speech.RecognitionConfig(
|
||||||
|
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
|
||||||
|
sample_rate_hertz=16000, # Use standard sample rate
|
||||||
|
audio_channel_count=1,
|
||||||
|
language_code=language,
|
||||||
|
enable_automatic_punctuation=True,
|
||||||
|
model="latest_long"
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = self.speech_client.recognize(config=fallback_config, audio=audio)
|
||||||
|
logger.debug(f"Google Cloud fallback response: {response}")
|
||||||
|
except Exception as fallback_error:
|
||||||
|
logger.error(f"Google Cloud fallback error: {fallback_error}")
|
||||||
|
raise api_error
|
||||||
|
else:
|
||||||
|
raise api_error
|
||||||
|
|
||||||
# Process results
|
# Process results
|
||||||
if response.results:
|
if response.results:
|
||||||
|
|
@ -82,7 +196,7 @@ class ConnectorGoogleSpeech:
|
||||||
transcribed_text = alternative.transcript
|
transcribed_text = alternative.transcript
|
||||||
confidence = alternative.confidence
|
confidence = alternative.confidence
|
||||||
|
|
||||||
logger.info(f"✅ Transcription successful: '{transcribed_text}' (confidence: {confidence:.2f})")
|
logger.info(f"Transcription successful: '{transcribed_text}' (confidence: {confidence:.2f})")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"success": True,
|
"success": True,
|
||||||
|
|
@ -96,7 +210,8 @@ class ConnectorGoogleSpeech:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
logger.warning("⚠️ No transcription alternatives found")
|
logger.warning("No transcription alternatives found")
|
||||||
|
logger.debug(f"Result details: {result}")
|
||||||
return {
|
return {
|
||||||
"success": False,
|
"success": False,
|
||||||
"text": "",
|
"text": "",
|
||||||
|
|
@ -104,16 +219,102 @@ class ConnectorGoogleSpeech:
|
||||||
"error": "No transcription alternatives found"
|
"error": "No transcription alternatives found"
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
logger.warning("⚠️ No recognition results from Google Cloud")
|
logger.warning("No recognition results from Google Cloud")
|
||||||
|
logger.debug(f"Response details: {response}")
|
||||||
|
|
||||||
|
# Check if there are any error messages in the response
|
||||||
|
if hasattr(response, 'error') and response.error:
|
||||||
|
logger.error(f"Google Cloud error: {response.error}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"text": "",
|
||||||
|
"confidence": 0.0,
|
||||||
|
"error": f"Google Cloud error: {response.error}"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Try multiple fallback approaches
|
||||||
|
fallback_configs = []
|
||||||
|
|
||||||
|
if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
|
||||||
|
# Try LINEAR16 with detected sample rate
|
||||||
|
fallback_configs.append({
|
||||||
|
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
|
||||||
|
"sample_rate": sample_rate,
|
||||||
|
"channels": channels,
|
||||||
|
"use_sample_rate": True,
|
||||||
|
"description": f"LINEAR16 with {sample_rate}Hz"
|
||||||
|
})
|
||||||
|
|
||||||
|
# Try LINEAR16 with standard sample rates
|
||||||
|
for std_rate in [16000, 8000, 22050, 44100]:
|
||||||
|
if std_rate != sample_rate:
|
||||||
|
fallback_configs.append({
|
||||||
|
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
|
||||||
|
"sample_rate": std_rate,
|
||||||
|
"channels": 1,
|
||||||
|
"use_sample_rate": True,
|
||||||
|
"description": f"LINEAR16 with {std_rate}Hz"
|
||||||
|
})
|
||||||
|
|
||||||
|
# Try with different models
|
||||||
|
models = ["latest_long", "phone_call", "latest_short"]
|
||||||
|
|
||||||
|
for fallback_config in fallback_configs:
|
||||||
|
for model in models:
|
||||||
|
try:
|
||||||
|
logger.info(f"Trying fallback: {fallback_config['description']} with {model} model...")
|
||||||
|
|
||||||
|
# Build fallback config with proper sample rate handling
|
||||||
|
fallback_config_params = {
|
||||||
|
"encoding": fallback_config["encoding"],
|
||||||
|
"audio_channel_count": fallback_config["channels"],
|
||||||
|
"language_code": language,
|
||||||
|
"enable_automatic_punctuation": True,
|
||||||
|
"model": model
|
||||||
|
}
|
||||||
|
|
||||||
|
# Only add sample_rate_hertz if needed
|
||||||
|
if fallback_config["use_sample_rate"]:
|
||||||
|
fallback_config_params["sample_rate_hertz"] = fallback_config["sample_rate"]
|
||||||
|
|
||||||
|
fallback_config_obj = speech.RecognitionConfig(**fallback_config_params)
|
||||||
|
fallback_response = self.speech_client.recognize(config=fallback_config_obj, audio=audio)
|
||||||
|
|
||||||
|
if fallback_response.results:
|
||||||
|
result = fallback_response.results[0]
|
||||||
|
if result.alternatives:
|
||||||
|
alternative = result.alternatives[0]
|
||||||
|
transcribed_text = alternative.transcript
|
||||||
|
confidence = alternative.confidence
|
||||||
|
|
||||||
|
logger.info(f"Fallback transcription successful: '{transcribed_text}' (confidence: {confidence:.2f})")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"text": transcribed_text,
|
||||||
|
"confidence": confidence,
|
||||||
|
"language": language,
|
||||||
|
"raw_result": {
|
||||||
|
"transcript": transcribed_text,
|
||||||
|
"confidence": confidence,
|
||||||
|
"language_code": language
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Fallback failed: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.warning("All fallback attempts failed")
|
||||||
return {
|
return {
|
||||||
"success": False,
|
"success": False,
|
||||||
"text": "",
|
"text": "",
|
||||||
"confidence": 0.0,
|
"confidence": 0.0,
|
||||||
"error": "No recognition results"
|
"error": "No recognition results - audio may be too short, unclear, or in unsupported format"
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"❌ Google Cloud Speech-to-Text error: {e}")
|
logger.error(f"Google Cloud Speech-to-Text error: {e}")
|
||||||
return {
|
return {
|
||||||
"success": False,
|
"success": False,
|
||||||
"text": "",
|
"text": "",
|
||||||
|
|
@ -155,6 +356,9 @@ class ConnectorGoogleSpeech:
|
||||||
translated_text = result['translatedText']
|
translated_text = result['translatedText']
|
||||||
detected_language = result.get('detectedSourceLanguage', source_language)
|
detected_language = result.get('detectedSourceLanguage', source_language)
|
||||||
|
|
||||||
|
# Decode HTML entities in translated text
|
||||||
|
translated_text = html.unescape(translated_text)
|
||||||
|
|
||||||
logger.info(f"✅ Translation successful: '{translated_text}'")
|
logger.info(f"✅ Translation successful: '{translated_text}'")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|
@ -256,26 +460,154 @@ class ConnectorGoogleSpeech:
|
||||||
Dict containing validation results
|
Dict containing validation results
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Google Cloud Speech-to-Text supports various formats
|
# Basic validation
|
||||||
# We'll do basic validation
|
|
||||||
if len(audio_content) < 100:
|
if len(audio_content) < 100:
|
||||||
return {
|
return {
|
||||||
"valid": False,
|
"valid": False,
|
||||||
"error": "Audio too short (less than 100 bytes)"
|
"error": "Audio too short (less than 100 bytes)"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check if it looks like PCM audio (basic check)
|
# Detect audio format by checking file headers
|
||||||
if len(audio_content) % 2 != 0:
|
audio_format = "unknown"
|
||||||
return {
|
sample_rate = 16000 # Default fallback
|
||||||
"valid": False,
|
channels = 1 # Default fallback
|
||||||
"error": "Audio data length is odd (not 16-bit PCM)"
|
|
||||||
}
|
# Debug: Log first few bytes for format detection
|
||||||
|
logger.debug(f"Audio header bytes: {audio_content[:20].hex()}")
|
||||||
|
logger.debug(f"Audio content length: {len(audio_content)} bytes")
|
||||||
|
|
||||||
|
# Check for WEBM/OPUS format (common from web recordings)
|
||||||
|
if audio_content.startswith(b'\x1a\x45\xdf\xa3'):
|
||||||
|
audio_format = "webm_opus"
|
||||||
|
sample_rate = 48000 # WEBM OPUS typically uses 48kHz
|
||||||
|
channels = 1
|
||||||
|
logger.info(f"Detected WEBM OPUS format: {sample_rate}Hz, {channels}ch")
|
||||||
|
|
||||||
|
# Check for specific header patterns seen in logs (43c381...)
|
||||||
|
# This appears to be a different audio format or corrupted WEBM
|
||||||
|
elif audio_content.startswith(b'\x43\xc3\x81') and len(audio_content) > 1000:
|
||||||
|
# This might be a different format or corrupted audio
|
||||||
|
# Try to detect if it's actually WEBM by looking deeper
|
||||||
|
if b'webm' in audio_content[:200] or b'opus' in audio_content[:200]:
|
||||||
|
audio_format = "webm_opus"
|
||||||
|
sample_rate = 48000
|
||||||
|
channels = 1
|
||||||
|
logger.info(f"Detected WEBM format (deep scan): {sample_rate}Hz, {channels}ch")
|
||||||
|
else:
|
||||||
|
# Unknown format, try as LINEAR16
|
||||||
|
audio_format = "linear16"
|
||||||
|
sample_rate = 16000
|
||||||
|
channels = 1
|
||||||
|
logger.warning(f"Unknown audio format with header {audio_content[:8].hex()}, trying LINEAR16")
|
||||||
|
|
||||||
|
# Check for WEBM format (alternative detection)
|
||||||
|
elif b'webm' in audio_content[:100].lower() or b'opus' in audio_content[:100].lower():
|
||||||
|
audio_format = "webm_opus"
|
||||||
|
sample_rate = 48000 # WEBM OPUS typically uses 48kHz
|
||||||
|
channels = 1
|
||||||
|
logger.info(f"Detected WEBM format: {sample_rate}Hz, {channels}ch")
|
||||||
|
|
||||||
|
# Check for MediaRecorder WEBM chunks (common in browser recordings)
|
||||||
|
elif audio_content.startswith(b'\x1a\x45\xdf\xa3') and len(audio_content) > 1000:
|
||||||
|
audio_format = "webm_opus"
|
||||||
|
sample_rate = 48000 # Browser MediaRecorder typically uses 48kHz
|
||||||
|
channels = 1
|
||||||
|
logger.info(f"Detected MediaRecorder WEBM: {sample_rate}Hz, {channels}ch")
|
||||||
|
|
||||||
|
# Check for OPUS format by looking for OPUS magic bytes
|
||||||
|
elif audio_content.startswith(b'OpusHead') or b'OpusHead' in audio_content[:50]:
|
||||||
|
audio_format = "webm_opus"
|
||||||
|
sample_rate = 48000 # OPUS typically uses 48kHz
|
||||||
|
channels = 1
|
||||||
|
logger.info(f"Detected OPUS format: {sample_rate}Hz, {channels}ch")
|
||||||
|
|
||||||
|
# Check for OGG format (often contains OPUS)
|
||||||
|
elif audio_content.startswith(b'OggS'):
|
||||||
|
audio_format = "webm_opus"
|
||||||
|
sample_rate = 48000 # OGG OPUS typically uses 48kHz
|
||||||
|
channels = 1
|
||||||
|
logger.info(f"Detected OGG format: {sample_rate}Hz, {channels}ch")
|
||||||
|
|
||||||
|
# Check for WAV format
|
||||||
|
elif audio_content.startswith(b'RIFF') and b'WAVE' in audio_content[:12]:
|
||||||
|
audio_format = "wav"
|
||||||
|
# Try to extract sample rate from WAV header
|
||||||
|
try:
|
||||||
|
# WAV header sample rate is at offset 24-27 (little endian)
|
||||||
|
sample_rate = int.from_bytes(audio_content[24:28], 'little')
|
||||||
|
channels = int.from_bytes(audio_content[22:24], 'little')
|
||||||
|
logger.info(f"Detected WAV format: {sample_rate}Hz, {channels}ch")
|
||||||
|
except:
|
||||||
|
sample_rate = 16000 # Fallback
|
||||||
|
channels = 1
|
||||||
|
|
||||||
|
# Check for MP3 format
|
||||||
|
elif audio_content.startswith(b'\xff\xfb') or audio_content.startswith(b'ID3'):
|
||||||
|
audio_format = "mp3"
|
||||||
|
sample_rate = 44100 # MP3 typically uses 44.1kHz
|
||||||
|
channels = 2 # Usually stereo
|
||||||
|
logger.info(f"Detected MP3 format: {sample_rate}Hz, {channels}ch")
|
||||||
|
|
||||||
|
# Check for FLAC format
|
||||||
|
elif audio_content.startswith(b'fLaC'):
|
||||||
|
audio_format = "flac"
|
||||||
|
sample_rate = 44100 # Common FLAC sample rate
|
||||||
|
channels = 2
|
||||||
|
logger.info(f"Detected FLAC format: {sample_rate}Hz, {channels}ch")
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Unknown format, try WEBM_OPUS as it's most common for web recordings
|
||||||
|
audio_format = "webm_opus"
|
||||||
|
sample_rate = 48000 # Try 48kHz for web recordings
|
||||||
|
channels = 1
|
||||||
|
logger.warning(f"Unknown audio format, trying WEBM_OPUS: {sample_rate}Hz, {channels}ch")
|
||||||
|
|
||||||
|
# Calculate estimated duration
|
||||||
|
if audio_format == "webm_opus":
|
||||||
|
# WEBM OPUS duration is hard to calculate without decoding
|
||||||
|
estimated_duration = 3.0 # Assume 3 seconds for web recordings
|
||||||
|
else:
|
||||||
|
# Rough estimate for uncompressed audio
|
||||||
|
estimated_duration = len(audio_content) / (sample_rate * channels * 2) # 16-bit = 2 bytes per sample
|
||||||
|
|
||||||
|
# Check if audio is too short (less than 0.5 seconds)
|
||||||
|
if estimated_duration < 0.5:
|
||||||
|
logger.warning(f"Audio too short: {estimated_duration:.2f}s, may not be recognized")
|
||||||
|
|
||||||
|
# Log audio details for debugging
|
||||||
|
logger.info(f"Audio analysis: {len(audio_content)} bytes, {estimated_duration:.2f}s, {sample_rate}Hz, {channels}ch, format={audio_format}")
|
||||||
|
|
||||||
|
# Check audio levels (simple check for silence)
|
||||||
|
if audio_format == "webm_opus":
|
||||||
|
# For WEBM, we can't easily check levels, but log the first few bytes
|
||||||
|
logger.debug(f"Audio sample bytes: {audio_content[:20].hex()}")
|
||||||
|
# Check if audio has some variation (not all same bytes)
|
||||||
|
if len(audio_content) > 100:
|
||||||
|
sample_bytes = audio_content[100:200] # Skip header
|
||||||
|
if len(set(sample_bytes)) < 5: # Less than 5 different byte values
|
||||||
|
logger.warning("Audio may be silent or very quiet (low byte variation)")
|
||||||
|
else:
|
||||||
|
logger.debug(f"Audio has good byte variation: {len(set(sample_bytes))} unique values")
|
||||||
|
else:
|
||||||
|
# For PCM audio, check for silence
|
||||||
|
if len(audio_content) > 100:
|
||||||
|
# Convert first 100 bytes to check for silence
|
||||||
|
sample_bytes = audio_content[:100]
|
||||||
|
if all(b == 0 for b in sample_bytes):
|
||||||
|
logger.warning("Audio appears to be silent (all zeros)")
|
||||||
|
else:
|
||||||
|
logger.debug(f"Audio sample bytes: {sample_bytes[:20].hex()}")
|
||||||
|
# Check for low variation
|
||||||
|
if len(set(sample_bytes)) < 5:
|
||||||
|
logger.warning("Audio may be very quiet (low byte variation)")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"valid": True,
|
"valid": True,
|
||||||
"format": "pcm",
|
"format": audio_format,
|
||||||
|
"sample_rate": sample_rate,
|
||||||
|
"channels": channels,
|
||||||
"size": len(audio_content),
|
"size": len(audio_content),
|
||||||
"estimated_duration": len(audio_content) / (16000 * 2) # Rough estimate for 16kHz, 16-bit
|
"estimated_duration": estimated_duration
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -283,3 +615,181 @@ class ConnectorGoogleSpeech:
|
||||||
"valid": False,
|
"valid": False,
|
||||||
"error": f"Validation error: {e}"
|
"error": f"Validation error: {e}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async def text_to_speech(self, text: str, language_code: str = "de-DE", voice_name: Optional[str] = None) -> Dict[str, Any]:
    """
    Convert text to speech using Google Cloud Text-to-Speech.

    Args:
        text: Text to convert to speech. Empty or whitespace-only text is
            rejected locally without calling the API.
        language_code: Language code (e.g., 'de-DE', 'en-US')
        voice_name: Specific voice name (optional). When omitted, the voice
            returned by self._get_default_voice(language_code) is used.

    Returns:
        Dict with "success": True plus "audio_content", "audio_format" ("mp3"),
        "language_code" and "voice_name" on success, or "success": False plus
        "error" on failure. Exceptions are caught and reported through the
        returned dict rather than propagated.
    """
    # Nothing to synthesize: fail fast instead of spending an API round trip
    # on a request that carries no speakable input.
    if not text or not text.strip():
        return {
            "success": False,
            "error": "Text-to-Speech failed: text is empty"
        }

    try:
        # Only show an ellipsis when the preview is actually truncated
        preview = text if len(text) <= 50 else f"{text[:50]}..."
        logger.info(f"Converting text to speech: '{preview}' in {language_code}")

        # Set up the synthesis input
        synthesis_input = texttospeech.SynthesisInput(text=text)

        # Build the voice request
        selected_voice = voice_name or self._get_default_voice(language_code)
        logger.info(f"Using TTS voice: {selected_voice} for language: {language_code}")

        voice = texttospeech.VoiceSelectionParams(
            language_code=language_code,
            name=selected_voice,
            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL
        )

        # Select the type of audio file to return
        audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3
        )

        # synthesize_speech is a blocking call; run it in the default executor
        # so this coroutine does not stall the event loop while waiting on the
        # network.
        # NOTE(review): assumes the TTS client may be called from a worker
        # thread - confirm against the client library documentation.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: self.tts_client.synthesize_speech(
                input=synthesis_input,
                voice=voice,
                audio_config=audio_config
            )
        )

        # Return the audio content
        return {
            "success": True,
            "audio_content": response.audio_content,
            "audio_format": "mp3",
            "language_code": language_code,
            "voice_name": selected_voice
        }

    except Exception as e:
        logger.error(f"Text-to-Speech error: {e}")
        return {
            "success": False,
            "error": f"Text-to-Speech failed: {str(e)}"
        }
|
||||||
|
|
||||||
|
def _get_default_voice(self, language_code: str) -> str:
    """
    Get default voice name for a language code.

    Every supported locale maps to its "<locale>-Wavenet-B" voice, except
    for a few locales whose voice names use a different prefix (Mandarin
    and Filipino). The lookup tolerates underscore separators and
    different letter case ('de_de' resolves like 'de-DE').

    NOTE(review): the previous per-entry comments labelled every voice
    "female". That has not been verified against Google's published voice
    list, and not every locale below is confirmed to offer a Wavenet-B
    voice - check before relying on either.

    Args:
        language_code: Locale such as 'de-DE' or 'en-US'. None or an
            unknown locale yields the fallback voice.

    Returns:
        The default voice name, or 'en-US-Wavenet-B' when the locale is
        not in the table.
    """
    # Locales whose default voice is simply "<locale>-Wavenet-B".
    standard_locales = (
        # European Languages
        'de-DE', 'en-US', 'en-GB', 'en-AU', 'en-CA', 'en-IN',
        'fr-FR', 'fr-CA',
        'es-ES', 'es-MX', 'es-AR', 'es-CO', 'es-PE', 'es-VE', 'es-CL',
        'es-UY', 'es-BO', 'es-CR', 'es-EC', 'es-GT', 'es-HN', 'es-NI',
        'es-PA', 'es-PY', 'es-PR', 'es-DO', 'es-SV',
        'it-IT', 'pt-PT', 'pt-BR', 'nl-NL', 'pl-PL', 'ru-RU', 'uk-UA',
        'cs-CZ', 'sk-SK', 'hu-HU', 'ro-RO', 'bg-BG', 'hr-HR', 'sr-RS',
        'sl-SI', 'et-EE', 'lv-LV', 'lt-LT', 'fi-FI', 'sv-SE', 'no-NO',
        'da-DK', 'is-IS', 'el-GR', 'ca-ES', 'eu-ES', 'gl-ES', 'cy-GB',
        'ga-IE', 'mt-MT',
        # Asian Languages
        'ja-JP', 'ko-KR', 'hi-IN', 'bn-IN', 'te-IN', 'ta-IN', 'gu-IN',
        'kn-IN', 'ml-IN', 'pa-IN', 'or-IN', 'as-IN', 'ne-NP', 'si-LK',
        'th-TH', 'vi-VN', 'id-ID', 'ms-MY', 'tr-TR',
        # Middle Eastern & African Languages
        'ar-SA', 'ar-EG', 'ar-AE', 'ar-JO', 'ar-KW', 'ar-LB', 'ar-QA',
        'ar-BH', 'ar-OM', 'ar-IQ', 'ar-PS', 'ar-SY', 'ar-YE', 'ar-MA',
        'ar-DZ', 'ar-TN', 'ar-LY', 'ar-SD',
        'he-IL', 'fa-IR', 'ur-PK', 'af-ZA', 'sw-KE', 'am-ET', 'sw-TZ',
        'zu-ZA', 'xh-ZA',
    )

    # Locales whose voice name does not start with the locale code itself.
    voice_overrides = {
        'zh-CN': 'cmn-CN-Wavenet-B',   # Chinese Mandarin
        'zh-TW': 'cmn-TW-Wavenet-B',   # Chinese Traditional
        'zh-HK': 'cmn-HK-Wavenet-B',   # Chinese Hong Kong
        'tl-PH': 'fil-PH-Wavenet-B',   # Filipino
    }

    # Canonicalise "ll_cc" / "LL-cc" style input to "ll-CC" before lookup.
    normalized = (language_code or '').strip().replace('_', '-')
    parts = normalized.split('-')
    if len(parts) == 2:
        normalized = f"{parts[0].lower()}-{parts[1].upper()}"

    if normalized in voice_overrides:
        return voice_overrides[normalized]
    if normalized in standard_locales:
        return f"{normalized}-Wavenet-B"
    return 'en-US-Wavenet-B'
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,14 +5,16 @@ Replaces Azure voice services with Google Cloud Speech-to-Text and Translation
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException
|
from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException, Body
|
||||||
from typing import Optional
|
from fastapi.responses import Response
|
||||||
|
from typing import Optional, Dict, Any
|
||||||
from modules.connectors.connectorGoogleSpeech import ConnectorGoogleSpeech
|
from modules.connectors.connectorGoogleSpeech import ConnectorGoogleSpeech
|
||||||
from modules.security.auth import getCurrentUser
|
from modules.security.auth import getCurrentUser
|
||||||
from modules.interfaces.interfaceAppModel import User
|
from modules.interfaces.interfaceAppModel import User
|
||||||
|
from modules.interfaces.interfaceComponentObjects import getInterface
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
router = APIRouter(prefix="/voice-google", tags=["voice-google"])
|
router = APIRouter(prefix="/voice-google", tags=["Voice Google"])
|
||||||
|
|
||||||
# Global connector instance
|
# Global connector instance
|
||||||
_google_speech_connector = None
|
_google_speech_connector = None
|
||||||
|
|
@ -23,28 +25,7 @@ def get_google_speech_connector() -> ConnectorGoogleSpeech:
|
||||||
|
|
||||||
if _google_speech_connector is None:
|
if _google_speech_connector is None:
|
||||||
try:
|
try:
|
||||||
# Get credentials path from environment or config
|
_google_speech_connector = ConnectorGoogleSpeech()
|
||||||
credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
|
|
||||||
if not credentials_path:
|
|
||||||
# Try to find credentials in common locations
|
|
||||||
possible_paths = [
|
|
||||||
"credentials/google-service-account.json",
|
|
||||||
"config/google-credentials.json",
|
|
||||||
"google-credentials.json"
|
|
||||||
]
|
|
||||||
|
|
||||||
for path in possible_paths:
|
|
||||||
if os.path.exists(path):
|
|
||||||
credentials_path = path
|
|
||||||
break
|
|
||||||
|
|
||||||
if not credentials_path:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=500,
|
|
||||||
detail="Google Cloud credentials not found. Please set GOOGLE_APPLICATION_CREDENTIALS environment variable or place credentials file in project directory."
|
|
||||||
)
|
|
||||||
|
|
||||||
_google_speech_connector = ConnectorGoogleSpeech(credentials_path)
|
|
||||||
logger.info("✅ Google Cloud Speech connector initialized")
|
logger.info("✅ Google Cloud Speech connector initialized")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -173,13 +154,15 @@ async def realtime_interpreter(
|
||||||
try:
|
try:
|
||||||
logger.info(f"🔄 Real-time interpreter request: {audio_file.filename}")
|
logger.info(f"🔄 Real-time interpreter request: {audio_file.filename}")
|
||||||
logger.info(f" From: {from_language} -> To: {to_language}")
|
logger.info(f" From: {from_language} -> To: {to_language}")
|
||||||
|
logger.info(f" MIME type: {audio_file.content_type}")
|
||||||
|
|
||||||
# Read audio file
|
# Read audio file
|
||||||
audio_content = await audio_file.read()
|
audio_content = await audio_file.read()
|
||||||
logger.info(f"📊 Audio file size: {len(audio_content)} bytes")
|
logger.info(f"📊 Audio file size: {len(audio_content)} bytes")
|
||||||
|
|
||||||
# Save audio file for debugging
|
# Save audio file for debugging with correct extension
|
||||||
debug_filename = f"debug_audio/audio_google_{audio_file.filename}"
|
file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav"
|
||||||
|
debug_filename = f"debug_audio/audio_google_{audio_file.filename.replace('.wav', '.webm')}"
|
||||||
os.makedirs("debug_audio", exist_ok=True)
|
os.makedirs("debug_audio", exist_ok=True)
|
||||||
with open(debug_filename, "wb") as f:
|
with open(debug_filename, "wb") as f:
|
||||||
f.write(audio_content)
|
f.write(audio_content)
|
||||||
|
|
@ -235,6 +218,56 @@ async def realtime_interpreter(
|
||||||
detail=f"Real-time interpreter processing failed: {str(e)}"
|
detail=f"Real-time interpreter processing failed: {str(e)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/text-to-speech")
async def text_to_speech(
    text: str = Form(...),
    language: str = Form("de-DE"),
    voice: str = Form(None),
    current_user: User = Depends(getCurrentUser)
):
    """
    Synthesize the submitted text and return it as an MP3 response.

    Form fields:
        text: Text to speak (required, must not be blank).
        language: Language code passed to the connector (default "de-DE").
        voice: Optional voice name; the connector picks one when omitted.

    Returns:
        A ``Response`` with media type "audio/mpeg" whose body is the
        connector's ``audio_content``; the voice and language actually
        used are exposed in the X-Voice-Name / X-Language-Code headers.

    Raises:
        HTTPException 400: blank text, or the connector reported failure.
        HTTPException 500: any other unexpected error.
    """
    try:
        logger.info(f"Text-to-Speech request: '{text[:50]}...' in {language}")

        # Guard: nothing to synthesize.
        if not text.strip():
            raise HTTPException(
                status_code=400,
                detail="Empty text provided for text-to-speech"
            )

        speech_connector = get_google_speech_connector()
        synthesis = await speech_connector.text_to_speech(
            text=text,
            language_code=language,
            voice_name=voice
        )

        # Guard: connector reports failures through its result dict.
        if not synthesis["success"]:
            raise HTTPException(
                status_code=400,
                detail=f"Text-to-Speech failed: {synthesis.get('error', 'Unknown error')}"
            )

        response_headers = {
            "Content-Disposition": "attachment; filename=speech.mp3",
            "X-Voice-Name": synthesis["voice_name"],
            "X-Language-Code": synthesis["language_code"]
        }
        return Response(
            content=synthesis["audio_content"],
            media_type="audio/mpeg",
            headers=response_headers
        )

    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Text-to-Speech error: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Text-to-Speech processing failed: {str(e)}"
        )
|
||||||
|
|
||||||
@router.get("/health")
|
@router.get("/health")
|
||||||
async def health_check(current_user: User = Depends(getCurrentUser)):
|
async def health_check(current_user: User = Depends(getCurrentUser)):
|
||||||
"""Health check for Google Cloud voice services."""
|
"""Health check for Google Cloud voice services."""
|
||||||
|
|
@ -266,3 +299,113 @@ async def health_check(current_user: User = Depends(getCurrentUser)):
|
||||||
"status": "unhealthy",
|
"status": "unhealthy",
|
||||||
"error": str(e)
|
"error": str(e)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@router.get("/settings")
async def get_voice_settings(current_user: User = Depends(getCurrentUser)):
    """
    Return the current user's voice settings together with the defaults.

    The settings are obtained via ``getOrCreateVoiceSettings`` on the
    user-scoped interface. When that call yields nothing, the response
    still succeeds but carries ``user_settings: None`` so the client can
    fall back to ``default_settings``.

    Raises:
        HTTPException 500: any error while loading the settings.
    """
    try:
        logger.info(f"Getting voice settings for user: {current_user.id}")

        # Defaults are identical for both outcomes, so build them once.
        fallback_defaults = {
            "sttLanguage": "de-DE",
            "ttsLanguage": "de-DE",
            "ttsVoice": "de-DE-Wavenet-A",
            "translationEnabled": True,
            "targetLanguage": "en-US"
        }

        # Database interface bound to the requesting user
        db_interface = getInterface(current_user)
        stored = db_interface.getOrCreateVoiceSettings(current_user.id)

        if stored:
            user_payload = stored.to_dict()
        else:
            # Database gave us nothing usable - serve defaults only
            logger.warning("Failed to get voice settings from database, using defaults")
            user_payload = None

        return {
            "success": True,
            "data": {
                "user_settings": user_payload,
                "default_settings": fallback_defaults
            }
        }

    except Exception as e:
        logger.error(f"Error getting voice settings: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get voice settings: {str(e)}"
        )
|
||||||
|
|
||||||
|
@router.post("/settings")
async def save_voice_settings(
    settings: Dict[str, Any] = Body(...),
    current_user: User = Depends(getCurrentUser)
):
    """
    Save voice settings for the current user.

    The request body must contain "sttLanguage", "ttsLanguage" and
    "ttsVoice". "translationEnabled" (True) and "targetLanguage" ("en-US")
    are filled in when absent. Existing settings are updated, otherwise a
    new record is created for the authenticated user.

    A "userId" supplied in the body is ignored: ownership always comes
    from the authenticated user, never from client input.

    Returns:
        Dict with ``success``, ``message`` and the saved settings in ``data``.

    Raises:
        HTTPException 400: a required field is missing.
        HTTPException 500: any other error while saving.
    """
    try:
        logger.info(f"Saving voice settings for user: {current_user.id}")
        logger.info(f"Settings: {settings}")

        # Validate required settings
        required_fields = ("sttLanguage", "ttsLanguage", "ttsVoice")
        for field in required_fields:
            if field not in settings:
                raise HTTPException(
                    status_code=400,
                    detail=f"Missing required field: {field}"
                )

        # Work on a copy so the request body is not mutated, and drop any
        # client-supplied owner so it cannot reach the update call.
        # NOTE(review): other identity fields (e.g. a record id) may need
        # the same treatment - confirm against the settings model.
        payload = {key: value for key, value in settings.items() if key != "userId"}

        # Set default values for optional fields if not provided
        payload.setdefault("translationEnabled", True)
        payload.setdefault("targetLanguage", "en-US")

        # Get database interface with user context
        interface = getInterface(current_user)

        # Check if settings already exist for this user
        existing_settings = interface.getVoiceSettings(current_user.id)

        if existing_settings:
            # Update existing settings
            logger.info(f"Updating existing voice settings for user {current_user.id}")
            updated_settings = interface.updateVoiceSettings(current_user.id, payload)
            logger.info(f"Voice settings updated for user {current_user.id}: {updated_settings}")
        else:
            # Create new settings owned by the authenticated user
            logger.info(f"Creating new voice settings for user {current_user.id}")
            payload["userId"] = current_user.id
            created_settings = interface.createVoiceSettings(payload)
            logger.info(f"Voice settings created for user {current_user.id}: {created_settings}")

        return {
            "success": True,
            "message": "Voice settings saved successfully",
            "data": payload
        }

    except HTTPException:
        # Validation errors raised above pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Error saving voice settings: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to save voice settings: {str(e)}"
        )
|
||||||
|
|
|
||||||
231
modules/routes/routeVoiceStreaming.py
Normal file
231
modules/routes/routeVoiceStreaming.py
Normal file
|
|
@ -0,0 +1,231 @@
|
||||||
|
"""
|
||||||
|
Voice Streaming WebSocket Routes
|
||||||
|
Provides real-time audio streaming for voice services
|
||||||
|
"""
|
||||||
|
|
||||||
|
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Depends
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
import logging
|
||||||
|
import json
|
||||||
|
import base64
|
||||||
|
import asyncio
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
from modules.connectors.connectorGoogleSpeech import ConnectorGoogleSpeech
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
router = APIRouter(prefix="/api/voice/ws", tags=["Voice Streaming"])
|
||||||
|
|
||||||
|
# Store active connections
|
||||||
|
active_connections: Dict[str, WebSocket] = {}
|
||||||
|
|
||||||
|
class ConnectionManager:
    """
    Tracks open WebSocket connections for the voice streaming endpoints.

    Sockets are kept in two places: a per-manager list
    (``self.active_connections``) and the module-level
    ``active_connections`` dict keyed by connection id.
    """

    def __init__(self):
        # Every accepted socket, in connection order.
        self.active_connections: List[WebSocket] = []

    async def connect(self, websocket: WebSocket, connection_id: str):
        """Accept the socket and register it under ``connection_id``.

        A later connection with the same id replaces the registry entry of
        the earlier one; both sockets stay in the per-manager list.
        """
        await websocket.accept()
        self.active_connections.append(websocket)
        active_connections[connection_id] = websocket
        logger.info(f"WebSocket connected: {connection_id}")

    def disconnect(self, websocket: WebSocket, connection_id: str):
        """Forget ``websocket``; safe to call more than once.

        The registry entry is removed only when it still points at this
        socket, so a stale disconnect cannot unregister a newer connection
        that reused the same id.
        """
        if websocket in self.active_connections:
            self.active_connections.remove(websocket)
        if active_connections.get(connection_id) is websocket:
            del active_connections[connection_id]
        logger.info(f"WebSocket disconnected: {connection_id}")

    async def send_personal_message(self, message: dict, websocket: WebSocket):
        """Send ``message`` as JSON text to one socket.

        Best effort: send failures are logged and not raised.
        """
        try:
            await websocket.send_text(json.dumps(message))
        except Exception as e:
            logger.error(f"Error sending message: {e}")
|
||||||
|
|
||||||
|
manager = ConnectionManager()
|
||||||
|
|
||||||
|
@router.websocket("/realtime-interpreter")
async def websocket_realtime_interpreter(
    websocket: WebSocket,
    user_id: str = "default",
    from_language: str = "de-DE",
    to_language: str = "en-US"
):
    """
    WebSocket endpoint for real-time voice interpretation.

    Protocol (JSON text frames):
        {"type": "audio_chunk", "data": <base64>, "timestamp": ...}
            -> {"type": "audio_received", "chunk_size": ..., "timestamp": ...}
        {"type": "ping", "timestamp": ...}
            -> {"type": "pong", "timestamp": ...}

    Audio is currently only acknowledged; no transcription or translation
    happens yet. Malformed frames (invalid JSON, non-object payloads) are
    answered with an error message instead of closing the connection.

    NOTE(review): ``user_id`` comes straight from the client and no
    authentication dependency is visible here - confirm this is intended.
    """
    connection_id = f"realtime_{user_id}_{from_language}_{to_language}"

    try:
        await manager.connect(websocket, connection_id)

        # Send connection confirmation
        await manager.send_personal_message({
            "type": "connected",
            "connection_id": connection_id,
            "message": "Connected to real-time interpreter"
        }, websocket)

        # Initialize Google Speech connector.
        # NOTE(review): not used yet; kept so connector initialisation
        # errors still surface at connect time as before.
        google_speech = ConnectorGoogleSpeech()

        while True:
            # Receive message from client
            data = await websocket.receive_text()

            try:
                message = json.loads(data)
            except json.JSONDecodeError as e:
                # A bad frame should not take the whole session down.
                logger.warning(f"Invalid JSON message: {e}")
                await manager.send_personal_message({
                    "type": "error",
                    "error": f"Invalid JSON message: {str(e)}"
                }, websocket)
                continue

            # Non-object payloads and objects without "type" count as unknown.
            message_type = message.get("type") if isinstance(message, dict) else None

            if message_type == "audio_chunk":
                # Process audio chunk
                try:
                    # Decode base64 audio data
                    audio_data = base64.b64decode(message["data"])

                    # For now, just acknowledge receipt
                    # In a full implementation, this would:
                    # 1. Buffer audio chunks
                    # 2. Process with Google Cloud Speech-to-Text streaming
                    # 3. Send partial results back
                    # 4. Handle translation

                    await manager.send_personal_message({
                        "type": "audio_received",
                        "chunk_size": len(audio_data),
                        "timestamp": message.get("timestamp")
                    }, websocket)

                except Exception as e:
                    logger.error(f"Error processing audio chunk: {e}")
                    await manager.send_personal_message({
                        "type": "error",
                        "error": f"Failed to process audio: {str(e)}"
                    }, websocket)

            elif message_type == "ping":
                # Respond to ping
                await manager.send_personal_message({
                    "type": "pong",
                    "timestamp": message.get("timestamp")
                }, websocket)

            else:
                logger.warning(f"Unknown message type: {message_type}")

    except WebSocketDisconnect:
        manager.disconnect(websocket, connection_id)
        logger.info(f"Client disconnected: {connection_id}")
    except Exception as e:
        logger.error(f"WebSocket error: {e}")
        manager.disconnect(websocket, connection_id)
|
||||||
|
|
||||||
|
@router.websocket("/speech-to-text")
async def websocket_speech_to_text(
    websocket: WebSocket,
    user_id: str = "default",
    language: str = "de-DE"
):
    """
    WebSocket endpoint for real-time speech-to-text.

    Protocol (JSON text frames):
        {"type": "audio_chunk", "data": <base64>}
            -> {"type": "transcription_result", ...}
        {"type": "ping", "timestamp": ...}
            -> {"type": "pong", "timestamp": ...}

    The transcription result is a hard-coded placeholder; no speech
    recognition is performed yet. Malformed frames (invalid JSON,
    non-object payloads) are answered with an error message instead of
    closing the connection, and unknown message types are logged.

    NOTE(review): ``user_id`` comes straight from the client and no
    authentication dependency is visible here - confirm this is intended.
    """
    connection_id = f"stt_{user_id}_{language}"

    try:
        await manager.connect(websocket, connection_id)

        await manager.send_personal_message({
            "type": "connected",
            "connection_id": connection_id,
            "message": "Connected to speech-to-text"
        }, websocket)

        # Initialize Google Speech connector.
        # NOTE(review): not used yet; kept so connector initialisation
        # errors still surface at connect time as before.
        google_speech = ConnectorGoogleSpeech()

        while True:
            data = await websocket.receive_text()

            try:
                message = json.loads(data)
            except json.JSONDecodeError as e:
                # A bad frame should not take the whole session down.
                logger.warning(f"Invalid JSON message: {e}")
                await manager.send_personal_message({
                    "type": "error",
                    "error": f"Invalid JSON message: {str(e)}"
                }, websocket)
                continue

            # Non-object payloads and objects without "type" count as unknown.
            message_type = message.get("type") if isinstance(message, dict) else None

            if message_type == "audio_chunk":
                try:
                    # Decoding validates the payload even though the audio
                    # itself is not processed yet.
                    audio_data = base64.b64decode(message["data"])

                    # Process audio chunk
                    # This would integrate with Google Cloud Speech-to-Text streaming API

                    await manager.send_personal_message({
                        "type": "transcription_result",
                        "text": "Audio chunk received",  # Placeholder
                        "confidence": 0.95,
                        "is_final": False
                    }, websocket)

                except Exception as e:
                    logger.error(f"Error processing audio: {e}")
                    await manager.send_personal_message({
                        "type": "error",
                        "error": f"Failed to process audio: {str(e)}"
                    }, websocket)

            elif message_type == "ping":
                await manager.send_personal_message({
                    "type": "pong",
                    "timestamp": message.get("timestamp")
                }, websocket)

            else:
                logger.warning(f"Unknown message type: {message_type}")

    except WebSocketDisconnect:
        manager.disconnect(websocket, connection_id)
    except Exception as e:
        logger.error(f"WebSocket error: {e}")
        manager.disconnect(websocket, connection_id)
|
||||||
|
|
||||||
|
@router.websocket("/text-to-speech")
async def websocket_text_to_speech(
    websocket: WebSocket,
    user_id: str = "default",
    language: str = "de-DE",
    voice: str = "de-DE-Wavenet-A"
):
    """
    WebSocket endpoint for real-time text-to-speech.

    Protocol (JSON text frames):
        {"type": "text_to_speak", "text": ...}
            -> {"type": "audio_data", "audio": ..., "format": "mp3"}
        {"type": "ping", "timestamp": ...}
            -> {"type": "pong", "timestamp": ...}

    The audio payload is a hard-coded placeholder string; no synthesis is
    performed yet. Malformed frames (invalid JSON, non-object payloads)
    are answered with an error message instead of closing the connection,
    and unknown message types are logged.

    NOTE(review): ``user_id`` comes straight from the client and no
    authentication dependency is visible here - confirm this is intended.
    """
    connection_id = f"tts_{user_id}_{language}_{voice}"

    try:
        await manager.connect(websocket, connection_id)

        await manager.send_personal_message({
            "type": "connected",
            "connection_id": connection_id,
            "message": "Connected to text-to-speech"
        }, websocket)

        while True:
            data = await websocket.receive_text()

            try:
                message = json.loads(data)
            except json.JSONDecodeError as e:
                # A bad frame should not take the whole session down.
                logger.warning(f"Invalid JSON message: {e}")
                await manager.send_personal_message({
                    "type": "error",
                    "error": f"Invalid JSON message: {str(e)}"
                }, websocket)
                continue

            # Non-object payloads and objects without "type" count as unknown.
            message_type = message.get("type") if isinstance(message, dict) else None

            if message_type == "text_to_speak":
                try:
                    # A missing "text" key raises here and is reported
                    # to the client by the handler below.
                    text = message["text"]

                    # Process text-to-speech
                    # This would integrate with Google Cloud Text-to-Speech API

                    # For now, send a placeholder response
                    await manager.send_personal_message({
                        "type": "audio_data",
                        "audio": "base64_encoded_audio_here",  # Placeholder
                        "format": "mp3"
                    }, websocket)

                except Exception as e:
                    logger.error(f"Error processing text-to-speech: {e}")
                    await manager.send_personal_message({
                        "type": "error",
                        "error": f"Failed to process text: {str(e)}"
                    }, websocket)

            elif message_type == "ping":
                await manager.send_personal_message({
                    "type": "pong",
                    "timestamp": message.get("timestamp")
                }, websocket)

            else:
                logger.warning(f"Unknown message type: {message_type}")

    except WebSocketDisconnect:
        manager.disconnect(websocket, connection_id)
    except Exception as e:
        logger.error(f"WebSocket error: {e}")
        manager.disconnect(websocket, connection_id)
|
||||||
|
|
@ -58,20 +58,43 @@ class Configuration:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(configPath, 'r') as f:
|
with open(configPath, 'r') as f:
|
||||||
for line in f:
|
lines = f.readlines()
|
||||||
line = line.strip()
|
|
||||||
# Skip empty lines and comments
|
i = 0
|
||||||
if not line or line.startswith('#'):
|
while i < len(lines):
|
||||||
continue
|
line = lines[i].strip()
|
||||||
|
|
||||||
|
# Skip empty lines and comments
|
||||||
|
if not line or line.startswith('#'):
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Parse key-value pairs
|
||||||
|
if '=' in line:
|
||||||
|
key, value = line.split('=', 1)
|
||||||
|
key = key.strip()
|
||||||
|
value = value.strip()
|
||||||
|
|
||||||
|
# Check if value starts with { (JSON object)
|
||||||
|
if value.startswith('{'):
|
||||||
|
# Collect all lines until we find the closing }
|
||||||
|
json_lines = [value]
|
||||||
|
i += 1
|
||||||
|
brace_count = value.count('{') - value.count('}')
|
||||||
|
|
||||||
# Parse key-value pairs
|
while i < len(lines) and brace_count > 0:
|
||||||
if '=' in line:
|
json_lines.append(lines[i].rstrip('\n'))
|
||||||
key, value = line.split('=', 1)
|
brace_count += lines[i].count('{') - lines[i].count('}')
|
||||||
key = key.strip()
|
i += 1
|
||||||
value = value.strip()
|
|
||||||
|
|
||||||
# Add directly to data dictionary
|
# Join all lines and parse as JSON
|
||||||
self._data[key] = value
|
value = '\n'.join(json_lines)
|
||||||
|
i -= 1 # Adjust for the loop increment
|
||||||
|
|
||||||
|
# Add to data dictionary
|
||||||
|
self._data[key] = value
|
||||||
|
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -144,6 +167,9 @@ class Configuration:
|
||||||
# Handle secrets (keys ending with _SECRET)
|
# Handle secrets (keys ending with _SECRET)
|
||||||
if key.endswith("_SECRET"):
|
if key.endswith("_SECRET"):
|
||||||
return handleSecret(value)
|
return handleSecret(value)
|
||||||
|
# Handle JSON secrets (keys ending with _API_KEY that contain JSON)
|
||||||
|
elif key.endswith("_API_KEY") and value.startswith("{"):
|
||||||
|
return handleJsonSecret(value)
|
||||||
return value
|
return value
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
@ -180,5 +206,27 @@ def handleSecret(value: str) -> str:
|
||||||
# In the future, this could be enhanced to decrypt values
|
# In the future, this could be enhanced to decrypt values
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
def handleJsonSecret(value: str) -> str:
    """
    Handle JSON secret values (like Google service account keys).

    The value is only validated, not transformed: it is parsed once to
    prove it is well-formed JSON and then returned exactly as given.

    Args:
        value: The JSON secret value to handle

    Returns:
        str: The unchanged JSON secret value

    Raises:
        ValueError: If the value is not valid JSON. The underlying
            ``json.JSONDecodeError`` is attached as ``__cause__``.
    """
    # Function-scope import, as in the original block.
    import json

    try:
        # Validate that it's valid JSON; the parsed result is discarded
        json.loads(value)
    except json.JSONDecodeError as e:
        # Chain the decoder error so the parse position stays debuggable.
        raise ValueError(f"Invalid JSON in secret value: {e}") from e
    return value
|
||||||
|
|
||||||
# Create the global APP_CONFIG instance
|
# Create the global APP_CONFIG instance
|
||||||
APP_CONFIG = Configuration()
|
APP_CONFIG = Configuration()
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
## Web Framework & API
|
## Web Framework & API
|
||||||
fastapi==0.104.1
|
fastapi==0.104.1
|
||||||
|
websockets==12.0
|
||||||
uvicorn==0.23.2
|
uvicorn==0.23.2
|
||||||
python-multipart==0.0.6
|
python-multipart==0.0.6
|
||||||
httpx==0.25.0
|
httpx==0.25.0
|
||||||
|
|
@ -62,6 +63,7 @@ sortedcontainers>=2.4.0 # Required by trio
|
||||||
## Google Cloud Integration
|
## Google Cloud Integration
|
||||||
google-cloud-speech==2.21.0
|
google-cloud-speech==2.21.0
|
||||||
google-cloud-translate==3.11.1
|
google-cloud-translate==3.11.1
|
||||||
|
google-cloud-texttospeech==2.16.3
|
||||||
|
|
||||||
## MSFT Integration
|
## MSFT Integration
|
||||||
msal==1.24.1
|
msal==1.24.1
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue