719 lines
26 KiB
Python
719 lines
26 KiB
Python
"""
|
|
Google Cloud Voice Services Routes
|
|
Replaces Azure voice services with Google Cloud Speech-to-Text and Translation
|
|
Includes WebSocket support for real-time voice streaming
|
|
"""
|
|
|
|
import logging
|
|
import json
|
|
import base64
|
|
from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException, Body, WebSocket, WebSocketDisconnect
|
|
from fastapi.responses import Response
|
|
from typing import Optional, Dict, Any, List
|
|
from modules.auth import getCurrentUser
|
|
from modules.datamodels.datamodelUam import User
|
|
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface, VoiceObjects
|
|
|
|
# Module-level logger for this router.
logger = logging.getLogger(__name__)

router = APIRouter(prefix="/voice-google", tags=["Voice Google"])

# Store active WebSocket connections
# Keyed by connection id; shared between the module and ConnectionManager below.
activeConnections: Dict[str, WebSocket] = {}
|
|
|
|
class ConnectionManager:
    """Tracks open WebSocket connections for the voice endpoints.

    Each accepted socket is kept both in this manager's list and in the
    module-level ``activeConnections`` registry keyed by connection id.
    """

    def __init__(self):
        # Every socket this manager has accepted and not yet dropped.
        self.activeConnections: List[WebSocket] = []

    async def connect(self, websocket: WebSocket, connectionId: str):
        """Accept the handshake and register the socket under *connectionId*."""
        await websocket.accept()
        self.activeConnections.append(websocket)
        activeConnections[connectionId] = websocket
        logger.info(f"WebSocket connected: {connectionId}")

    def disconnect(self, websocket: WebSocket, connectionId: str):
        """Unregister a socket; safe to call for sockets that were never tracked."""
        try:
            self.activeConnections.remove(websocket)
        except ValueError:
            # Socket was not tracked by this manager; nothing to remove.
            pass
        activeConnections.pop(connectionId, None)
        logger.info(f"WebSocket disconnected: {connectionId}")

    async def sendPersonalMessage(self, message: dict, websocket: WebSocket):
        """Serialize *message* as JSON and push it to one client; failures are logged."""
        try:
            await websocket.send_text(json.dumps(message))
        except Exception as e:
            logger.error(f"Error sending message: {e}")
|
|
manager = ConnectionManager()
|
|
|
|
def _getVoiceInterface(currentUser: User) -> VoiceObjects:
    """Build a voice interface bound to *currentUser*.

    Raises:
        HTTPException: 500 when the underlying interface cannot be created.
    """
    try:
        voiceInterface = getVoiceInterface(currentUser)
    except Exception as exc:
        logger.error(f"Failed to initialize voice interface: {exc}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to initialize voice interface: {str(exc)}"
        )
    return voiceInterface
|
|
|
|
@router.post("/speech-to-text")
|
|
async def speech_to_text(
|
|
audioFile: UploadFile = File(...),
|
|
language: str = Form("de-DE"),
|
|
currentUser: User = Depends(getCurrentUser)
|
|
):
|
|
"""Convert speech to text using Google Cloud Speech-to-Text API."""
|
|
try:
|
|
logger.info(f"🎤 Speech-to-text request: {audioFile.filename}, language: {language}")
|
|
|
|
# Read audio file
|
|
audioContent = await audioFile.read()
|
|
logger.info(f"📊 Audio file size: {len(audioContent)} bytes")
|
|
|
|
# Get voice interface
|
|
voiceInterface = _getVoiceInterface(currentUser)
|
|
|
|
# Validate audio format
|
|
validation = voiceInterface.validateAudioFormat(audioContent)
|
|
|
|
if not validation["valid"]:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail=f"Invalid audio format: {validation.get('error', 'Unknown error')}"
|
|
)
|
|
|
|
# Perform speech recognition
|
|
result = await voiceInterface.speechToText(
|
|
audioContent=audioContent,
|
|
language=language
|
|
)
|
|
|
|
if result["success"]:
|
|
return {
|
|
"success": True,
|
|
"text": result["text"],
|
|
"confidence": result["confidence"],
|
|
"language": result["language"],
|
|
"audio_info": {
|
|
"size": len(audioContent),
|
|
"format": validation["format"],
|
|
"estimated_duration": validation.get("estimated_duration", 0)
|
|
}
|
|
}
|
|
else:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail=f"Speech recognition failed: {result.get('error', 'Unknown error')}"
|
|
)
|
|
|
|
except HTTPException:
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"❌ Speech-to-text error: {e}")
|
|
raise HTTPException(
|
|
status_code=500,
|
|
detail=f"Speech-to-text processing failed: {str(e)}"
|
|
)
|
|
|
|
@router.post("/detect-language")
|
|
async def detect_language(
|
|
text: str = Form(...),
|
|
currentUser: User = Depends(getCurrentUser)
|
|
):
|
|
"""Detect the language of text using Google Cloud Translation API."""
|
|
try:
|
|
logger.info(f"🔍 Language detection request: '{text[:100]}...'")
|
|
|
|
if not text.strip():
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail="Empty text provided for language detection"
|
|
)
|
|
|
|
# Get voice interface
|
|
voiceInterface = _getVoiceInterface(currentUser)
|
|
|
|
# Perform language detection
|
|
result = await voiceInterface.detectLanguage(text)
|
|
|
|
if result["success"]:
|
|
return {
|
|
"success": True,
|
|
"language": result["language"],
|
|
"confidence": result.get("confidence", 1.0)
|
|
}
|
|
else:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail=f"Language detection failed: {result.get('error', 'Unknown error')}"
|
|
)
|
|
|
|
except HTTPException:
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"❌ Language detection error: {e}")
|
|
raise HTTPException(
|
|
status_code=500,
|
|
detail=f"Language detection processing failed: {str(e)}"
|
|
)
|
|
|
|
@router.post("/translate")
|
|
async def translate_text(
|
|
text: str = Form(...),
|
|
sourceLanguage: str = Form("de"),
|
|
targetLanguage: str = Form("en"),
|
|
currentUser: User = Depends(getCurrentUser)
|
|
):
|
|
"""Translate text using Google Cloud Translation API."""
|
|
try:
|
|
logger.info(f"🌐 Translation request: '{text}' ({sourceLanguage} -> {targetLanguage})")
|
|
|
|
if not text.strip():
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail="Empty text provided for translation"
|
|
)
|
|
|
|
# Get voice interface
|
|
voiceInterface = _getVoiceInterface(currentUser)
|
|
|
|
# Perform translation
|
|
result = await voiceInterface.translateText(
|
|
text=text,
|
|
sourceLanguage=sourceLanguage,
|
|
targetLanguage=targetLanguage
|
|
)
|
|
|
|
if result["success"]:
|
|
return {
|
|
"success": True,
|
|
"original_text": result["original_text"],
|
|
"translated_text": result["translated_text"],
|
|
"source_language": result["source_language"],
|
|
"target_language": result["target_language"]
|
|
}
|
|
else:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail=f"Translation failed: {result.get('error', 'Unknown error')}"
|
|
)
|
|
|
|
except HTTPException:
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"❌ Translation error: {e}")
|
|
raise HTTPException(
|
|
status_code=500,
|
|
detail=f"Translation processing failed: {str(e)}"
|
|
)
|
|
|
|
@router.post("/realtime-interpreter")
|
|
async def realtime_interpreter(
|
|
audioFile: UploadFile = File(...),
|
|
fromLanguage: str = Form("de-DE"),
|
|
toLanguage: str = Form("en-US"),
|
|
connectionId: str = Form(None),
|
|
currentUser: User = Depends(getCurrentUser)
|
|
):
|
|
"""Real-time interpreter: speech to translated text using Google Cloud APIs."""
|
|
try:
|
|
logger.info(f"🔄 Real-time interpreter request: {audioFile.filename}")
|
|
logger.info(f" From: {fromLanguage} -> To: {toLanguage}")
|
|
logger.info(f" MIME type: {audioFile.content_type}")
|
|
|
|
# Read audio file
|
|
audioContent = await audioFile.read()
|
|
logger.info(f"📊 Audio file size: {len(audioContent)} bytes")
|
|
|
|
# Save audio file for debugging with correct extension
|
|
# file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav"
|
|
# debug_filename = f"debug_audio/audio_google_{audio_file.filename.replace('.wav', '.webm')}"
|
|
# os.makedirs("debug_audio", exist_ok=True)
|
|
# with open(debug_filename, "wb") as f:
|
|
# f.write(audio_content)
|
|
# logger.info(f"💾 Saved audio file for debugging: {debug_filename}")
|
|
|
|
# Get voice interface
|
|
voiceInterface = _getVoiceInterface(currentUser)
|
|
|
|
# Validate audio format
|
|
validation = voiceInterface.validateAudioFormat(audioContent)
|
|
|
|
if not validation["valid"]:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail=f"Invalid audio format: {validation.get('error', 'Unknown error')}"
|
|
)
|
|
|
|
# Perform complete pipeline: Speech-to-Text + Translation
|
|
result = await voiceInterface.speechToTranslatedText(
|
|
audioContent=audioContent,
|
|
fromLanguage=fromLanguage,
|
|
toLanguage=toLanguage
|
|
)
|
|
|
|
if result["success"]:
|
|
logger.info(f"✅ Real-time interpreter successful:")
|
|
logger.info(f" Original: '{result['original_text']}'")
|
|
logger.info(f" Translated: '{result['translated_text']}'")
|
|
|
|
return {
|
|
"success": True,
|
|
"original_text": result["original_text"],
|
|
"translated_text": result["translated_text"],
|
|
"confidence": result["confidence"],
|
|
"source_language": result["source_language"],
|
|
"target_language": result["target_language"],
|
|
"audio_info": {
|
|
"size": len(audioContent),
|
|
"format": validation["format"],
|
|
"estimated_duration": validation.get("estimated_duration", 0)
|
|
}
|
|
}
|
|
else:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail=f"Real-time interpreter failed: {result.get('error', 'Unknown error')}"
|
|
)
|
|
|
|
except HTTPException:
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"❌ Real-time interpreter error: {e}")
|
|
raise HTTPException(
|
|
status_code=500,
|
|
detail=f"Real-time interpreter processing failed: {str(e)}"
|
|
)
|
|
|
|
|
|
@router.post("/text-to-speech")
|
|
async def text_to_speech(
|
|
text: str = Form(...),
|
|
language: str = Form("de-DE"),
|
|
voice: str = Form(None),
|
|
currentUser: User = Depends(getCurrentUser)
|
|
):
|
|
"""Convert text to speech using Google Cloud Text-to-Speech."""
|
|
try:
|
|
logger.info(f"Text-to-Speech request: '{text[:50]}...' in {language}")
|
|
|
|
if not text.strip():
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail="Empty text provided for text-to-speech"
|
|
)
|
|
|
|
voiceInterface = _getVoiceInterface(currentUser)
|
|
result = await voiceInterface.textToSpeech(
|
|
text=text,
|
|
languageCode=language,
|
|
voiceName=voice
|
|
)
|
|
|
|
if result["success"]:
|
|
return Response(
|
|
content=result["audio_content"],
|
|
media_type="audio/mpeg",
|
|
headers={
|
|
"Content-Disposition": "attachment; filename=speech.mp3",
|
|
"X-Voice-Name": result["voice_name"],
|
|
"X-Language-Code": result["language_code"]
|
|
}
|
|
)
|
|
else:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail=f"Text-to-Speech failed: {result.get('error', 'Unknown error')}"
|
|
)
|
|
|
|
except HTTPException:
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"Text-to-Speech error: {e}")
|
|
raise HTTPException(
|
|
status_code=500,
|
|
detail=f"Text-to-Speech processing failed: {str(e)}"
|
|
)
|
|
|
|
@router.get("/languages")
|
|
async def get_available_languages(currentUser: User = Depends(getCurrentUser)):
|
|
"""Get available languages from Google Cloud Text-to-Speech."""
|
|
try:
|
|
logger.info("🌐 Getting available languages from Google Cloud TTS")
|
|
|
|
voiceInterface = _getVoiceInterface(currentUser)
|
|
result = await voiceInterface.getAvailableLanguages()
|
|
|
|
if result["success"]:
|
|
return {
|
|
"success": True,
|
|
"languages": result["languages"]
|
|
}
|
|
else:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail=f"Failed to get languages: {result.get('error', 'Unknown error')}"
|
|
)
|
|
|
|
except HTTPException:
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"❌ Get languages error: {e}")
|
|
raise HTTPException(
|
|
status_code=500,
|
|
detail=f"Failed to get available languages: {str(e)}"
|
|
)
|
|
|
|
@router.get("/voices")
|
|
async def get_available_voices(
|
|
languageCode: Optional[str] = None,
|
|
language_code: Optional[str] = None, # Accept both camelCase and snake_case
|
|
currentUser: User = Depends(getCurrentUser)
|
|
):
|
|
"""
|
|
Get available voices from Google Cloud Text-to-Speech.
|
|
Accepts languageCode (camelCase) or language_code (snake_case) query parameter.
|
|
"""
|
|
# Use language_code if provided (frontend sends this), otherwise use languageCode
|
|
if language_code:
|
|
languageCode = language_code
|
|
|
|
try:
|
|
logger.info(f"🎤 Getting available voices, language filter: {languageCode}")
|
|
|
|
voiceInterface = _getVoiceInterface(currentUser)
|
|
result = await voiceInterface.getAvailableVoices(languageCode=languageCode)
|
|
|
|
if result["success"]:
|
|
return {
|
|
"success": True,
|
|
"voices": result["voices"],
|
|
"language_filter": languageCode
|
|
}
|
|
else:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail=f"Failed to get voices: {result.get('error', 'Unknown error')}"
|
|
)
|
|
|
|
except HTTPException:
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"❌ Get voices error: {e}")
|
|
raise HTTPException(
|
|
status_code=500,
|
|
detail=f"Failed to get available voices: {str(e)}"
|
|
)
|
|
|
|
@router.get("/health")
|
|
async def health_check(currentUser: User = Depends(getCurrentUser)):
|
|
"""Health check for Google Cloud voice services."""
|
|
try:
|
|
voiceInterface = _getVoiceInterface(currentUser)
|
|
test_result = await voiceInterface.healthCheck()
|
|
|
|
return test_result
|
|
|
|
except Exception as e:
|
|
logger.error(f"❌ Health check failed: {e}")
|
|
return {
|
|
"status": "unhealthy",
|
|
"error": str(e)
|
|
}
|
|
|
|
@router.get("/settings")
|
|
async def get_voice_settings(currentUser: User = Depends(getCurrentUser)):
|
|
"""Get voice settings for the current user."""
|
|
try:
|
|
logger.info(f"Getting voice settings for user: {currentUser.id}")
|
|
|
|
# Get voice interface
|
|
voiceInterface = _getVoiceInterface(currentUser)
|
|
|
|
# Get or create voice settings for the user
|
|
voice_settings = voiceInterface.getOrCreateVoiceSettings(currentUser.id)
|
|
|
|
if voice_settings:
|
|
# Return user settings
|
|
return {
|
|
"success": True,
|
|
"data": {
|
|
"user_settings": voice_settings.model_dump(),
|
|
"default_settings": {
|
|
"sttLanguage": "de-DE",
|
|
"ttsLanguage": "de-DE",
|
|
"ttsVoice": "de-DE-Wavenet-A",
|
|
"translationEnabled": True,
|
|
"targetLanguage": "en-US"
|
|
}
|
|
}
|
|
}
|
|
else:
|
|
# Fallback to default settings if database fails
|
|
logger.warning("Failed to get voice settings from database, using defaults")
|
|
return {
|
|
"success": True,
|
|
"data": {
|
|
"user_settings": None,
|
|
"default_settings": {
|
|
"sttLanguage": "de-DE",
|
|
"ttsLanguage": "de-DE",
|
|
"ttsVoice": "de-DE-Wavenet-A",
|
|
"translationEnabled": True,
|
|
"targetLanguage": "en-US"
|
|
}
|
|
}
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting voice settings: {e}")
|
|
raise HTTPException(
|
|
status_code=500,
|
|
detail=f"Failed to get voice settings: {str(e)}"
|
|
)
|
|
|
|
@router.post("/settings")
|
|
async def save_voice_settings(
|
|
settings: Dict[str, Any] = Body(...),
|
|
currentUser: User = Depends(getCurrentUser)
|
|
):
|
|
"""Save voice settings for the current user."""
|
|
try:
|
|
logger.info(f"Saving voice settings for user: {currentUser.id}")
|
|
logger.info(f"Settings: {settings}")
|
|
|
|
# Validate required settings
|
|
requiredFields = ["sttLanguage", "ttsLanguage", "ttsVoice"]
|
|
for field in requiredFields:
|
|
if field not in settings:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail=f"Missing required field: {field}"
|
|
)
|
|
|
|
# Set default values for optional fields if not provided
|
|
if "translationEnabled" not in settings:
|
|
settings["translationEnabled"] = True
|
|
if "targetLanguage" not in settings:
|
|
settings["targetLanguage"] = "en-US"
|
|
|
|
# Get voice interface
|
|
voiceInterface = _getVoiceInterface(currentUser)
|
|
|
|
# Check if settings already exist for this user
|
|
existing_settings = voiceInterface.getVoiceSettings(currentUser.id)
|
|
|
|
if existing_settings:
|
|
# Update existing settings
|
|
logger.info(f"Updating existing voice settings for user {currentUser.id}")
|
|
updated_settings = voiceInterface.updateVoiceSettings(currentUser.id, settings)
|
|
logger.info(f"Voice settings updated for user {currentUser.id}: {updated_settings}")
|
|
else:
|
|
# Create new settings
|
|
logger.info(f"Creating new voice settings for user {currentUser.id}")
|
|
# Add userId to settings
|
|
settings["userId"] = currentUser.id
|
|
created_settings = voiceInterface.createVoiceSettings(settings)
|
|
logger.info(f"Voice settings created for user {currentUser.id}: {created_settings}")
|
|
|
|
return {
|
|
"success": True,
|
|
"message": "Voice settings saved successfully",
|
|
"data": settings
|
|
}
|
|
|
|
except HTTPException:
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"Error saving voice settings: {e}")
|
|
raise HTTPException(
|
|
status_code=500,
|
|
detail=f"Failed to save voice settings: {str(e)}"
|
|
)
|
|
|
|
# WebSocket endpoints for real-time voice streaming
|
|
|
|
@router.websocket("/ws/realtime-interpreter")
|
|
async def websocket_realtime_interpreter(
|
|
websocket: WebSocket,
|
|
userId: str = "default",
|
|
fromLanguage: str = "de-DE",
|
|
toLanguage: str = "en-US"
|
|
):
|
|
"""WebSocket endpoint for real-time voice interpretation"""
|
|
connectionId = f"realtime_{userId}_{fromLanguage}_{toLanguage}"
|
|
|
|
try:
|
|
await manager.connect(websocket, connectionId)
|
|
|
|
# Send connection confirmation
|
|
await manager.sendPersonalMessage({
|
|
"type": "connected",
|
|
"connection_id": connectionId,
|
|
"message": "Connected to real-time interpreter"
|
|
}, websocket)
|
|
|
|
# Initialize voice interface
|
|
voiceInterface = _getVoiceInterface(User(id=userId))
|
|
|
|
while True:
|
|
# Receive message from client
|
|
data = await websocket.receive_text()
|
|
message = json.loads(data)
|
|
|
|
if message["type"] == "audio_chunk":
|
|
# Process audio chunk
|
|
try:
|
|
# Decode base64 audio data
|
|
audioData = base64.b64decode(message["data"])
|
|
|
|
# For now, just acknowledge receipt
|
|
# In a full implementation, this would:
|
|
# 1. Buffer audio chunks
|
|
# 2. Process with Google Cloud Speech-to-Text streaming
|
|
# 3. Send partial results back
|
|
# 4. Handle translation
|
|
|
|
await manager.sendPersonalMessage({
|
|
"type": "audio_received",
|
|
"chunk_size": len(audioData),
|
|
"timestamp": message.get("timestamp")
|
|
}, websocket)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error processing audio chunk: {e}")
|
|
await manager.send_personal_message({
|
|
"type": "error",
|
|
"error": f"Failed to process audio: {str(e)}"
|
|
}, websocket)
|
|
|
|
elif message["type"] == "ping":
|
|
# Respond to ping
|
|
await manager.sendPersonalMessage({
|
|
"type": "pong",
|
|
"timestamp": message.get("timestamp")
|
|
}, websocket)
|
|
|
|
else:
|
|
logger.warning(f"Unknown message type: {message['type']}")
|
|
|
|
except WebSocketDisconnect:
|
|
manager.disconnect(websocket, connectionId)
|
|
logger.info(f"Client disconnected: {connectionId}")
|
|
except Exception as e:
|
|
logger.error(f"WebSocket error: {e}")
|
|
manager.disconnect(websocket, connectionId)
|
|
|
|
@router.websocket("/ws/speech-to-text")
|
|
async def websocket_speech_to_text(
|
|
websocket: WebSocket,
|
|
userId: str = "default",
|
|
language: str = "de-DE"
|
|
):
|
|
"""WebSocket endpoint for real-time speech-to-text"""
|
|
connectionId = f"stt_{userId}_{language}"
|
|
|
|
try:
|
|
await manager.connect(websocket, connectionId)
|
|
|
|
await manager.sendPersonalMessage({
|
|
"type": "connected",
|
|
"connection_id": connectionId,
|
|
"message": "Connected to speech-to-text"
|
|
}, websocket)
|
|
|
|
# Initialize voice interface
|
|
voiceInterface = _getVoiceInterface(User(id=userId))
|
|
|
|
while True:
|
|
data = await websocket.receive_text()
|
|
message = json.loads(data)
|
|
|
|
if message["type"] == "audio_chunk":
|
|
try:
|
|
audioData = base64.b64decode(message["data"])
|
|
|
|
# Process audio chunk
|
|
# This would integrate with Google Cloud Speech-to-Text streaming API
|
|
|
|
await manager.sendPersonalMessage({
|
|
"type": "transcription_result",
|
|
"text": "Audio chunk received", # Placeholder
|
|
"confidence": 0.95,
|
|
"is_final": False
|
|
}, websocket)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error processing audio: {e}")
|
|
await manager.sendPersonalMessage({
|
|
"type": "error",
|
|
"error": f"Failed to process audio: {str(e)}"
|
|
}, websocket)
|
|
|
|
elif message["type"] == "ping":
|
|
await manager.sendPersonalMessage({
|
|
"type": "pong",
|
|
"timestamp": message.get("timestamp")
|
|
}, websocket)
|
|
|
|
except WebSocketDisconnect:
|
|
manager.disconnect(websocket, connectionId)
|
|
except Exception as e:
|
|
logger.error(f"WebSocket error: {e}")
|
|
manager.disconnect(websocket, connectionId)
|
|
|
|
@router.websocket("/ws/text-to-speech")
|
|
async def websocket_text_to_speech(
|
|
websocket: WebSocket,
|
|
userId: str = "default",
|
|
language: str = "de-DE",
|
|
voice: str = "de-DE-Wavenet-A"
|
|
):
|
|
"""WebSocket endpoint for real-time text-to-speech"""
|
|
connectionId = f"tts_{userId}_{language}_{voice}"
|
|
|
|
try:
|
|
await manager.connect(websocket, connectionId)
|
|
|
|
await manager.sendPersonalMessage({
|
|
"type": "connected",
|
|
"connection_id": connectionId,
|
|
"message": "Connected to text-to-speech"
|
|
}, websocket)
|
|
|
|
while True:
|
|
data = await websocket.receive_text()
|
|
message = json.loads(data)
|
|
|
|
if message["type"] == "text_to_speak":
|
|
try:
|
|
text = message["text"]
|
|
|
|
# Process text-to-speech
|
|
# This would integrate with Google Cloud Text-to-Speech API
|
|
|
|
# For now, send a placeholder response
|
|
await manager.sendPersonalMessage({
|
|
"type": "audio_data",
|
|
"audio": "base64_encoded_audio_here", # Placeholder
|
|
"format": "mp3"
|
|
}, websocket)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error processing text-to-speech: {e}")
|
|
await manager.sendPersonalMessage({
|
|
"type": "error",
|
|
"error": f"Failed to process text: {str(e)}"
|
|
}, websocket)
|
|
|
|
elif message["type"] == "ping":
|
|
await manager.sendPersonalMessage({
|
|
"type": "pong",
|
|
"timestamp": message.get("timestamp")
|
|
}, websocket)
|
|
|
|
except WebSocketDisconnect:
|
|
manager.disconnect(websocket, connectionId)
|
|
except Exception as e:
|
|
logger.error(f"WebSocket error: {e}")
|
|
manager.disconnect(websocket, connectionId)
|