""" Voice Streaming WebSocket Routes Provides real-time audio streaming for voice services """ from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Depends from fastapi.responses import JSONResponse import logging import json import base64 import asyncio from typing import Dict, List from modules.shared.configuration import APP_CONFIG from modules.connectors.connectorGoogleSpeech import ConnectorGoogleSpeech logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/voice/ws", tags=["Voice Streaming"]) # Store active connections active_connections: Dict[str, WebSocket] = {} class ConnectionManager: def __init__(self): self.active_connections: List[WebSocket] = [] async def connect(self, websocket: WebSocket, connection_id: str): await websocket.accept() self.active_connections.append(websocket) active_connections[connection_id] = websocket logger.info(f"WebSocket connected: {connection_id}") def disconnect(self, websocket: WebSocket, connection_id: str): if websocket in self.active_connections: self.active_connections.remove(websocket) if connection_id in active_connections: del active_connections[connection_id] logger.info(f"WebSocket disconnected: {connection_id}") async def send_personal_message(self, message: dict, websocket: WebSocket): try: await websocket.send_text(json.dumps(message)) except Exception as e: logger.error(f"Error sending message: {e}") manager = ConnectionManager() @router.websocket("/realtime-interpreter") async def websocket_realtime_interpreter( websocket: WebSocket, user_id: str = "default", from_language: str = "de-DE", to_language: str = "en-US" ): """WebSocket endpoint for real-time voice interpretation""" connection_id = f"realtime_{user_id}_{from_language}_{to_language}" try: await manager.connect(websocket, connection_id) # Send connection confirmation await manager.send_personal_message({ "type": "connected", "connection_id": connection_id, "message": "Connected to real-time interpreter" }, websocket) # Initialize Google Speech connector google_speech = ConnectorGoogleSpeech() while True: # Receive message from client data = await websocket.receive_text() message = json.loads(data) if message["type"] == "audio_chunk": # Process audio chunk try: # Decode base64 audio data audio_data = base64.b64decode(message["data"]) # For now, just acknowledge receipt # In a full implementation, this would: # 1. Buffer audio chunks # 2. Process with Google Cloud Speech-to-Text streaming # 3. Send partial results back # 4. Handle translation await manager.send_personal_message({ "type": "audio_received", "chunk_size": len(audio_data), "timestamp": message.get("timestamp") }, websocket) except Exception as e: logger.error(f"Error processing audio chunk: {e}") await manager.send_personal_message({ "type": "error", "error": f"Failed to process audio: {str(e)}" }, websocket) elif message["type"] == "ping": # Respond to ping await manager.send_personal_message({ "type": "pong", "timestamp": message.get("timestamp") }, websocket) else: logger.warning(f"Unknown message type: {message['type']}") except WebSocketDisconnect: manager.disconnect(websocket, connection_id) logger.info(f"Client disconnected: {connection_id}") except Exception as e: logger.error(f"WebSocket error: {e}") manager.disconnect(websocket, connection_id) @router.websocket("/speech-to-text") async def websocket_speech_to_text( websocket: WebSocket, user_id: str = "default", language: str = "de-DE" ): """WebSocket endpoint for real-time speech-to-text""" connection_id = f"stt_{user_id}_{language}" try: await manager.connect(websocket, connection_id) await manager.send_personal_message({ "type": "connected", "connection_id": connection_id, "message": "Connected to speech-to-text" }, websocket) # Initialize Google Speech connector google_speech = ConnectorGoogleSpeech() while True: data = await websocket.receive_text() message = json.loads(data) if message["type"] == "audio_chunk": try: audio_data = base64.b64decode(message["data"]) # Process audio chunk # This would integrate with Google Cloud Speech-to-Text streaming API await manager.send_personal_message({ "type": "transcription_result", "text": "Audio chunk received", # Placeholder "confidence": 0.95, "is_final": False }, websocket) except Exception as e: logger.error(f"Error processing audio: {e}") await manager.send_personal_message({ "type": "error", "error": f"Failed to process audio: {str(e)}" }, websocket) elif message["type"] == "ping": await manager.send_personal_message({ "type": "pong", "timestamp": message.get("timestamp") }, websocket) except WebSocketDisconnect: manager.disconnect(websocket, connection_id) except Exception as e: logger.error(f"WebSocket error: {e}") manager.disconnect(websocket, connection_id) @router.websocket("/text-to-speech") async def websocket_text_to_speech( websocket: WebSocket, user_id: str = "default", language: str = "de-DE", voice: str = "de-DE-Wavenet-A" ): """WebSocket endpoint for real-time text-to-speech""" connection_id = f"tts_{user_id}_{language}_{voice}" try: await manager.connect(websocket, connection_id) await manager.send_personal_message({ "type": "connected", "connection_id": connection_id, "message": "Connected to text-to-speech" }, websocket) while True: data = await websocket.receive_text() message = json.loads(data) if message["type"] == "text_to_speak": try: text = message["text"] # Process text-to-speech # This would integrate with Google Cloud Text-to-Speech API # For now, send a placeholder response await manager.send_personal_message({ "type": "audio_data", "audio": "base64_encoded_audio_here", # Placeholder "format": "mp3" }, websocket) except Exception as e: logger.error(f"Error processing text-to-speech: {e}") await manager.send_personal_message({ "type": "error", "error": f"Failed to process text: {str(e)}" }, websocket) elif message["type"] == "ping": await manager.send_personal_message({ "type": "pong", "timestamp": message.get("timestamp") }, websocket) except WebSocketDisconnect: manager.disconnect(websocket, connection_id) except Exception as e: logger.error(f"WebSocket error: {e}") manager.disconnect(websocket, connection_id)