gateway/modules/routes/routeVoiceStreaming.py
2025-09-15 00:36:26 +02:00

231 lines
8.5 KiB
Python

"""
Voice Streaming WebSocket Routes
Provides real-time audio streaming for voice services
"""
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Depends
from fastapi.responses import JSONResponse
import logging
import json
import base64
import asyncio
from typing import Dict, List
from modules.shared.configuration import APP_CONFIG
from modules.connectors.connectorGoogleSpeech import ConnectorGoogleSpeech
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router for the voice streaming WebSocket routes; every route declared on it
# is served under the /api/voice/ws prefix.
router = APIRouter(prefix="/api/voice/ws", tags=["Voice Streaming"])
# Store active connections
# Maps a connection id to the WebSocket most recently registered under that id.
# Written by ConnectionManager.connect() and cleared by ConnectionManager.disconnect().
active_connections: Dict[str, WebSocket] = {}
class ConnectionManager:
    """Track accepted WebSocket connections and send JSON messages to them.

    Every connection is recorded in two places: this instance's
    ``active_connections`` list and the module-level ``active_connections``
    dict, which is keyed by connection id.
    """

    def __init__(self):
        # Sockets accepted through this manager, in the order they connected.
        self.active_connections: List[WebSocket] = []

    async def connect(self, websocket: WebSocket, connection_id: str):
        """Accept the WebSocket handshake and register the socket.

        If another socket is already registered under ``connection_id`` in
        the module-level dict, that entry is replaced by ``websocket``.
        """
        await websocket.accept()
        self.active_connections.append(websocket)
        active_connections[connection_id] = websocket
        logger.info(f"WebSocket connected: {connection_id}")

    def disconnect(self, websocket: WebSocket, connection_id: str):
        """Unregister ``websocket``; safe to call for an unknown socket or id."""
        if websocket in self.active_connections:
            self.active_connections.remove(websocket)
        # Connection ids are not unique per socket, so a newer socket may have
        # taken over this id. Only drop the dict entry when it still refers to
        # the socket that is going away; otherwise the newer, still-open
        # connection would lose its registration.
        if active_connections.get(connection_id) is websocket:
            del active_connections[connection_id]
        logger.info(f"WebSocket disconnected: {connection_id}")

    async def send_personal_message(self, message: dict, websocket: WebSocket):
        """Serialize ``message`` to JSON and send it as a text frame.

        Best effort: any failure (serialization or transport) is logged and
        swallowed so that a failed send does not raise into the caller.
        """
        try:
            await websocket.send_text(json.dumps(message))
        except Exception as e:
            logger.error(f"Error sending message: {e}")
# Single ConnectionManager instance shared by the WebSocket endpoints below.
manager = ConnectionManager()
@router.websocket("/realtime-interpreter")
async def websocket_realtime_interpreter(
    websocket: WebSocket,
    user_id: str = "default",
    from_language: str = "de-DE",
    to_language: str = "en-US"
):
    """WebSocket endpoint for real-time voice interpretation.

    The client sends JSON text frames:

    * ``{"type": "audio_chunk", "data": <base64>, "timestamp": ...}`` is
      answered with an ``audio_received`` message carrying the decoded chunk
      size. No transcription or translation is performed yet.
    * ``{"type": "ping", "timestamp": ...}`` is answered with ``pong``.
    * A frame that is not a JSON object is answered with an ``error``
      message; the connection stays open.
    * Any other ``type`` is logged and ignored.

    Args:
        websocket: The client connection.
        user_id: Used only to build the connection id.
        from_language: Used only to build the connection id.
        to_language: Used only to build the connection id.
    """
    connection_id = f"realtime_{user_id}_{from_language}_{to_language}"
    try:
        await manager.connect(websocket, connection_id)
        # Send connection confirmation
        await manager.send_personal_message({
            "type": "connected",
            "connection_id": connection_id,
            "message": "Connected to real-time interpreter"
        }, websocket)
        # Initialize Google Speech connector.
        # NOTE(review): not used by the placeholder handling below; kept so
        # that connector construction still happens at connect time.
        google_speech = ConnectorGoogleSpeech()
        while True:
            # Receive message from client
            data = await websocket.receive_text()
            # A malformed frame must not tear down the whole session: report
            # it to the client and keep listening.
            try:
                message = json.loads(data)
            except json.JSONDecodeError:
                message = None
            if not isinstance(message, dict):
                logger.warning(f"Invalid message on {connection_id}")
                await manager.send_personal_message({
                    "type": "error",
                    "error": "Invalid message: expected a JSON object"
                }, websocket)
                continue
            # .get() so that a missing "type" is treated as an unknown type
            # instead of raising KeyError.
            message_type = message.get("type")
            if message_type == "audio_chunk":
                # Process audio chunk
                try:
                    # Decode base64 audio data
                    audio_data = base64.b64decode(message["data"])
                    # For now, just acknowledge receipt
                    # In a full implementation, this would:
                    # 1. Buffer audio chunks
                    # 2. Process with Google Cloud Speech-to-Text streaming
                    # 3. Send partial results back
                    # 4. Handle translation
                    await manager.send_personal_message({
                        "type": "audio_received",
                        "chunk_size": len(audio_data),
                        "timestamp": message.get("timestamp")
                    }, websocket)
                except Exception as e:
                    logger.error(f"Error processing audio chunk: {e}")
                    await manager.send_personal_message({
                        "type": "error",
                        "error": f"Failed to process audio: {str(e)}"
                    }, websocket)
            elif message_type == "ping":
                # Respond to ping
                await manager.send_personal_message({
                    "type": "pong",
                    "timestamp": message.get("timestamp")
                }, websocket)
            else:
                logger.warning(f"Unknown message type: {message_type}")
    except WebSocketDisconnect:
        logger.info(f"Client disconnected: {connection_id}")
    except Exception as e:
        logger.error(f"WebSocket error: {e}")
    finally:
        # Runs on every exit path, including task cancellation, so the
        # connection never stays registered after the handler ends.
        manager.disconnect(websocket, connection_id)
@router.websocket("/speech-to-text")
async def websocket_speech_to_text(
    websocket: WebSocket,
    user_id: str = "default",
    language: str = "de-DE"
):
    """WebSocket endpoint for real-time speech-to-text.

    The client sends JSON text frames:

    * ``{"type": "audio_chunk", "data": <base64>}`` is answered with a
      ``transcription_result`` message. The result is currently a fixed
      placeholder; no recognition is performed yet.
    * ``{"type": "ping", "timestamp": ...}`` is answered with ``pong``.
    * A frame that is not a JSON object is answered with an ``error``
      message; the connection stays open.
    * Any other ``type`` is logged and ignored.

    Args:
        websocket: The client connection.
        user_id: Used only to build the connection id.
        language: Used only to build the connection id.
    """
    connection_id = f"stt_{user_id}_{language}"
    try:
        await manager.connect(websocket, connection_id)
        await manager.send_personal_message({
            "type": "connected",
            "connection_id": connection_id,
            "message": "Connected to speech-to-text"
        }, websocket)
        # Initialize Google Speech connector.
        # NOTE(review): not used by the placeholder handling below; kept so
        # that connector construction still happens at connect time.
        google_speech = ConnectorGoogleSpeech()
        while True:
            data = await websocket.receive_text()
            # A malformed frame must not tear down the whole session: report
            # it to the client and keep listening.
            try:
                message = json.loads(data)
            except json.JSONDecodeError:
                message = None
            if not isinstance(message, dict):
                logger.warning(f"Invalid message on {connection_id}")
                await manager.send_personal_message({
                    "type": "error",
                    "error": "Invalid message: expected a JSON object"
                }, websocket)
                continue
            # .get() so that a missing "type" is treated as an unknown type
            # instead of raising KeyError.
            message_type = message.get("type")
            if message_type == "audio_chunk":
                try:
                    # The decoded bytes are not consumed yet; decoding still
                    # rejects a missing or undecodable "data" field.
                    audio_data = base64.b64decode(message["data"])
                    # Process audio chunk
                    # This would integrate with Google Cloud Speech-to-Text streaming API
                    await manager.send_personal_message({
                        "type": "transcription_result",
                        "text": "Audio chunk received",  # Placeholder
                        "confidence": 0.95,
                        "is_final": False
                    }, websocket)
                except Exception as e:
                    logger.error(f"Error processing audio: {e}")
                    await manager.send_personal_message({
                        "type": "error",
                        "error": f"Failed to process audio: {str(e)}"
                    }, websocket)
            elif message_type == "ping":
                await manager.send_personal_message({
                    "type": "pong",
                    "timestamp": message.get("timestamp")
                }, websocket)
            else:
                # Same handling as the realtime-interpreter endpoint.
                logger.warning(f"Unknown message type: {message_type}")
    except WebSocketDisconnect:
        # Normal client close; cleanup happens in the finally clause.
        pass
    except Exception as e:
        logger.error(f"WebSocket error: {e}")
    finally:
        # Runs on every exit path, including task cancellation, so the
        # connection never stays registered after the handler ends.
        manager.disconnect(websocket, connection_id)
@router.websocket("/text-to-speech")
async def websocket_text_to_speech(
    websocket: WebSocket,
    user_id: str = "default",
    language: str = "de-DE",
    voice: str = "de-DE-Wavenet-A"
):
    """WebSocket endpoint for real-time text-to-speech.

    The client sends JSON text frames:

    * ``{"type": "text_to_speak", "text": ...}`` is answered with an
      ``audio_data`` message. The audio is currently a fixed placeholder
      string; no synthesis is performed yet.
    * ``{"type": "ping", "timestamp": ...}`` is answered with ``pong``.
    * A frame that is not a JSON object is answered with an ``error``
      message; the connection stays open.
    * Any other ``type`` is logged and ignored.

    Args:
        websocket: The client connection.
        user_id: Used only to build the connection id.
        language: Used only to build the connection id.
        voice: Used only to build the connection id.
    """
    connection_id = f"tts_{user_id}_{language}_{voice}"
    try:
        await manager.connect(websocket, connection_id)
        await manager.send_personal_message({
            "type": "connected",
            "connection_id": connection_id,
            "message": "Connected to text-to-speech"
        }, websocket)
        while True:
            data = await websocket.receive_text()
            # A malformed frame must not tear down the whole session: report
            # it to the client and keep listening.
            try:
                message = json.loads(data)
            except json.JSONDecodeError:
                message = None
            if not isinstance(message, dict):
                logger.warning(f"Invalid message on {connection_id}")
                await manager.send_personal_message({
                    "type": "error",
                    "error": "Invalid message: expected a JSON object"
                }, websocket)
                continue
            # .get() so that a missing "type" is treated as an unknown type
            # instead of raising KeyError.
            message_type = message.get("type")
            if message_type == "text_to_speak":
                try:
                    # The text is not consumed yet; the lookup still rejects
                    # a request that has no "text" field.
                    text = message["text"]
                    # Process text-to-speech
                    # This would integrate with Google Cloud Text-to-Speech API
                    # For now, send a placeholder response
                    await manager.send_personal_message({
                        "type": "audio_data",
                        "audio": "base64_encoded_audio_here",  # Placeholder
                        "format": "mp3"
                    }, websocket)
                except Exception as e:
                    logger.error(f"Error processing text-to-speech: {e}")
                    await manager.send_personal_message({
                        "type": "error",
                        "error": f"Failed to process text: {str(e)}"
                    }, websocket)
            elif message_type == "ping":
                await manager.send_personal_message({
                    "type": "pong",
                    "timestamp": message.get("timestamp")
                }, websocket)
            else:
                # Same handling as the realtime-interpreter endpoint.
                logger.warning(f"Unknown message type: {message_type}")
    except WebSocketDisconnect:
        # Normal client close; cleanup happens in the finally clause.
        pass
    except Exception as e:
        logger.error(f"WebSocket error: {e}")
    finally:
        # Runs on every exit path, including task cancellation, so the
        # connection never stays registered after the handler ends.
        manager.disconnect(websocket, connection_id)