682 lines
25 KiB
Python
682 lines
25 KiB
Python
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Google Cloud Voice Services Routes.

Replaces the Azure voice services with Google Cloud Speech-to-Text and
Translation, and includes WebSocket support for real-time voice streaming.
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import json
|
|
import base64
|
|
import secrets
|
|
import time
|
|
from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException, Body, Query, Request, WebSocket, WebSocketDisconnect
|
|
from fastapi.responses import Response
|
|
from typing import Optional, Dict, Any, List
|
|
from modules.auth import getCurrentUser, getRequestContext, RequestContext, limiter
|
|
from modules.datamodels.datamodelUam import User
|
|
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface, VoiceObjects
|
|
from modules.shared.i18nRegistry import apiRouteContext
|
|
# i18n context for this route module; used to wrap some HTTP error details.
routeApiMsg = apiRouteContext("routeVoiceGoogle")

# Module logger and the router that the endpoints of this module register on.
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/voice-google", tags=["Voice Google"])

# Open WebSocket connections, keyed by connection id.
activeConnections: Dict[str, WebSocket] = {}
|
|
|
|
class ConnectionManager:
    """Track accepted WebSocket connections and send JSON messages to them.

    Every connection is recorded twice: in this instance's list and in the
    module-level ``activeConnections`` dict, keyed by connection id.
    """

    def __init__(self):
        # Sockets accepted through this manager, in connection order.
        self.activeConnections: List[WebSocket] = []

    async def connect(self, websocket: WebSocket, connectionId: str):
        """Accept ``websocket`` and register it under ``connectionId``."""
        await websocket.accept()
        self.activeConnections.append(websocket)
        activeConnections[connectionId] = websocket
        logger.info(f"WebSocket connected: {connectionId}")

    def disconnect(self, websocket: WebSocket, connectionId: str):
        """Forget ``websocket`` and ``connectionId``; unknown entries are ignored."""
        try:
            self.activeConnections.remove(websocket)
        except ValueError:
            # The socket was never registered here (or was already removed).
            pass
        activeConnections.pop(connectionId, None)
        logger.info(f"WebSocket disconnected: {connectionId}")

    async def send_personal_message(self, message: dict, websocket: WebSocket):
        """Send ``message`` as JSON text; failures are logged, not raised."""
        try:
            serialized = json.dumps(message)
            await websocket.send_text(serialized)
        except Exception as exc:
            logger.error(f"Error sending message: {exc}")


# Shared manager instance for this module.
manager = ConnectionManager()
|
|
|
|
def _getVoiceInterface(currentUser: User) -> VoiceObjects:
    """Return the voice interface for ``currentUser``.

    Raises:
        HTTPException: 500 when the interface cannot be created; the
            underlying error text is included in the detail.
    """
    try:
        voiceInterface = getVoiceInterface(currentUser)
    except Exception as exc:
        failure = f"Failed to initialize voice interface: {exc}"
        logger.error(failure)
        raise HTTPException(status_code=500, detail=failure)
    return voiceInterface
|
|
|
|
@router.post("/speech-to-text")
async def speech_to_text(
    audioFile: UploadFile = File(...),
    language: str = Form("de-DE"),
    currentUser: User = Depends(getCurrentUser),
):
    """Transcribe an uploaded audio file through the voice interface.

    Args:
        audioFile: Uploaded audio; its full content is read into memory.
        language: Language code passed to the recognizer.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        Dict with ``success``, ``text``, ``confidence``, ``language`` and an
        ``audio_info`` sub-dict (size, format, estimated duration).

    Raises:
        HTTPException: 400 when validation or recognition reports a failure,
            500 for any other error.
    """
    try:
        logger.info(f"🎤 Speech-to-text request: {audioFile.filename}, language: {language}")

        audioContent = await audioFile.read()
        audioSize = len(audioContent)
        logger.info(f"📊 Audio file size: {audioSize} bytes")

        voiceInterface = _getVoiceInterface(currentUser)

        # Reject unusable audio before running recognition on it.
        validation = voiceInterface.validateAudioFormat(audioContent)
        if not validation["valid"]:
            reason = validation.get('error', 'Unknown error')
            raise HTTPException(status_code=400, detail=f"Invalid audio format: {reason}")

        result = await voiceInterface.speechToText(audioContent=audioContent, language=language)
        if not result["success"]:
            reason = result.get('error', 'Unknown error')
            raise HTTPException(status_code=400, detail=f"Speech recognition failed: {reason}")

        return {
            "success": True,
            "text": result["text"],
            "confidence": result["confidence"],
            "language": result["language"],
            "audio_info": {
                "size": audioSize,
                "format": validation["format"],
                "estimated_duration": validation.get("estimated_duration", 0),
            },
        }

    except HTTPException:
        # Already carries the intended status code; pass it through untouched.
        raise
    except Exception as exc:
        logger.error(f"❌ Speech-to-text error: {exc}")
        raise HTTPException(status_code=500, detail=f"Speech-to-text processing failed: {exc}")
|
|
|
|
@router.post("/detect-language")
async def detect_language(
    text: str = Form(...),
    currentUser: User = Depends(getCurrentUser),
):
    """Detect the language of ``text`` through the voice interface.

    Returns:
        Dict with ``success``, ``language`` and ``confidence`` (1.0 when the
        interface result carries no confidence).

    Raises:
        HTTPException: 400 for blank text or a reported detection failure,
            500 for any other error.
    """
    try:
        # Only the first 100 characters of the text are logged.
        logger.info(f"🔍 Language detection request: '{text[:100]}...'")

        if not text.strip():
            raise HTTPException(
                status_code=400,
                detail=routeApiMsg("Empty text provided for language detection"),
            )

        voiceInterface = _getVoiceInterface(currentUser)
        result = await voiceInterface.detectLanguage(text)

        if not result["success"]:
            reason = result.get('error', 'Unknown error')
            raise HTTPException(status_code=400, detail=f"Language detection failed: {reason}")

        return {
            "success": True,
            "language": result["language"],
            "confidence": result.get("confidence", 1.0),
        }

    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"❌ Language detection error: {exc}")
        raise HTTPException(status_code=500, detail=f"Language detection processing failed: {exc}")
|
|
|
|
@router.post("/translate")
async def translate_text(
    text: str = Form(...),
    sourceLanguage: str = Form("de"),
    targetLanguage: str = Form("en"),
    currentUser: User = Depends(getCurrentUser)
):
    """Translate ``text`` through the voice interface.

    Args:
        text: Text to translate; must contain non-whitespace characters.
        sourceLanguage: Language code of ``text``.
        targetLanguage: Language code to translate into.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        Dict with ``success``, ``original_text``, ``translated_text``,
        ``source_language`` and ``target_language``.

    Raises:
        HTTPException: 400 for blank text or a reported translation failure,
            500 for any other error.
    """
    try:
        # Log only a bounded preview: the full text is user content of
        # arbitrary length and does not belong in the info log. This matches
        # the truncation used by the language detection endpoint.
        logger.info(f"🌐 Translation request: '{text[:100]}...' ({sourceLanguage} -> {targetLanguage})")

        if not text.strip():
            raise HTTPException(
                status_code=400,
                detail=routeApiMsg("Empty text provided for translation")
            )

        # Get voice interface
        voiceInterface = _getVoiceInterface(currentUser)

        # Perform translation
        result = await voiceInterface.translateText(
            text=text,
            sourceLanguage=sourceLanguage,
            targetLanguage=targetLanguage
        )

        if result["success"]:
            return {
                "success": True,
                "original_text": result["original_text"],
                "translated_text": result["translated_text"],
                "source_language": result["source_language"],
                "target_language": result["target_language"]
            }
        else:
            raise HTTPException(
                status_code=400,
                detail=f"Translation failed: {result.get('error', 'Unknown error')}"
            )

    except HTTPException:
        # Already carries the intended status code; pass it through untouched.
        raise
    except Exception as e:
        logger.error(f"❌ Translation error: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Translation processing failed: {str(e)}"
        )
|
|
|
|
@router.post("/realtime-interpreter")
async def realtime_interpreter(
    audioFile: UploadFile = File(...),
    fromLanguage: str = Form("de-DE"),
    toLanguage: str = Form("en-US"),
    connectionId: str = Form(None),
    currentUser: User = Depends(getCurrentUser),
):
    """Turn uploaded speech into translated text in one request.

    Args:
        audioFile: Uploaded audio; its full content is read into memory.
        fromLanguage: Language code of the spoken audio.
        toLanguage: Language code to translate into.
        connectionId: Accepted from the form but not used by this handler.
        currentUser: Authenticated user resolved by ``getCurrentUser``.

    Returns:
        Dict with ``success``, ``original_text``, ``translated_text``,
        ``confidence``, ``source_language``, ``target_language`` and an
        ``audio_info`` sub-dict (size, format, estimated duration).

    Raises:
        HTTPException: 400 when validation or the pipeline reports a failure,
            500 for any other error.
    """
    try:
        logger.info(f"🔄 Real-time interpreter request: {audioFile.filename}")
        logger.info(f" From: {fromLanguage} -> To: {toLanguage}")
        logger.info(f" MIME type: {audioFile.content_type}")

        audioContent = await audioFile.read()
        audioSize = len(audioContent)
        logger.info(f"📊 Audio file size: {audioSize} bytes")

        voiceInterface = _getVoiceInterface(currentUser)

        # Reject unusable audio before starting the pipeline.
        validation = voiceInterface.validateAudioFormat(audioContent)
        if not validation["valid"]:
            reason = validation.get('error', 'Unknown error')
            raise HTTPException(status_code=400, detail=f"Invalid audio format: {reason}")

        # Speech-to-text and translation run as a single interface call.
        result = await voiceInterface.speechToTranslatedText(
            audioContent=audioContent,
            fromLanguage=fromLanguage,
            toLanguage=toLanguage,
        )
        if not result["success"]:
            reason = result.get('error', 'Unknown error')
            raise HTTPException(status_code=400, detail=f"Real-time interpreter failed: {reason}")

        logger.info("✅ Real-time interpreter successful:")
        logger.info(f" Original: '{result['original_text']}'")
        logger.info(f" Translated: '{result['translated_text']}'")

        return {
            "success": True,
            "original_text": result["original_text"],
            "translated_text": result["translated_text"],
            "confidence": result["confidence"],
            "source_language": result["source_language"],
            "target_language": result["target_language"],
            "audio_info": {
                "size": audioSize,
                "format": validation["format"],
                "estimated_duration": validation.get("estimated_duration", 0),
            },
        }

    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"❌ Real-time interpreter error: {exc}")
        raise HTTPException(status_code=500, detail=f"Real-time interpreter processing failed: {exc}")
|
|
|
|
|
|
@router.post("/text-to-speech")
async def text_to_speech(
    request: Request,
    text: str = Form(...),
    language: str = Form("de-DE"),
    voice: str = Form(None),
    context: RequestContext = Depends(getRequestContext),
):
    """Synthesize ``text`` to audio through the voice interface.

    Args:
        request: Incoming request (not read by this handler).
        text: Text to speak; must contain non-whitespace characters.
        language: Language code passed to the synthesizer.
        voice: Optional voice name.
        context: Request context providing the user and, if present, the
            mandate id.

    Returns:
        ``audio/mpeg`` response carrying the synthesized audio, with the
        voice name and language code echoed in ``X-`` headers.

    Raises:
        HTTPException: 400 for blank text or a reported synthesis failure,
            500 for any other error.
    """
    try:
        logger.info(f"Text-to-Speech request: '{text[:50]}...' in {language}")

        if not text.strip():
            raise HTTPException(
                status_code=400,
                detail=routeApiMsg("Empty text provided for text-to-speech"),
            )

        mandateId = str(getattr(context, "mandateId", "") or "")
        voiceInterface = getVoiceInterface(context.user, mandateId)

        # Billing is best-effort: if it cannot be wired up, synthesis still runs.
        try:
            from modules.serviceCenter.services.serviceBilling.mainServiceBilling import getService as getBillingService

            billingService = getBillingService(context.user, mandateId)

            def _billingCb(data):
                """Record usage for priced operations; free ones are skipped."""
                priceCHF = data.get("priceCHF", 0.0)
                operation = data.get("operation", "voice")
                if priceCHF > 0:
                    billingService.recordUsage(
                        priceCHF=priceCHF,
                        aicoreProvider="google-voice",
                        aicoreModel=operation,
                        description=f"Voice {operation}",
                    )

            voiceInterface.billingCallback = _billingCb
        except Exception as billingError:
            logger.warning(f"TTS billing setup skipped: {billingError}")

        result = await voiceInterface.textToSpeech(
            text=text,
            languageCode=language,
            voiceName=voice,
        )
        if not result["success"]:
            reason = result.get('error', 'Unknown error')
            raise HTTPException(status_code=400, detail=f"Text-to-Speech failed: {reason}")

        return Response(
            content=result["audioContent"],
            media_type="audio/mpeg",
            headers={
                "Content-Disposition": "attachment; filename=speech.mp3",
                "X-Voice-Name": result.get("voiceName", ""),
                "X-Language-Code": result.get("languageCode", language),
            },
        )

    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"Text-to-Speech error: {exc}")
        raise HTTPException(status_code=500, detail=f"Text-to-Speech processing failed: {exc}")
|
|
|
|
@router.get("/languages")
async def get_available_languages(currentUser: User = Depends(getCurrentUser)):
    """List the languages reported by the voice interface.

    Returns:
        Dict with ``success`` and ``languages``.

    Raises:
        HTTPException: 400 when the interface reports a failure, 500 for any
            other error.
    """
    try:
        logger.info("🌐 Getting available languages from Google Cloud TTS")

        voiceInterface = _getVoiceInterface(currentUser)
        result = await voiceInterface.getAvailableLanguages()

        if not result["success"]:
            reason = result.get('error', 'Unknown error')
            raise HTTPException(status_code=400, detail=f"Failed to get languages: {reason}")

        return {"success": True, "languages": result["languages"]}

    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"❌ Get languages error: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to get available languages: {exc}")
|
|
|
|
@router.get("/voices")
async def get_available_voices(
    languageCode: Optional[str] = None,
    language_code: Optional[str] = None,  # Accept both camelCase and snake_case
    currentUser: User = Depends(getCurrentUser),
):
    """List the voices reported by the voice interface.

    The language filter may be given as ``languageCode`` or ``language_code``;
    a non-empty ``language_code`` wins when both are present.

    Returns:
        Dict with ``success``, ``voices`` and the applied ``language_filter``.

    Raises:
        HTTPException: 400 when the interface reports a failure, 500 for any
            other error.
    """
    # The snake_case spelling takes precedence over the camelCase one.
    languageCode = language_code or languageCode

    try:
        logger.info(f"🎤 Getting available voices, language filter: {languageCode}")

        voiceInterface = _getVoiceInterface(currentUser)
        result = await voiceInterface.getAvailableVoices(languageCode=languageCode)

        if not result["success"]:
            reason = result.get('error', 'Unknown error')
            raise HTTPException(status_code=400, detail=f"Failed to get voices: {reason}")

        return {
            "success": True,
            "voices": result["voices"],
            "language_filter": languageCode,
        }

    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"❌ Get voices error: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to get available voices: {exc}")
|
|
|
|
@router.get("/health")
async def health_check(currentUser: User = Depends(getCurrentUser)):
    """Report the health of the voice services.

    Returns the voice interface's own health result. Any failure, including
    one while creating the interface, is turned into an ``unhealthy`` payload
    instead of an error response.
    """
    try:
        voiceInterface = _getVoiceInterface(currentUser)
        return await voiceInterface.healthCheck()
    except Exception as exc:
        logger.error(f"❌ Health check failed: {exc}")
        return {"status": "unhealthy", "error": str(exc)}
|
|
|
|
@router.get("/settings")
async def get_voice_settings(currentUser: User = Depends(getCurrentUser)):
    """Return the current user's stored voice preferences.

    When no ``UserVoicePreferences`` record exists for the user, a freshly
    constructed (unsaved) record for that user id is returned instead.
    """
    from modules.datamodels.datamodelUam import UserVoicePreferences
    from modules.interfaces.interfaceDbApp import getRootInterface

    rootInterface = getRootInterface()
    userId = str(currentUser.id)

    storedPrefs = rootInterface.db.getRecordset(
        UserVoicePreferences, recordFilter={"userId": userId}
    )
    if not storedPrefs:
        defaults = UserVoicePreferences(userId=userId).model_dump()
        return {"success": True, "data": {"user_settings": defaults}}

    # Records may come back as plain dicts or as model instances.
    firstRecord = storedPrefs[0]
    userSettings = firstRecord if isinstance(firstRecord, dict) else firstRecord.model_dump()
    return {"success": True, "data": {"user_settings": userSettings}}
|
|
|
|
|
|
@router.post("/settings")
async def save_voice_settings(
    settings: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
):
    """Persist voice preferences for the current user.

    Only the whitelisted preference fields are taken from ``settings``; all
    other keys are dropped. The user's existing ``UserVoicePreferences``
    record is modified, or a new one is created when none exists.

    Returns:
        Dict with ``success``, a confirmation ``message`` and the ``data``
        that was actually written.
    """
    from modules.datamodels.datamodelUam import UserVoicePreferences, _normalizeTtsVoiceMap
    from modules.interfaces.interfaceDbApp import getRootInterface

    rootInterface = getRootInterface()
    userId = str(currentUser.id)

    allowedFields = {
        "sttLanguage", "ttsLanguage", "ttsVoice", "ttsVoiceMap",
        "translationSourceLanguage", "translationTargetLanguage",
    }
    updateData = {}
    for fieldName, fieldValue in settings.items():
        if fieldName in allowedFields:
            updateData[fieldName] = fieldValue
    if "ttsVoiceMap" in updateData:
        updateData["ttsVoiceMap"] = _normalizeTtsVoiceMap(updateData["ttsVoiceMap"])

    existing = rootInterface.db.getRecordset(
        UserVoicePreferences, recordFilter={"userId": userId}
    )
    if not existing:
        newPrefs = UserVoicePreferences(userId=userId, **updateData)
        rootInterface.db.recordCreate(UserVoicePreferences, newPrefs.model_dump())
    else:
        # Records may come back as plain dicts or as model instances.
        existingRecord = existing[0]
        if isinstance(existingRecord, dict):
            existingId = existingRecord.get("id")
        else:
            existingId = existingRecord.id
        rootInterface.db.recordModify(UserVoicePreferences, existingId, updateData)

    return {"success": True, "message": "Voice settings saved successfully", "data": updateData}
|
|
|
|
# =========================================================================
|
|
# STT Streaming WebSocket — generic, used by all features
|
|
# =========================================================================
|
|
|
|
# Issued STT WebSocket tokens: token string -> payload dict with "expiresAt".
_sttTokens: Dict[str, Dict[str, Any]] = {}
# Token lifetime, added to time.time() when a token is issued (seconds).
_STT_TOKEN_TTL = 45


def _cleanupSttTokens():
    """Remove every token whose ``expiresAt`` is at or before the current time.

    Payloads without an ``expiresAt`` entry are treated as already expired.
    """
    cutoff = time.time()
    # Walk a snapshot so entries can be removed while looping.
    for token, payload in list(_sttTokens.items()):
        if payload.get("expiresAt", 0) <= cutoff:
            _sttTokens.pop(token, None)
|
|
|
|
|
|
@router.post("/stt/token")
@limiter.limit("60/minute")
async def createSttToken(
    request: Request,
    context: RequestContext = Depends(getRequestContext),
):
    """Issue a short-lived single-use token for the STT streaming WebSocket.

    Expired tokens are purged first. The new token is stored together with
    the caller's user id, mandate id (empty string when absent) and expiry.

    Returns:
        Dict with ``wsToken`` and ``expiresInSeconds``.
    """
    _cleanupSttTokens()

    token = secrets.token_urlsafe(32)
    userId = str(context.user.id)
    mandateId = str(getattr(context, "mandateId", "") or "")
    _sttTokens[token] = {
        "userId": userId,
        "mandateId": mandateId,
        "expiresAt": time.time() + _STT_TOKEN_TTL,
    }
    return {"wsToken": token, "expiresInSeconds": _STT_TOKEN_TTL}
|
|
|
|
|
|
@router.websocket("/stt/stream")
async def sttStream(
    websocket: WebSocket,
    wsToken: Optional[str] = Query(None),
):
    """
    Generic STT streaming WebSocket.

    The socket is accepted first so that authentication and billing failures
    can be reported as JSON before it is closed with code 1008. ``wsToken``
    is looked up in ``_sttTokens`` and removed on use, so each token works
    once.

    Protocol:
    Client sends JSON:
        {"type": "open", "language": "de-DE"}
        {"type": "audio", "chunk": "<base64>"}
        {"type": "close"}
        {"type": "ping"}
    Server sends JSON:
        {"type": "status", "label": "..."}
        {"type": "interim", "text": "..."}
        {"type": "final", "text": "...", "confidence": 0.95}
        {"type": "reconnect_required"}
        {"type": "pong"}
        {"type": "error", "message": "..."}   (some errors also carry "code")
        {"type": "closed"}

    Malformed client frames (invalid JSON, non-object payloads, undecodable
    or oversized audio chunks) are answered with an error message and the
    session continues. Messages with an unknown type are ignored.
    """
    await websocket.accept()

    # --- authenticate via wsToken ---
    if not wsToken:
        await websocket.send_json({"type": "error", "code": "ws_token_required", "message": "wsToken query param required"})
        await websocket.close(code=1008)
        return

    # Purge expired tokens first so an expired token is rejected like an unknown one.
    _cleanupSttTokens()
    tokenPayload = _sttTokens.pop(wsToken, None)
    if not tokenPayload:
        await websocket.send_json({"type": "error", "code": "ws_token_invalid", "message": "Invalid or expired wsToken"})
        await websocket.close(code=1008)
        return

    tokenUserId = tokenPayload["userId"]
    tokenMandateId = tokenPayload.get("mandateId", "")

    # Resolve real user for billing
    from modules.interfaces.interfaceDbApp import getRootInterface
    rootInterface = getRootInterface()
    currentUser = rootInterface.getUser(tokenUserId)
    if not currentUser:
        await websocket.send_json({"type": "error", "code": "user_not_found", "message": "User not found"})
        await websocket.close(code=1008)
        return

    # --- billing pre-flight ---
    # Fail-open: if the billing service cannot be reached, streaming is still
    # allowed. Only an explicit "not allowed" answer blocks the session.
    billingService = None
    billingDenied = False
    try:
        from modules.serviceCenter.services.serviceBilling.mainServiceBilling import getService as getBillingService
        billingService = getBillingService(currentUser, tokenMandateId)
        billingCheck = billingService.checkBalance(0.0)
        billingDenied = not billingCheck.allowed
    except Exception as e:
        logger.warning(f"STT billing pre-flight skipped: {e}")

    # Handled outside the try above so that a failure while notifying the
    # client can never be mistaken for "billing skipped" and fall through
    # to streaming.
    if billingDenied:
        await websocket.send_json({"type": "error", "code": "billing_insufficient", "message": "Insufficient balance for voice services"})
        await websocket.close(code=1008)
        return

    audioQueue: asyncio.Queue = asyncio.Queue()
    language = "de-DE"
    streamingTask: Optional[asyncio.Task] = None
    voiceInterface: Optional[VoiceObjects] = None

    async def _sendJson(payload: Dict[str, Any]) -> bool:
        """Send ``payload``; return False instead of raising when the send fails."""
        try:
            await websocket.send_json(payload)
            return True
        except Exception:
            return False

    async def _runStreaming():
        """Run one recognition stream and forward its events to the client."""
        nonlocal voiceInterface
        try:
            # Setup happens inside the try so that a failure here is reported
            # to the client rather than being lost inside the task.
            voiceInterface = getVoiceInterface(currentUser, tokenMandateId)
            if billingService:
                def _billingCb(data):
                    priceCHF = data.get("priceCHF", 0.0)
                    operation = data.get("operation", "voice")
                    if priceCHF > 0:
                        billingService.recordUsage(
                            priceCHF=priceCHF,
                            aicoreProvider="google-voice",
                            aicoreModel=operation,
                            description=f"Voice {operation}",
                        )
                voiceInterface.billingCallback = _billingCb

            async for event in voiceInterface.streamingSpeechToText(audioQueue, language):
                if event.get("reconnectRequired"):
                    await _sendJson({"type": "reconnect_required"})
                    return
                if event.get("isFinal"):
                    if event.get("transcript"):
                        await _sendJson({"type": "final", "text": event["transcript"], "confidence": event.get("confidence", 0.0)})
                else:
                    if event.get("transcript"):
                        await _sendJson({"type": "interim", "text": event["transcript"]})
        except asyncio.CancelledError:
            # Cancelled by a new "open" or by session teardown; nothing to report.
            pass
        except Exception as e:
            logger.error(f"STT streaming error: {e}")
            await _sendJson({"type": "error", "message": str(e)})

    try:
        await _sendJson({"type": "status", "label": "STT stream connected"})

        while True:
            raw = await websocket.receive_text()

            # A malformed frame gets an error reply but does not end the
            # session, in line with the oversized-chunk handling below.
            try:
                msg = json.loads(raw)
            except ValueError:
                await _sendJson({"type": "error", "code": "invalid_message", "message": "Message must be valid JSON"})
                continue
            if not isinstance(msg, dict):
                await _sendJson({"type": "error", "code": "invalid_message", "message": "Message must be a JSON object"})
                continue
            msgType = str(msg.get("type") or "").strip()

            if msgType == "open":
                language = msg.get("language") or "de-DE"
                # Stop a stream that is still running before starting a new one.
                if streamingTask and not streamingTask.done():
                    await audioQueue.put((b"", True))
                    streamingTask.cancel()
                # Each stream gets its own queue; the new task reads this one.
                audioQueue = asyncio.Queue()
                streamingTask = asyncio.create_task(_runStreaming())
                await _sendJson({"type": "status", "label": "Listening..."})

            elif msgType == "audio":
                chunkB64 = msg.get("chunk")
                if not chunkB64:
                    continue
                try:
                    chunkBytes = base64.b64decode(chunkB64)
                except (ValueError, TypeError):
                    # binascii.Error (bad padding) is a ValueError; TypeError
                    # covers a chunk that is not a string.
                    await _sendJson({"type": "error", "code": "invalid_chunk", "message": "Audio chunk is not valid base64"})
                    continue
                if len(chunkBytes) > 400_000:
                    await _sendJson({"type": "error", "code": "chunk_too_large", "message": "Audio chunk too large"})
                    continue
                await audioQueue.put((chunkBytes, False))

            elif msgType == "close":
                # (b"", True) is the end-of-stream marker for the streaming task.
                await audioQueue.put((b"", True))
                if streamingTask:
                    try:
                        # Give the stream up to 10 seconds to finish.
                        await asyncio.wait_for(streamingTask, timeout=10.0)
                    except (asyncio.TimeoutError, asyncio.CancelledError):
                        pass
                await _sendJson({"type": "closed"})
                await websocket.close()
                break

            elif msgType == "ping":
                await _sendJson({"type": "pong"})

    except WebSocketDisconnect:
        logger.info(f"STT WebSocket disconnected: userId={tokenUserId}")
    except Exception as e:
        logger.error(f"STT WebSocket error: {e}", exc_info=True)
        try:
            await websocket.send_json({"type": "error", "message": str(e)})
        except Exception:
            # The socket may already be gone; there is nobody left to notify.
            pass
    finally:
        # Always release the streaming task, whatever ended the session.
        await audioQueue.put((b"", True))
        if streamingTask and not streamingTask.done():
            streamingTask.cancel()
|