gateway/modules/routes/routeVoiceGoogle.py
2025-09-14 20:05:46 +02:00

268 lines
9.7 KiB
Python

"""
Google Cloud Voice Services Routes
Replaces Azure voice services with Google Cloud Speech-to-Text and Translation
"""
import os
import logging
from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException
from typing import Optional
from modules.connectors.connectorGoogleSpeech import ConnectorGoogleSpeech
from modules.security.auth import getCurrentUser
from modules.interfaces.interfaceAppModel import User
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Every route declared in this file is registered under the /voice-google prefix.
router = APIRouter(prefix="/voice-google", tags=["voice-google"])
# Global connector instance; stays None until get_google_speech_connector()
# builds it on first use, after which the same object is reused.
_google_speech_connector: Optional[ConnectorGoogleSpeech] = None
def get_google_speech_connector() -> ConnectorGoogleSpeech:
    """Get or create the shared Google Cloud Speech connector instance.

    The connector is built on the first call and cached in the module-level
    ``_google_speech_connector``; later calls return the cached object.

    Credentials lookup order:
      1. the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable;
      2. the first existing file among a few fallback paths, which are
         relative and therefore resolved against the current working
         directory.

    Returns:
        The cached ``ConnectorGoogleSpeech`` instance.

    Raises:
        HTTPException: status 500 when no credentials file can be located,
            or when constructing the connector raises.
    """
    global _google_speech_connector
    if _google_speech_connector is not None:
        return _google_speech_connector

    try:
        # Get credentials path from environment or config
        credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
        if not credentials_path:
            # Try to find credentials in common locations
            possible_paths = (
                "credentials/google-service-account.json",
                "config/google-credentials.json",
                "google-credentials.json",
            )
            credentials_path = next(
                (path for path in possible_paths if os.path.exists(path)),
                None,
            )
        if not credentials_path:
            detail = (
                "Google Cloud credentials not found. Please set "
                "GOOGLE_APPLICATION_CREDENTIALS environment variable or "
                "place credentials file in project directory."
            )
            logger.error(f"❌ Failed to initialize Google Cloud Speech connector: {detail}")
            raise HTTPException(status_code=500, detail=detail)

        _google_speech_connector = ConnectorGoogleSpeech(credentials_path)
        logger.info("✅ Google Cloud Speech connector initialized")
    except HTTPException:
        # Already a well-formed API error (missing credentials). Let it
        # through untouched instead of re-wrapping it in the generic handler
        # below, which would garble its detail message.
        raise
    except Exception as e:
        logger.error(f"❌ Failed to initialize Google Cloud Speech connector: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to initialize Google Cloud Speech connector: {str(e)}"
        ) from e
    return _google_speech_connector
@router.post("/speech-to-text")
async def speech_to_text(
    audio_file: UploadFile = File(...),
    language: str = Form("de-DE"),
    current_user: User = Depends(getCurrentUser)
):
    """Convert speech to text using Google Cloud Speech-to-Text API.

    Reads the whole upload into memory, asks the connector to validate the
    audio format, then runs recognition on it.

    Args:
        audio_file: Uploaded audio (multipart form file).
        language: Language code handed to the connector; defaults to "de-DE".
        current_user: Resolved by the ``getCurrentUser`` dependency; not used
            inside the handler body.

    Returns:
        A dict with ``success``, ``text``, ``confidence``, ``language`` and an
        ``audio_info`` sub-dict (``size``, ``format``, ``estimated_duration``).

    Raises:
        HTTPException: 400 when the connector reports an invalid format or a
            failed recognition; 500 for any other exception.
    """
    try:
        logger.info(f"🎤 Speech-to-text request: {audio_file.filename}, language: {language}")

        # Pull the complete upload into memory.
        payload = await audio_file.read()
        logger.info(f"📊 Audio file size: {len(payload)} bytes")

        # Reject audio the connector does not accept.
        speech = get_google_speech_connector()
        audio_check = speech.validate_audio_format(payload)
        if not audio_check["valid"]:
            reason = audio_check.get('error', 'Unknown error')
            raise HTTPException(
                status_code=400,
                detail=f"Invalid audio format: {reason}"
            )

        # Run the recognition itself.
        outcome = await speech.speech_to_text(
            audio_content=payload,
            language=language
        )
        if not outcome["success"]:
            reason = outcome.get('error', 'Unknown error')
            raise HTTPException(
                status_code=400,
                detail=f"Speech recognition failed: {reason}"
            )

        return {
            "success": True,
            "text": outcome["text"],
            "confidence": outcome["confidence"],
            "language": outcome["language"],
            "audio_info": {
                "size": len(payload),
                "format": audio_check["format"],
                "estimated_duration": audio_check.get("estimated_duration", 0)
            }
        }
    except HTTPException:
        # Deliberate API errors pass through unchanged.
        raise
    except Exception as exc:
        logger.error(f"❌ Speech-to-text error: {exc}")
        raise HTTPException(
            status_code=500,
            detail=f"Speech-to-text processing failed: {str(exc)}"
        )
@router.post("/translate")
async def translate_text(
    text: str = Form(...),
    source_language: str = Form("de"),
    target_language: str = Form("en"),
    current_user: User = Depends(getCurrentUser)
):
    """Translate text using Google Cloud Translation API.

    Args:
        text: Text to translate; must contain non-whitespace characters.
        source_language: Source language code; defaults to "de".
        target_language: Target language code; defaults to "en".
        current_user: Resolved by the ``getCurrentUser`` dependency; not used
            inside the handler body.

    Returns:
        A dict with ``success``, ``original_text``, ``translated_text``,
        ``source_language`` and ``target_language`` taken from the connector
        result.

    Raises:
        HTTPException: 400 for blank input or when the connector reports a
            failed translation; 500 for any other exception.
    """
    try:
        logger.info(f"🌐 Translation request: '{text}' ({source_language} -> {target_language})")

        # Whitespace-only input is rejected before touching the connector.
        if text.strip() == "":
            raise HTTPException(
                status_code=400,
                detail="Empty text provided for translation"
            )

        translator = get_google_speech_connector()
        outcome = await translator.translate_text(
            text=text,
            source_language=source_language,
            target_language=target_language
        )
        if not outcome["success"]:
            reason = outcome.get('error', 'Unknown error')
            raise HTTPException(
                status_code=400,
                detail=f"Translation failed: {reason}"
            )

        return {
            "success": True,
            "original_text": outcome["original_text"],
            "translated_text": outcome["translated_text"],
            "source_language": outcome["source_language"],
            "target_language": outcome["target_language"]
        }
    except HTTPException:
        # Deliberate API errors pass through unchanged.
        raise
    except Exception as exc:
        logger.error(f"❌ Translation error: {exc}")
        raise HTTPException(
            status_code=500,
            detail=f"Translation processing failed: {str(exc)}"
        )
def _save_debug_audio(upload_name: Optional[str], audio_content: bytes) -> Optional[str]:
    """Best-effort dump of the uploaded audio into ``debug_audio/``.

    Returns the path written, or ``None`` when the file could not be saved.
    """
    # The upload name is client-controlled. Keep only its last path component
    # (treating both "/" and "\\" as separators) so names such as
    # "../../x" or "sub/dir.wav" cannot point outside debug_audio/.
    raw_name = (upload_name or "unnamed").replace("\\", "/")
    safe_name = os.path.basename(raw_name) or "unnamed"
    debug_filename = f"debug_audio/audio_google_{safe_name}"
    try:
        os.makedirs("debug_audio", exist_ok=True)
        with open(debug_filename, "wb") as f:
            f.write(audio_content)
    except (OSError, ValueError) as e:
        # ValueError covers names with embedded NUL bytes. A failed debug
        # dump must not fail the request it is merely observing.
        logger.warning(f"⚠️ Could not save debug audio file {debug_filename}: {e}")
        return None
    return debug_filename


@router.post("/realtime-interpreter")
async def realtime_interpreter(
    audio_file: UploadFile = File(...),
    from_language: str = Form("de-DE"),
    to_language: str = Form("en-US"),
    connection_id: str = Form(None),
    current_user: User = Depends(getCurrentUser)
):
    """Real-time interpreter: speech to translated text using Google Cloud APIs.

    Reads the upload into memory, writes a best-effort debug copy under
    ``debug_audio/``, validates the audio format and then runs the
    connector's combined speech-to-text + translation call.

    Args:
        audio_file: Uploaded audio (multipart form file).
        from_language: Spoken language code; defaults to "de-DE".
        to_language: Target language code; defaults to "en-US".
        connection_id: Accepted as a form field but not used by this handler.
        current_user: Resolved by the ``getCurrentUser`` dependency; not used
            inside the handler body.

    Returns:
        A dict with ``success``, ``original_text``, ``translated_text``,
        ``confidence``, ``source_language``, ``target_language`` and an
        ``audio_info`` sub-dict (``size``, ``format``, ``estimated_duration``).

    Raises:
        HTTPException: 400 when the connector reports an invalid format or a
            failed pipeline; 500 for any other exception.
    """
    try:
        logger.info(f"🔄 Real-time interpreter request: {audio_file.filename}")
        logger.info(f" From: {from_language} -> To: {to_language}")

        # Read audio file
        audio_content = await audio_file.read()
        logger.info(f"📊 Audio file size: {len(audio_content)} bytes")

        # Save audio file for debugging (never fatal, filename sanitised)
        debug_filename = _save_debug_audio(audio_file.filename, audio_content)
        if debug_filename is not None:
            logger.info(f"💾 Saved audio file for debugging: {debug_filename}")

        # Validate audio format
        connector = get_google_speech_connector()
        validation = connector.validate_audio_format(audio_content)
        if not validation["valid"]:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid audio format: {validation.get('error', 'Unknown error')}"
            )

        # Perform complete pipeline: Speech-to-Text + Translation
        result = await connector.speech_to_translated_text(
            audio_content=audio_content,
            from_language=from_language,
            to_language=to_language
        )
        if not result["success"]:
            raise HTTPException(
                status_code=400,
                detail=f"Real-time interpreter failed: {result.get('error', 'Unknown error')}"
            )

        logger.info(f"✅ Real-time interpreter successful:")
        logger.info(f" Original: '{result['original_text']}'")
        logger.info(f" Translated: '{result['translated_text']}'")
        return {
            "success": True,
            "original_text": result["original_text"],
            "translated_text": result["translated_text"],
            "confidence": result["confidence"],
            "source_language": result["source_language"],
            "target_language": result["target_language"],
            "audio_info": {
                "size": len(audio_content),
                "format": validation["format"],
                "estimated_duration": validation.get("estimated_duration", 0)
            }
        }
    except HTTPException:
        # Deliberate API errors pass through unchanged.
        raise
    except Exception as e:
        logger.error(f"❌ Real-time interpreter error: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Real-time interpreter processing failed: {str(e)}"
        )
@router.get("/health")
async def health_check(current_user: User = Depends(getCurrentUser)):
    """Health check for Google Cloud voice services.

    Obtains the connector and translates the word "Hello" from English to
    German as a probe.

    Args:
        current_user: Resolved by the ``getCurrentUser`` dependency; not used
            inside the handler body.

    Returns:
        ``{"status": "healthy", ...}`` including the probe translation when
        the connector reports success; otherwise ``{"status": "unhealthy",
        "error": ...}``. Exceptions are caught and reported in the same
        "unhealthy" shape rather than raised.
    """
    try:
        # Test with a simple translation
        probe = await get_google_speech_connector().translate_text(
            text="Hello",
            source_language="en",
            target_language="de"
        )
        if not probe["success"]:
            return {
                "status": "unhealthy",
                "error": probe.get("error", "Unknown error")
            }
        return {
            "status": "healthy",
            "service": "Google Cloud Speech-to-Text & Translation",
            "test_translation": probe["translated_text"]
        }
    except Exception as exc:
        logger.error(f"❌ Health check failed: {exc}")
        return {
            "status": "unhealthy",
            "error": str(exc)
        }