# gateway/modules/connectors/connectorAzureSpeech.py

"""
Azure Speech Services Connector
Handles integration with Azure Speech Services for:
- Speech-to-Text (STT)
- Text-to-Speech (TTS)
- Translation services
"""

import logging
import asyncio
import json
import base64
from typing import Dict, Any, Optional, List, AsyncGenerator
import aiohttp
import io
import wave
import struct
import tempfile
import os
import time
from pathlib import Path
from datetime import datetime, timedelta

logger = logging.getLogger(__name__)

class ConnectorAzureSpeech:
    """Connector for Azure Speech Services."""

    def __init__(self, subscription_key: str, region: str = "westeurope"):
        """
        Store credentials and prepare endpoints, format tables and limits.

        No request is sent here; the values are only read later by the
        request methods of this connector.

        Args:
            subscription_key: Azure Speech Services subscription key
            region: Azure region used to build the regional endpoint host
                (default: westeurope)
        """
        self.subscription_key = subscription_key
        self.region = region

        # Regional speech-to-text host and the translator endpoint
        self.base_url = "https://{}.stt.speech.microsoft.com".format(region)
        self.translator_url = "https://api.cognitive.microsofttranslator.com"

        # Supported audio formats: (short name, MIME type, codec)
        stt_table = (
            ("wav", "audio/wav", "audio/pcm"),
            ("mp3", "audio/mp3", "audio/mp3"),
            ("ogg", "audio/ogg", "audio/ogg"),
        )
        self.supported_stt_formats = {
            name: {"mime": mime, "codec": codec, "sample_rate": 16000}
            for name, mime, codec in stt_table
        }

        # Output formats accepted by text_to_speech (MP3 first, then RIFF/PCM)
        mp3_outputs = [
            "audio-16khz-128kbitrate-mono-mp3",
            "audio-16khz-32kbitrate-mono-mp3",
            "audio-16khz-64kbitrate-mono-mp3",
            "audio-24khz-160kbitrate-mono-mp3",
            "audio-24khz-48kbitrate-mono-mp3",
            "audio-24khz-96kbitrate-mono-mp3",
            "audio-48khz-192kbitrate-mono-mp3",
            "audio-48khz-96kbitrate-mono-mp3",
        ]
        pcm_outputs = [
            "riff-16khz-16bit-mono-pcm",
            "riff-24khz-16bit-mono-pcm",
            "riff-48khz-16bit-mono-pcm",
        ]
        self.supported_tts_formats = mp3_outputs + pcm_outputs

        # Rate limiting: one independent per-minute counter per service
        self.rate_limits = {
            service: {"requests_per_minute": 20, "last_reset": time.time(), "request_count": 0}
            for service in ("stt", "tts", "translation")
        }

        # Request timeout settings
        self.timeout = aiohttp.ClientTimeout(total=30, connect=10)

    def _check_rate_limit(self, service_type: str) -> bool:
        """Check if rate limit is exceeded for a service type."""
        current_time = time.time()
        rate_limit = self.rate_limits[service_type]

        # Reset counter if minute has passed
        if current_time - rate_limit["last_reset"] >= 60:
            rate_limit["request_count"] = 0
            rate_limit["last_reset"] = current_time

        # Check if limit exceeded
        if rate_limit["request_count"] >= rate_limit["requests_per_minute"]:
            return False

        # Increment counter
        rate_limit["request_count"] += 1
        return True

    def _handle_azure_error(self, response_status: int, error_text: str) -> Exception:
        """Handle Azure API errors with specific error messages."""
        if response_status == 401:
            return Exception("Authentication failed. Please check your Azure Speech Services subscription key.")
        elif response_status == 403:
            return Exception("Access forbidden. Please check your Azure Speech Services permissions.")
        elif response_status == 429:
            return Exception("Rate limit exceeded. Please wait before making more requests.")
        elif response_status == 400:
            return Exception(f"Bad request: {error_text}")
        elif response_status == 500:
            return Exception("Azure Speech Services internal error. Please try again later.")
        elif response_status == 503:
            return Exception("Azure Speech Services temporarily unavailable. Please try again later.")
        else:
            return Exception(f"Azure API error {response_status}: {error_text}")

    async def _make_request_with_retry(self, url: str, method: str = "GET", headers: Dict = None, data: bytes = None, params: Dict = None, max_retries: int = 3) -> Dict:
        """Make HTTP request with retry logic."""
        if headers is None:
            headers = {}

        headers.update({
            "Ocp-Apim-Subscription-Key": self.subscription_key,
            "User-Agent": "PowerOn-Voice-Services/1.0"
        })

        # Debug: Log subscription key (masked for security)
        logger.debug(f"Using subscription key: {self.subscription_key[:8]}...{self.subscription_key[-8:] if len(self.subscription_key) > 16 else 'SHORT'}")
        logger.debug(f"Request URL: {url}")
        logger.debug(f"Request params: {params}")

        for attempt in range(max_retries):
            try:
                async with aiohttp.ClientSession(timeout=self.timeout) as session:
                    if method.upper() == "GET":
                        async with session.get(url, headers=headers, params=params) as response:
                            return await self._handle_response(response)
                    elif method.upper() == "POST":
                        async with session.post(url, headers=headers, data=data, params=params) as response:
                            return await self._handle_response(response)
                    else:
                        raise ValueError(f"Unsupported HTTP method: {method}")
            except asyncio.TimeoutError:
                if attempt == max_retries - 1:
                    raise Exception("Request timeout after multiple retries")
                logger.warning(f"Request timeout, retrying... (attempt {attempt + 1}/{max_retries})")
                await asyncio.sleep(2 ** attempt)  # Exponential backoff
            except Exception as e:
                if attempt == max_retries - 1:
                    raise
                logger.warning(f"Request failed, retrying... (attempt {attempt + 1}/{max_retries}): {str(e)}")
                await asyncio.sleep(2 ** attempt)  # Exponential backoff

    async def _make_request(self, url: str, method: str = "GET", headers: Dict = None, data: bytes = None) -> Dict:
        """Make HTTP request to Azure services."""
        if headers is None:
            headers = {}

        headers.update({
            "Ocp-Apim-Subscription-Key": self.subscription_key,
            "User-Agent": "PowerOn-Voice-Services/1.0"
        })

        async with aiohttp.ClientSession() as session:
            try:
                if method.upper() == "GET":
                    async with session.get(url, headers=headers) as response:
                        return await self._handle_response(response)
                elif method.upper() == "POST":
                    async with session.post(url, headers=headers, data=data) as response:
                        return await self._handle_response(response)
                else:
                    raise ValueError(f"Unsupported HTTP method: {method}")
            except Exception as e:
                logger.error(f"Request failed: {str(e)}")
                raise

    async def _handle_response(self, response) -> Dict:
        """
        Turn an HTTP response into a payload or raise for a failure.

        Args:
            response: Response object exposing status, headers and the
                awaitable json(), read() and text() methods

        Returns:
            The decoded JSON body when the content type mentions
            application/json, otherwise {"data": <raw body bytes>}

        Raises:
            Exception: Built by _handle_azure_error for any status other
                than 200
        """
        # Failure path first: log the body and raise a descriptive error
        if response.status != 200:
            error_text = await response.text()
            logger.error(f"API request failed: {response.status} - {error_text}")
            raise self._handle_azure_error(response.status, error_text)

        content_type = response.headers.get('content-type', '')
        if 'application/json' in content_type:
            return await response.json()

        # For audio responses, return binary data
        payload = await response.read()
        return {"data": payload}

    def _validate_audio_format(self, audio_content: bytes, expected_format: str = "wav") -> Dict[str, Any]:
        """Validate audio format and return format information."""
        try:
            # Try to detect format from content
            if audio_content.startswith(b'RIFF') and b'WAVE' in audio_content[:12]:
                format_type = "wav"
            elif audio_content.startswith(b'\xff\xfb') or audio_content.startswith(b'ID3'):
                format_type = "mp3"
            elif audio_content.startswith(b'OggS'):
                format_type = "ogg"
            elif audio_content.startswith(b'fLaC'):
                format_type = "flac"
            else:
                # If we can't detect format, assume it's raw audio or WAV without proper header
                format_type = "wav"  # Azure Speech Services can handle this

            # Validate WAV format specifically
            if format_type == "wav":
                try:
                    with io.BytesIO(audio_content) as audio_io:
                        with wave.open(audio_io, 'rb') as wav_file:
                            sample_rate = wav_file.getframerate()
                            channels = wav_file.getnchannels()
                            sample_width = wav_file.getsampwidth()

                            return {
                                "valid": True,
                                "format": format_type,
                                "sample_rate": sample_rate,
                                "channels": channels,
                                "sample_width": sample_width,
                                "duration": wav_file.getnframes() / sample_rate
                            }
                except Exception as e:
                    # If WAV validation fails, it might be raw audio data
                    # Azure Speech Services can handle raw audio, so we'll allow it
                    logger.info(f"WAV validation failed, treating as raw audio: {str(e)}")
                    return {
                        "valid": True,
                        "format": "raw_audio",
                        "sample_rate": 16000,  # Default assumption for raw audio
                        "channels": 1,  # Default assumption
                        "sample_width": 2,  # Default assumption
                        "duration": len(audio_content) / (16000 * 2)  # Rough estimate
                    }

            # For other formats, assume valid if we can detect them
            return {
                "valid": True,
                "format": format_type,
                "sample_rate": 16000,  # Default assumption
                "channels": 1,  # Default assumption
                "sample_width": 2,  # Default assumption
                "duration": 0  # Unknown
            }

        except Exception as e:
            logger.error(f"Audio validation failed: {str(e)}")
            return {"valid": False, "error": str(e)}

    def _convert_audio_to_wav(self, audio_content: bytes, target_sample_rate: int = 16000) -> bytes:
        """Convert audio to WAV format with specified sample rate."""
        try:
            # If it's already WAV, try to resample if needed
            if audio_content.startswith(b'RIFF') and b'WAVE' in audio_content[:12]:
                with io.BytesIO(audio_content) as audio_io:
                    with wave.open(audio_io, 'rb') as wav_file:
                        current_sample_rate = wav_file.getframerate()

                        # If sample rate matches, return as-is
                        if current_sample_rate == target_sample_rate:
                            return audio_content

                        # For now, return original (in production, implement resampling)
                        logger.warning(f"Audio sample rate {current_sample_rate} doesn't match target {target_sample_rate}")
                        return audio_content

            # If it's raw audio data (no header), create a basic WAV header
            elif not audio_content.startswith(b'RIFF'):
                logger.info("Converting raw audio data to WAV format")
                return self._create_wav_header(audio_content, target_sample_rate)

            # For other formats, return as-is for now
            # In production, implement proper conversion with pydub or ffmpeg
            logger.info("Audio format conversion not fully implemented - returning original")
            return audio_content

        except Exception as e:
            logger.error(f"Audio conversion failed: {str(e)}")
            raise Exception(f"Audio conversion failed: {str(e)}")

    def _create_wav_header(self, audio_data: bytes, sample_rate: int = 16000, channels: int = 1, sample_width: int = 2) -> bytes:
        """Create a WAV header for raw audio data."""
        try:
            import struct

            # Calculate data size
            data_size = len(audio_data)
            file_size = 36 + data_size

            # Create WAV header
            header = struct.pack('<4sI4s4sIHHIIHH4sI',
                b'RIFF',                    # Chunk ID
                file_size,                  # Chunk size
                b'WAVE',                    # Format
                b'fmt ',                    # Subchunk1 ID
                16,                         # Subchunk1 size
                1,                          # Audio format (PCM)
                channels,                   # Number of channels
                sample_rate,                # Sample rate
                sample_rate * channels * sample_width,  # Byte rate
                channels * sample_width,    # Block align
                sample_width * 8,           # Bits per sample
                b'data',                    # Subchunk2 ID
                data_size                   # Subchunk2 size
            )

            return header + audio_data

        except Exception as e:
            logger.error(f"Failed to create WAV header: {str(e)}")
            # Return original data if header creation fails
            return audio_data

    def _get_audio_content_type(self, audio_format: str) -> str:
        """Get MIME type for audio format."""
        if audio_format in self.supported_stt_formats:
            return self.supported_stt_formats[audio_format]["mime"]
        return "audio/wav"  # Default

    async def speech_to_text(self, audio_content: bytes, language: str = "de-DE", format: str = "detailed", audio_format: str = "wav") -> Dict:
        """
        Convert speech to text using Azure Speech Services.

        Args:
            audio_content: Audio file content as bytes
            language: Language code (e.g., "de-DE")
            format: Response format ("simple" or "detailed"); any value
                other than "detailed" is sent as "simple"
            audio_format: Audio format ("wav", "mp3", "ogg")

        Returns:
            Dict with the keys "text", "confidence", "language" (the
            requested language code), "format" and "audio_info"; detailed
            results additionally carry the service response as "raw_result"

        Raises:
            Exception: If the local rate limit is exhausted, the audio is
                rejected by validation, or the request fails
        """
        try:
            # Check rate limit
            if not self._check_rate_limit("stt"):
                raise Exception("Rate limit exceeded for speech-to-text service. Please wait before making more requests.")

            # Validate audio format
            validation_result = self._validate_audio_format(audio_content, audio_format)
            if not validation_result.get("valid", False):
                raise Exception(f"Invalid audio format: {validation_result.get('error', 'Unknown error')}")

            # Convert audio to required format if needed
            processed_audio = self._convert_audio_to_wav(audio_content)

            # Update audio_format based on validation result
            detected_format = validation_result.get("format", audio_format)
            if detected_format == "raw_audio":
                audio_format = "wav"  # Treat raw audio as WAV for Azure

            url = f"{self.base_url}/speech/recognition/conversation/cognitiveservices/v1"

            # Get appropriate content type
            content_type = self._get_audio_content_type(audio_format)
            if audio_format == "wav":
                content_type = f"{content_type}; codecs=audio/pcm; samplerate=16000"

            headers = {
                "Content-Type": content_type,
                "Accept": "application/json",
                "Ocp-Apim-Subscription-Region": self.region
            }

            params = {
                "language": language,
                "format": "detailed" if format == "detailed" else "simple"
            }

            # Make API call with retry logic
            result = await self._make_request_with_retry(
                url=url,
                method="POST",
                headers=headers,
                data=processed_audio,
                params=params
            )

            # Parse the response based on format
            if format == "detailed":
                # Detailed responses report the score per hypothesis inside
                # "NBest"; the best hypothesis comes first
                n_best = result.get("NBest") or []
                best = n_best[0] if n_best and isinstance(n_best[0], dict) else {}
                return {
                    "text": result.get("DisplayText") or best.get("Display", ""),
                    "confidence": best.get("Confidence", result.get("Confidence", 0.0)),
                    # "RecognitionStatus" is a status string, not a language;
                    # it stays available through raw_result
                    "language": language,
                    "format": format,
                    "audio_info": validation_result,
                    "raw_result": result
                }
            else:
                return {
                    "text": result.get("DisplayText", ""),
                    "confidence": 1.0,  # Simple format doesn't provide confidence
                    "language": language,
                    "format": format,
                    "audio_info": validation_result
                }

        except Exception as e:
            logger.error(f"Speech-to-text failed: {str(e)}")
            raise

    async def text_to_speech(self, text: str, language: str = "de-DE", voice: str = "de-DE-KatjaNeural", format: str = "audio-16khz-128kbitrate-mono-mp3") -> bytes:
        """
        Convert text to speech using Azure Speech Services.

        Args:
            text: Text to convert to speech
            language: Language code written to the SSML xml:lang attribute
            voice: Voice name written to the SSML voice element
            format: Output format; must be one of self.supported_tts_formats

        Returns:
            Audio data as bytes (empty bytes if the response carried no
            binary payload)

        Raises:
            Exception: If the local rate limit is exhausted, the format is
                not supported, or the request fails
        """
        try:
            # Check rate limit
            if not self._check_rate_limit("tts"):
                raise Exception("Rate limit exceeded for text-to-speech service. Please wait before making more requests.")

            # Validate format
            if format not in self.supported_tts_formats:
                raise Exception(f"Unsupported TTS format: {format}. Supported formats: {', '.join(self.supported_tts_formats)}")

            url = f"https://{self.region}.tts.speech.microsoft.com/cognitiveservices/v1"

            headers = {
                "Content-Type": "application/ssml+xml",
                "X-Microsoft-OutputFormat": format,
                "Ocp-Apim-Subscription-Key": self.subscription_key,
                "User-Agent": "PowerOn-Voice-Services/1.0"
            }

            def xml_escape(value) -> str:
                """Escape the five XML special characters ('&' first)."""
                return (
                    str(value)
                    .replace("&", "&amp;")
                    .replace("<", "&lt;")
                    .replace(">", "&gt;")
                    .replace('"', "&quot;")
                    .replace("'", "&apos;")
                )

            # Create SSML with proper escaping. The attribute values are
            # escaped as well so a quote or '<' in language/voice cannot
            # break out of the single-quoted attributes
            escaped_text = xml_escape(text)
            ssml = f"""<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='{xml_escape(language)}'>
                <voice name='{xml_escape(voice)}'>
                    {escaped_text}
                </voice>
            </speak>"""

            # Make API call with retry logic
            result = await self._make_request_with_retry(
                url=url,
                method="POST",
                headers=headers,
                data=ssml.encode('utf-8'),
                params=None
            )

            # Return audio data
            return result.get("data", b"")

        except Exception as e:
            logger.error(f"Text-to-speech failed: {str(e)}")
            raise

    async def translate_text(self, text: str, from_language: str, to_language: str) -> str:
        """
        Translate text using Azure Translator.

        Args:
            text: Text to translate
            from_language: Source language code
            to_language: Target language code

        Returns:
            Translated text; the input text is returned unchanged when it is
            empty/whitespace or when the response has an unexpected shape

        Raises:
            Exception: If the local rate limit is exhausted or the request
                fails
        """
        try:
            # Check if text is empty
            if not text or not text.strip():
                logger.debug("Empty text provided, returning original text")
                return text

            # Check rate limit
            if not self._check_rate_limit("translation"):
                raise Exception("Rate limit exceeded for translation service. Please wait before making more requests.")

            url = f"{self.translator_url}/translate"

            headers = {
                "Ocp-Apim-Subscription-Key": self.subscription_key,
                "Ocp-Apim-Subscription-Region": self.region,
                "Content-Type": "application/json"
            }

            params = {
                "api-version": "3.0",
                "from": from_language,
                "to": to_language
            }

            data = [{"text": text}]

            # Debug: Log translation request details (the key is redacted)
            loggable_headers = {
                name: ("***" if name == "Ocp-Apim-Subscription-Key" else value)
                for name, value in headers.items()
            }
            logger.debug(f"Translation request - URL: {url}")
            logger.debug(f"Translation request - Headers: {loggable_headers}")
            logger.debug(f"Translation request - Params: {params}")
            logger.debug(f"Translation request - Data: {data}")

            # Make API call with retry logic. The query parameters carry the
            # API version and the language pair, so they must be sent
            result = await self._make_request_with_retry(
                url=url,
                method="POST",
                headers=headers,
                data=json.dumps(data).encode('utf-8'),
                params=params
            )

            # Expected shape: [{"translations": [{"text": ...}, ...]}, ...]
            if isinstance(result, list) and result and isinstance(result[0], dict):
                translations = result[0].get('translations') or []
                if translations and isinstance(translations[0], dict) and 'text' in translations[0]:
                    return translations[0]['text']

            logger.warning(f"Unexpected translation response format: {result}")
            return text  # Return original text if translation fails

        except Exception as e:
            logger.error(f"Translation failed: {str(e)}")
            raise

    async def get_available_voices(self) -> List[Dict]:
        """Get list of available voices from Azure Speech Services."""
        try:
            # Azure doesn't provide a direct API for voice list, so we return a comprehensive list
            # based on Azure's supported voices
            voices = [
                # German voices
                {"name": "de-DE-KatjaNeural", "language": "de-DE", "gender": "Female", "style": "Neural", "locale": "de-DE"},
                {"name": "de-DE-ConradNeural", "language": "de-DE", "gender": "Male", "style": "Neural", "locale": "de-DE"},
                {"name": "de-DE-AmalaNeural", "language": "de-DE", "gender": "Female", "style": "Neural", "locale": "de-DE"},
                {"name": "de-DE-BerndNeural", "language": "de-DE", "gender": "Male", "style": "Neural", "locale": "de-DE"},
                {"name": "de-DE-ChristophNeural", "language": "de-DE", "gender": "Male", "style": "Neural", "locale": "de-DE"},
                {"name": "de-DE-ElkeNeural", "language": "de-DE", "gender": "Female", "style": "Neural", "locale": "de-DE"},
                {"name": "de-DE-GiselaNeural", "language": "de-DE", "gender": "Female", "style": "Neural", "locale": "de-DE"},
                {"name": "de-DE-JoergNeural", "language": "de-DE", "gender": "Male", "style": "Neural", "locale": "de-DE"},
                {"name": "de-DE-KasperNeural", "language": "de-DE", "gender": "Male", "style": "Neural", "locale": "de-DE"},
                {"name": "de-DE-KillianNeural", "language": "de-DE", "gender": "Male", "style": "Neural", "locale": "de-DE"},
                {"name": "de-DE-KlausNeural", "language": "de-DE", "gender": "Male", "style": "Neural", "locale": "de-DE"},
                {"name": "de-DE-LouisaNeural", "language": "de-DE", "gender": "Female", "style": "Neural", "locale": "de-DE"},
                {"name": "de-DE-MajaNeural", "language": "de-DE", "gender": "Female", "style": "Neural", "locale": "de-DE"},
                {"name": "de-DE-RalfNeural", "language": "de-DE", "gender": "Male", "style": "Neural", "locale": "de-DE"},
                {"name": "de-DE-TanjaNeural", "language": "de-DE", "gender": "Female", "style": "Neural", "locale": "de-DE"},

                # English (US) voices
                {"name": "en-US-AriaNeural", "language": "en-US", "gender": "Female", "style": "Neural", "locale": "en-US"},
                {"name": "en-US-DavisNeural", "language": "en-US", "gender": "Male", "style": "Neural", "locale": "en-US"},
                {"name": "en-US-GuyNeural", "language": "en-US", "gender": "Male", "style": "Neural", "locale": "en-US"},
                {"name": "en-US-JaneNeural", "language": "en-US", "gender": "Female", "style": "Neural", "locale": "en-US"},
                {"name": "en-US-JasonNeural", "language": "en-US", "gender": "Male", "style": "Neural", "locale": "en-US"},
                {"name": "en-US-JennyNeural", "language": "en-US", "gender": "Female", "style": "Neural", "locale": "en-US"},
                {"name": "en-US-MichelleNeural", "language": "en-US", "gender": "Female", "style": "Neural", "locale": "en-US"},
                {"name": "en-US-RyanNeural", "language": "en-US", "gender": "Male", "style": "Neural", "locale": "en-US"},
                {"name": "en-US-SaraNeural", "language": "en-US", "gender": "Female", "style": "Neural", "locale": "en-US"},
                {"name": "en-US-TonyNeural", "language": "en-US", "gender": "Male", "style": "Neural", "locale": "en-US"},

                # English (UK) voices
                {"name": "en-GB-LibbyNeural", "language": "en-GB", "gender": "Female", "style": "Neural", "locale": "en-GB"},
                {"name": "en-GB-MaisieNeural", "language": "en-GB", "gender": "Female", "style": "Neural", "locale": "en-GB"},
                {"name": "en-GB-RyanNeural", "language": "en-GB", "gender": "Male", "style": "Neural", "locale": "en-GB"},
                {"name": "en-GB-SoniaNeural", "language": "en-GB", "gender": "Female", "style": "Neural", "locale": "en-GB"},
                {"name": "en-GB-ThomasNeural", "language": "en-GB", "gender": "Male", "style": "Neural", "locale": "en-GB"},

                # French voices
                {"name": "fr-FR-DeniseNeural", "language": "fr-FR", "gender": "Female", "style": "Neural", "locale": "fr-FR"},
                {"name": "fr-FR-HenriNeural", "language": "fr-FR", "gender": "Male", "style": "Neural", "locale": "fr-FR"},
                {"name": "fr-FR-ArianeNeural", "language": "fr-FR", "gender": "Female", "style": "Neural", "locale": "fr-FR"},
                {"name": "fr-FR-ClaudeNeural", "language": "fr-FR", "gender": "Male", "style": "Neural", "locale": "fr-FR"},
                {"name": "fr-FR-JacquelineNeural", "language": "fr-FR", "gender": "Female", "style": "Neural", "locale": "fr-FR"},
                {"name": "fr-FR-JeromeNeural", "language": "fr-FR", "gender": "Male", "style": "Neural", "locale": "fr-FR"},
                {"name": "fr-FR-JosephineNeural", "language": "fr-FR", "gender": "Female", "style": "Neural", "locale": "fr-FR"},
                {"name": "fr-FR-MauriceNeural", "language": "fr-FR", "gender": "Male", "style": "Neural", "locale": "fr-FR"},
                {"name": "fr-FR-YvetteNeural", "language": "fr-FR", "gender": "Female", "style": "Neural", "locale": "fr-FR"},

                # Spanish voices
                {"name": "es-ES-ElviraNeural", "language": "es-ES", "gender": "Female", "style": "Neural", "locale": "es-ES"},
                {"name": "es-ES-AlvaroNeural", "language": "es-ES", "gender": "Male", "style": "Neural", "locale": "es-ES"},
                {"name": "es-ES-ArnauNeural", "language": "es-ES", "gender": "Male", "style": "Neural", "locale": "es-ES"},
                {"name": "es-ES-DarioNeural", "language": "es-ES", "gender": "Male", "style": "Neural", "locale": "es-ES"},
                {"name": "es-ES-EliasNeural", "language": "es-ES", "gender": "Male", "style": "Neural", "locale": "es-ES"},
                {"name": "es-ES-EstrellaNeural", "language": "es-ES", "gender": "Female", "style": "Neural", "locale": "es-ES"},
                {"name": "es-ES-IreneNeural", "language": "es-ES", "gender": "Female", "style": "Neural", "locale": "es-ES"},
                {"name": "es-ES-LaiaNeural", "language": "es-ES", "gender": "Female", "style": "Neural", "locale": "es-ES"},
                {"name": "es-ES-LiaNeural", "language": "es-ES", "gender": "Female", "style": "Neural", "locale": "es-ES"},
                {"name": "es-ES-NilNeural", "language": "es-ES", "gender": "Male", "style": "Neural", "locale": "es-ES"},
                {"name": "es-ES-SaulNeural", "language": "es-ES", "gender": "Male", "style": "Neural", "locale": "es-ES"},
                {"name": "es-ES-TeoNeural", "language": "es-ES", "gender": "Male", "style": "Neural", "locale": "es-ES"},
                {"name": "es-ES-TrianaNeural", "language": "es-ES", "gender": "Female", "style": "Neural", "locale": "es-ES"},
                {"name": "es-ES-VeraNeural", "language": "es-ES", "gender": "Female", "style": "Neural", "locale": "es-ES"}
            ]
            return voices

        except Exception as e:
            logger.error(f"Failed to get voices: {str(e)}")
            return []

    async def get_available_languages(self) -> List[Dict]:
        """Get list of available languages supported by Azure Speech Services."""
        try:
            # Comprehensive list of Azure Speech Services supported languages
            languages = [
                # European languages
                {"code": "de-DE", "name": "German (Germany)", "stt": True, "tts": True, "translation": True},
                {"code": "en-US", "name": "English (United States)", "stt": True, "tts": True, "translation": True},
                {"code": "en-GB", "name": "English (United Kingdom)", "stt": True, "tts": True, "translation": True},
                {"code": "fr-FR", "name": "French (France)", "stt": True, "tts": True, "translation": True},
                {"code": "es-ES", "name": "Spanish (Spain)", "stt": True, "tts": True, "translation": True},
                {"code": "es-MX", "name": "Spanish (Mexico)", "stt": True, "tts": True, "translation": True},
                {"code": "it-IT", "name": "Italian (Italy)", "stt": True, "tts": True, "translation": True},
                {"code": "pt-BR", "name": "Portuguese (Brazil)", "stt": True, "tts": True, "translation": True},
                {"code": "pt-PT", "name": "Portuguese (Portugal)", "stt": True, "tts": True, "translation": True},
                {"code": "ru-RU", "name": "Russian (Russia)", "stt": True, "tts": True, "translation": True},
                {"code": "nl-NL", "name": "Dutch (Netherlands)", "stt": True, "tts": True, "translation": True},
                {"code": "sv-SE", "name": "Swedish (Sweden)", "stt": True, "tts": True, "translation": True},
                {"code": "no-NO", "name": "Norwegian (Norway)", "stt": True, "tts": True, "translation": True},
                {"code": "da-DK", "name": "Danish (Denmark)", "stt": True, "tts": True, "translation": True},
                {"code": "fi-FI", "name": "Finnish (Finland)", "stt": True, "tts": True, "translation": True},
                {"code": "pl-PL", "name": "Polish (Poland)", "stt": True, "tts": True, "translation": True},
                {"code": "cs-CZ", "name": "Czech (Czech Republic)", "stt": True, "tts": True, "translation": True},
                {"code": "hu-HU", "name": "Hungarian (Hungary)", "stt": True, "tts": True, "translation": True},
                {"code": "ro-RO", "name": "Romanian (Romania)", "stt": True, "tts": True, "translation": True},
                {"code": "bg-BG", "name": "Bulgarian (Bulgaria)", "stt": True, "tts": True, "translation": True},
                {"code": "hr-HR", "name": "Croatian (Croatia)", "stt": True, "tts": True, "translation": True},
                {"code": "sk-SK", "name": "Slovak (Slovakia)", "stt": True, "tts": True, "translation": True},
                {"code": "sl-SI", "name": "Slovenian (Slovenia)", "stt": True, "tts": True, "translation": True},
                {"code": "et-EE", "name": "Estonian (Estonia)", "stt": True, "tts": True, "translation": True},
                {"code": "lv-LV", "name": "Latvian (Latvia)", "stt": True, "tts": True, "translation": True},
                {"code": "lt-LT", "name": "Lithuanian (Lithuania)", "stt": True, "tts": True, "translation": True},
                {"code": "mt-MT", "name": "Maltese (Malta)", "stt": True, "tts": True, "translation": True},
                {"code": "ga-IE", "name": "Irish (Ireland)", "stt": True, "tts": True, "translation": True},
                {"code": "cy-GB", "name": "Welsh (United Kingdom)", "stt": True, "tts": True, "translation": True},

                # Asian languages
                {"code": "ja-JP", "name": "Japanese (Japan)", "stt": True, "tts": True, "translation": True},
                {"code": "ko-KR", "name": "Korean (Korea)", "stt": True, "tts": True, "translation": True},
                {"code": "zh-CN", "name": "Chinese (Simplified)", "stt": True, "tts": True, "translation": True},
                {"code": "zh-TW", "name": "Chinese (Traditional)", "stt": True, "tts": True, "translation": True},
                {"code": "zh-HK", "name": "Chinese (Hong Kong)", "stt": True, "tts": True, "translation": True},
                {"code": "th-TH", "name": "Thai (Thailand)", "stt": True, "tts": True, "translation": True},
                {"code": "vi-VN", "name": "Vietnamese (Vietnam)", "stt": True, "tts": True, "translation": True},
                {"code": "id-ID", "name": "Indonesian (Indonesia)", "stt": True, "tts": True, "translation": True},
                {"code": "ms-MY", "name": "Malay (Malaysia)", "stt": True, "tts": True, "translation": True},
                {"code": "tl-PH", "name": "Filipino (Philippines)", "stt": True, "tts": True, "translation": True},

                # Middle Eastern, North African, and South/Southeast Asian languages
                {"code": "ar-SA", "name": "Arabic (Saudi Arabia)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-EG", "name": "Arabic (Egypt)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-AE", "name": "Arabic (UAE)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-KW", "name": "Arabic (Kuwait)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-QA", "name": "Arabic (Qatar)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-BH", "name": "Arabic (Bahrain)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-OM", "name": "Arabic (Oman)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-JO", "name": "Arabic (Jordan)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-LB", "name": "Arabic (Lebanon)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-PS", "name": "Arabic (Palestine)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-SY", "name": "Arabic (Syria)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-IQ", "name": "Arabic (Iraq)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-MA", "name": "Arabic (Morocco)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-DZ", "name": "Arabic (Algeria)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-TN", "name": "Arabic (Tunisia)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-LY", "name": "Arabic (Libya)", "stt": True, "tts": True, "translation": True},
                {"code": "ar-SD", "name": "Arabic (Sudan)", "stt": True, "tts": True, "translation": True},
                {"code": "he-IL", "name": "Hebrew (Israel)", "stt": True, "tts": True, "translation": True},
                {"code": "tr-TR", "name": "Turkish (Turkey)", "stt": True, "tts": True, "translation": True},
                {"code": "fa-IR", "name": "Persian (Iran)", "stt": True, "tts": True, "translation": True},
                {"code": "ur-PK", "name": "Urdu (Pakistan)", "stt": True, "tts": True, "translation": True},
                {"code": "hi-IN", "name": "Hindi (India)", "stt": True, "tts": True, "translation": True},
                {"code": "bn-BD", "name": "Bengali (Bangladesh)", "stt": True, "tts": True, "translation": True},
                {"code": "ta-IN", "name": "Tamil (India)", "stt": True, "tts": True, "translation": True},
                {"code": "te-IN", "name": "Telugu (India)", "stt": True, "tts": True, "translation": True},
                {"code": "ml-IN", "name": "Malayalam (India)", "stt": True, "tts": True, "translation": True},
                {"code": "kn-IN", "name": "Kannada (India)", "stt": True, "tts": True, "translation": True},
                {"code": "gu-IN", "name": "Gujarati (India)", "stt": True, "tts": True, "translation": True},
                {"code": "pa-IN", "name": "Punjabi (India)", "stt": True, "tts": True, "translation": True},
                {"code": "mr-IN", "name": "Marathi (India)", "stt": True, "tts": True, "translation": True},
                {"code": "ne-NP", "name": "Nepali (Nepal)", "stt": True, "tts": True, "translation": True},
                {"code": "si-LK", "name": "Sinhala (Sri Lanka)", "stt": True, "tts": True, "translation": True},
                {"code": "my-MM", "name": "Burmese (Myanmar)", "stt": True, "tts": True, "translation": True},
                {"code": "km-KH", "name": "Khmer (Cambodia)", "stt": True, "tts": True, "translation": True},
                {"code": "lo-LA", "name": "Lao (Laos)", "stt": True, "tts": True, "translation": True},

                # African languages
                {"code": "sw-KE", "name": "Swahili (Kenya)", "stt": True, "tts": True, "translation": True},
                {"code": "sw-TZ", "name": "Swahili (Tanzania)", "stt": True, "tts": True, "translation": True},
                {"code": "am-ET", "name": "Amharic (Ethiopia)", "stt": True, "tts": True, "translation": True},
                {"code": "zu-ZA", "name": "Zulu (South Africa)", "stt": True, "tts": True, "translation": True},
                {"code": "af-ZA", "name": "Afrikaans (South Africa)", "stt": True, "tts": True, "translation": True},
                {"code": "yo-NG", "name": "Yoruba (Nigeria)", "stt": True, "tts": True, "translation": True},
                {"code": "ig-NG", "name": "Igbo (Nigeria)", "stt": True, "tts": True, "translation": True},
                {"code": "ha-NG", "name": "Hausa (Nigeria)", "stt": True, "tts": True, "translation": True},

                # Other languages
                {"code": "is-IS", "name": "Icelandic (Iceland)", "stt": True, "tts": True, "translation": True},
                {"code": "mk-MK", "name": "Macedonian (North Macedonia)", "stt": True, "tts": True, "translation": True},
                {"code": "sq-AL", "name": "Albanian (Albania)", "stt": True, "tts": True, "translation": True},
                {"code": "sr-RS", "name": "Serbian (Serbia)", "stt": True, "tts": True, "translation": True},
                {"code": "bs-BA", "name": "Bosnian (Bosnia and Herzegovina)", "stt": True, "tts": True, "translation": True},
                {"code": "me-ME", "name": "Montenegrin (Montenegro)", "stt": True, "tts": True, "translation": True},
                {"code": "uk-UA", "name": "Ukrainian (Ukraine)", "stt": True, "tts": True, "translation": True},
                {"code": "be-BY", "name": "Belarusian (Belarus)", "stt": True, "tts": True, "translation": True},
                {"code": "ka-GE", "name": "Georgian (Georgia)", "stt": True, "tts": True, "translation": True},
                {"code": "hy-AM", "name": "Armenian (Armenia)", "stt": True, "tts": True, "translation": True},
                {"code": "az-AZ", "name": "Azerbaijani (Azerbaijan)", "stt": True, "tts": True, "translation": True},
                {"code": "kk-KZ", "name": "Kazakh (Kazakhstan)", "stt": True, "tts": True, "translation": True},
                {"code": "ky-KG", "name": "Kyrgyz (Kyrgyzstan)", "stt": True, "tts": True, "translation": True},
                {"code": "uz-UZ", "name": "Uzbek (Uzbekistan)", "stt": True, "tts": True, "translation": True},
                {"code": "tg-TJ", "name": "Tajik (Tajikistan)", "stt": True, "tts": True, "translation": True},
                {"code": "mn-MN", "name": "Mongolian (Mongolia)", "stt": True, "tts": True, "translation": True}
            ]
            return languages

        except Exception as e:
            logger.error(f"Failed to get languages: {str(e)}")
            return []

    async def test_connection(self) -> bool:
        """Test Azure Speech Services connection."""
        try:
            # Test with a simple TTS request
            test_audio = await self.text_to_speech("Test", "en-US", "en-US-AriaNeural")
            return len(test_audio) > 0
        except Exception as e:
            logger.error(f"Connection test failed: {str(e)}")
            return False

    async def stream_speech_to_text(self, audio_stream: AsyncGenerator[bytes, None], language: str = "de-DE", format: str = "detailed", audio_format: str = "wav") -> AsyncGenerator[Dict, None]:
        """
        Stream speech to text using Azure Speech Services.

        Args:
            audio_stream: Async generator yielding audio chunks
            language: Language code (e.g., "de-DE")
            format: Response format ("simple" or "detailed")
            audio_format: Audio format ("wav", "mp3", "ogg")

        Yields:
            Dict with partial transcription results
        """
        try:
            # Check rate limit
            if not self._check_rate_limit("stt"):
                raise Exception("Rate limit exceeded for speech-to-text service. Please wait before making more requests.")

            url = f"{self.base_url}/speech/recognition/conversation/cognitiveservices/v1"

            # Get appropriate content type
            content_type = self._get_audio_content_type(audio_format)
            if audio_format == "wav":
                content_type = f"{content_type}; codecs=audio/pcm; samplerate=16000"

            headers = {
                "Content-Type": content_type,
                "Accept": "application/json",
                "Ocp-Apim-Subscription-Key": self.subscription_key,
                "Ocp-Apim-Subscription-Region": self.region
            }

            params = {
                "language": language,
                "format": "detailed" if format == "detailed" else "simple"
            }

            # Process audio stream in chunks
            async with aiohttp.ClientSession(timeout=self.timeout) as session:
                async for audio_chunk in audio_stream:
                    try:
                        # Validate chunk
                        if not audio_chunk:
                            continue

                        # Make API call for this chunk
                        async with session.post(
                            url,
                            headers=headers,
                            params=params,
                            data=audio_chunk
                        ) as response:
                            if response.status == 200:
                                result = await response.json()

                                # Yield partial result
                                if format == "detailed":
                                    yield {
                                        "text": result.get("DisplayText", ""),
                                        "confidence": result.get("Confidence", 0.0),
                                        "language": result.get("RecognitionStatus", language),
                                        "format": format,
                                        "is_final": result.get("RecognitionStatus") == "Success",
                                        "raw_result": result
                                    }
                                else:
                                    yield {
                                        "text": result.get("DisplayText", ""),
                                        "confidence": 1.0,
                                        "language": language,
                                        "format": format,
                                        "is_final": result.get("RecognitionStatus") == "Success"
                                    }
                            else:
                                error_text = await response.text()
                                logger.error(f"Streaming STT API failed: {response.status} - {error_text}")
                                yield {
                                    "error": f"API error {response.status}: {error_text}",
                                    "is_final": True
                                }

                    except Exception as e:
                        logger.error(f"Error processing audio chunk: {str(e)}")
                        yield {
                            "error": str(e),
                            "is_final": True
                        }

        except Exception as e:
            logger.error(f"Streaming speech-to-text failed: {str(e)}")
            yield {
                "error": str(e),
                "is_final": True
            }

    async def stream_text_to_speech(self, text_stream: AsyncGenerator[str, None], language: str = "de-DE", voice: str = "de-DE-KatjaNeural", format: str = "audio-16khz-128kbitrate-mono-mp3") -> AsyncGenerator[bytes, None]:
        """
        Stream text to speech using Azure Speech Services.

        Args:
            text_stream: Async generator yielding text chunks
            language: Language code
            voice: Voice name
            format: Audio format

        Yields:
            Audio data chunks as bytes
        """
        try:
            # Check rate limit
            if not self._check_rate_limit("tts"):
                raise Exception("Rate limit exceeded for text-to-speech service. Please wait before making more requests.")

            # Validate format
            if format not in self.supported_tts_formats:
                raise Exception(f"Unsupported TTS format: {format}. Supported formats: {', '.join(self.supported_tts_formats)}")

            url = f"https://{self.region}.tts.speech.microsoft.com/cognitiveservices/v1"

            headers = {
                "Content-Type": "application/ssml+xml",
                "X-Microsoft-OutputFormat": format,
                "Ocp-Apim-Subscription-Key": self.subscription_key,
                "User-Agent": "PowerOn-Voice-Services/1.0"
            }

            # Process text stream in chunks
            async with aiohttp.ClientSession(timeout=self.timeout) as session:
                async for text_chunk in text_stream:
                    try:
                        if not text_chunk.strip():
                            continue

                        # Create SSML for this chunk
                        escaped_text = text_chunk.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;").replace("'", "&apos;")
                        ssml = f"""<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='{language}'>
                            <voice name='{voice}'>
                                {escaped_text}
                            </voice>
                        </speak>"""

                        # Make API call for this chunk
                        async with session.post(
                            url,
                            headers=headers,
                            data=ssml.encode('utf-8')
                        ) as response:
                            if response.status == 200:
                                audio_data = await response.read()
                                if audio_data:
                                    yield audio_data
                            else:
                                error_text = await response.text()
                                logger.error(f"Streaming TTS API failed: {response.status} - {error_text}")

                    except Exception as e:
                        logger.error(f"Error processing text chunk: {str(e)}")

        except Exception as e:
            logger.error(f"Streaming text-to-speech failed: {str(e)}")

    async def stream_realtime_interpreter(self, audio_stream: AsyncGenerator[bytes, None], from_language: str = "de-DE", to_language: str = "en-US") -> AsyncGenerator[Dict, None]:
        """
        Stream real-time interpreter: speech to translated text.

        Args:
            audio_stream: Async generator yielding audio chunks
            from_language: Source language code
            to_language: Target language code

        Yields:
            Dict with translation results
        """
        try:
            # Check rate limits
            if not self._check_rate_limit("stt") or not self._check_rate_limit("translation"):
                raise Exception("Rate limit exceeded for interpreter service. Please wait before making more requests.")

            # Process audio stream
            async for stt_result in self.stream_speech_to_text(audio_stream, from_language):
                if "error" in stt_result:
                    yield stt_result
                    continue

                original_text = stt_result.get("text", "")

                # Translate text if different languages
                translated_text = original_text
                if from_language != to_language and original_text.strip():
                    try:
                        translated_text = await self.translate_text(
                            text=original_text,
                            from_language=from_language,
                            to_language=to_language
                        )
                    except Exception as e:
                        logger.warning(f"Translation failed: {str(e)}")
                        translated_text = original_text

                yield {
                    "original_text": original_text,
                    "translated_text": translated_text,
                    "from_language": from_language,
                    "to_language": to_language,
                    "confidence": stt_result.get("confidence", 0.0),
                    "is_final": stt_result.get("is_final", False)
                }

        except Exception as e:
            logger.error(f"Streaming realtime interpreter failed: {str(e)}")
            yield {
                "error": str(e),
                "is_final": True
            }