fixed delta sync - added voice basics
This commit is contained in:
parent
ada61061c4
commit
ff079f5428
35 changed files with 582 additions and 2300 deletions
7
app.py
7
app.py
|
|
@ -249,8 +249,5 @@ app.include_router(msftRouter)
|
||||||
from modules.routes.routeSecurityGoogle import router as googleRouter
|
from modules.routes.routeSecurityGoogle import router as googleRouter
|
||||||
app.include_router(googleRouter)
|
app.include_router(googleRouter)
|
||||||
|
|
||||||
from modules.routes.routeVoiceAzure import router as voiceRouter
|
from modules.routes.routeVoiceGoogle import router as voiceGoogleRouter
|
||||||
app.include_router(voiceRouter)
|
app.include_router(voiceGoogleRouter)
|
||||||
|
|
||||||
from modules.routes.routeVoiceWebSocket import router as voiceWebSocketRouter
|
|
||||||
app.include_router(voiceWebSocketRouter)
|
|
||||||
|
|
@ -48,9 +48,8 @@ Service_GOOGLE_CLIENT_SECRET = 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
|
||||||
# Tavily Web Search configuration
|
# Tavily Web Search configuration
|
||||||
Connector_WebTavily_API_KEY = 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
|
Connector_WebTavily_API_KEY = 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
|
||||||
|
|
||||||
# Azure Speech Services configuration
|
# Google Cloud Speech Services configuration
|
||||||
Connector_AzureSpeech_SUBSCRIPTION_KEY = 4HuOIbNfeZRyQp3ouqCzyYKpRf0rJxMNwPaoKwdoJDUxl5u9FzK2JQQJ99BIACI8hq2XJ3w3AAAYACOGrE9c
|
# Set GOOGLE_APPLICATION_CREDENTIALS environment variable or place credentials file in project directory
|
||||||
Connector_AzureSpeech_REGION = switzerlandnorth
|
|
||||||
|
|
||||||
# Web Search configuration
|
# Web Search configuration
|
||||||
Web_Search_MAX_QUERY_LENGTH = 400
|
Web_Search_MAX_QUERY_LENGTH = 400
|
||||||
|
|
|
||||||
BIN
debug_audio/audio_20250913_223438.wav
Normal file
BIN
debug_audio/audio_20250913_223438.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_223658.wav
Normal file
BIN
debug_audio/audio_20250913_223658.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_224003.wav
Normal file
BIN
debug_audio/audio_20250913_224003.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_224258.wav
Normal file
BIN
debug_audio/audio_20250913_224258.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_224524.wav
Normal file
BIN
debug_audio/audio_20250913_224524.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_224801.wav
Normal file
BIN
debug_audio/audio_20250913_224801.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_230817.wav
Normal file
BIN
debug_audio/audio_20250913_230817.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_230927.wav
Normal file
BIN
debug_audio/audio_20250913_230927.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_231253.wav
Normal file
BIN
debug_audio/audio_20250913_231253.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_231321.wav
Normal file
BIN
debug_audio/audio_20250913_231321.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_231611.wav
Normal file
BIN
debug_audio/audio_20250913_231611.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_231935.wav
Normal file
BIN
debug_audio/audio_20250913_231935.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_232141.wav
Normal file
BIN
debug_audio/audio_20250913_232141.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_232309.wav
Normal file
BIN
debug_audio/audio_20250913_232309.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_232518.wav
Normal file
BIN
debug_audio/audio_20250913_232518.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_232659.wav
Normal file
BIN
debug_audio/audio_20250913_232659.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_232941.wav
Normal file
BIN
debug_audio/audio_20250913_232941.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_233053.wav
Normal file
BIN
debug_audio/audio_20250913_233053.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_233155.wav
Normal file
BIN
debug_audio/audio_20250913_233155.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_233607.wav
Normal file
BIN
debug_audio/audio_20250913_233607.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_234106.wav
Normal file
BIN
debug_audio/audio_20250913_234106.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_234245.wav
Normal file
BIN
debug_audio/audio_20250913_234245.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_234843.wav
Normal file
BIN
debug_audio/audio_20250913_234843.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_235136.wav
Normal file
BIN
debug_audio/audio_20250913_235136.wav
Normal file
Binary file not shown.
BIN
debug_audio/audio_20250913_235409.wav
Normal file
BIN
debug_audio/audio_20250913_235409.wav
Normal file
Binary file not shown.
|
|
@ -1,44 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Script to force Microsoft re-authentication for SharePoint access.
|
|
||||||
This will disconnect the existing connection and provide a new login URL.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import requests
|
|
||||||
import json
|
|
||||||
|
|
||||||
# Configuration
|
|
||||||
BASE_URL = "http://localhost:8000" # Adjust if your server runs on different port
|
|
||||||
CONNECTION_ID = "cc62583d-3a68-44b6-8283-726725916a7e" # From the logs
|
|
||||||
|
|
||||||
def force_reauth():
|
|
||||||
"""Force Microsoft re-authentication by disconnecting and providing new login URL."""
|
|
||||||
|
|
||||||
print("🔄 Forcing Microsoft re-authentication for SharePoint access...")
|
|
||||||
|
|
||||||
# Step 1: Disconnect existing connection
|
|
||||||
print(f"1. Disconnecting connection {CONNECTION_ID}...")
|
|
||||||
disconnect_url = f"{BASE_URL}/api/connections/{CONNECTION_ID}/disconnect"
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = requests.post(disconnect_url)
|
|
||||||
if response.status_code == 200:
|
|
||||||
print("✅ Connection disconnected successfully")
|
|
||||||
else:
|
|
||||||
print(f"❌ Failed to disconnect: {response.status_code} - {response.text}")
|
|
||||||
return
|
|
||||||
except Exception as e:
|
|
||||||
print(f"❌ Error disconnecting: {e}")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Step 2: Get new login URL
|
|
||||||
print("2. Getting new Microsoft login URL...")
|
|
||||||
login_url = f"{BASE_URL}/api/msft/login?state=connection&connectionId={CONNECTION_ID}"
|
|
||||||
|
|
||||||
print(f"\n🔗 Please visit this URL to re-authenticate with SharePoint permissions:")
|
|
||||||
print(f" {login_url}")
|
|
||||||
print("\nAfter re-authentication, the JIRA sync should work with SharePoint access.")
|
|
||||||
print("\nNote: The new token will include Sites.ReadWrite.All and Files.ReadWrite.All scopes.")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
force_reauth()
|
|
||||||
|
|
@ -1,928 +0,0 @@
|
||||||
"""
|
|
||||||
Azure Speech Services Connector
|
|
||||||
Handles integration with Azure Speech Services for:
|
|
||||||
- Speech-to-Text (STT)
|
|
||||||
- Text-to-Speech (TTS)
|
|
||||||
- Translation services
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import asyncio
|
|
||||||
import json
|
|
||||||
import base64
|
|
||||||
from typing import Dict, Any, Optional, List, AsyncGenerator
|
|
||||||
import aiohttp
|
|
||||||
import io
|
|
||||||
import wave
|
|
||||||
import struct
|
|
||||||
import tempfile
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
from pathlib import Path
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class ConnectorAzureSpeech:
|
|
||||||
"""Connector for Azure Speech Services."""
|
|
||||||
|
|
||||||
def __init__(self, subscription_key: str, region: str = "westeurope"):
|
|
||||||
"""
|
|
||||||
Initialize Azure Speech connector.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
subscription_key: Azure Speech Services subscription key
|
|
||||||
region: Azure region (default: westeurope)
|
|
||||||
"""
|
|
||||||
self.subscription_key = subscription_key
|
|
||||||
self.region = region
|
|
||||||
self.base_url = f"https://{region}.stt.speech.microsoft.com"
|
|
||||||
self.translator_url = "https://api.cognitive.microsofttranslator.com"
|
|
||||||
|
|
||||||
# Supported audio formats
|
|
||||||
self.supported_stt_formats = {
|
|
||||||
"wav": {"mime": "audio/wav", "codec": "audio/pcm", "sample_rate": 16000},
|
|
||||||
"mp3": {"mime": "audio/mp3", "codec": "audio/mp3", "sample_rate": 16000},
|
|
||||||
"ogg": {"mime": "audio/ogg", "codec": "audio/ogg", "sample_rate": 16000}
|
|
||||||
}
|
|
||||||
|
|
||||||
self.supported_tts_formats = [
|
|
||||||
"audio-16khz-128kbitrate-mono-mp3",
|
|
||||||
"audio-16khz-32kbitrate-mono-mp3",
|
|
||||||
"audio-16khz-64kbitrate-mono-mp3",
|
|
||||||
"audio-24khz-160kbitrate-mono-mp3",
|
|
||||||
"audio-24khz-48kbitrate-mono-mp3",
|
|
||||||
"audio-24khz-96kbitrate-mono-mp3",
|
|
||||||
"audio-48khz-192kbitrate-mono-mp3",
|
|
||||||
"audio-48khz-96kbitrate-mono-mp3",
|
|
||||||
"riff-16khz-16bit-mono-pcm",
|
|
||||||
"riff-24khz-16bit-mono-pcm",
|
|
||||||
"riff-48khz-16bit-mono-pcm"
|
|
||||||
]
|
|
||||||
|
|
||||||
# Rate limiting
|
|
||||||
self.rate_limits = {
|
|
||||||
"stt": {"requests_per_minute": 20, "last_reset": time.time(), "request_count": 0},
|
|
||||||
"tts": {"requests_per_minute": 20, "last_reset": time.time(), "request_count": 0},
|
|
||||||
"translation": {"requests_per_minute": 20, "last_reset": time.time(), "request_count": 0}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Request timeout settings
|
|
||||||
self.timeout = aiohttp.ClientTimeout(total=30, connect=10)
|
|
||||||
|
|
||||||
def _check_rate_limit(self, service_type: str) -> bool:
|
|
||||||
"""Check if rate limit is exceeded for a service type."""
|
|
||||||
current_time = time.time()
|
|
||||||
rate_limit = self.rate_limits[service_type]
|
|
||||||
|
|
||||||
# Reset counter if minute has passed
|
|
||||||
if current_time - rate_limit["last_reset"] >= 60:
|
|
||||||
rate_limit["request_count"] = 0
|
|
||||||
rate_limit["last_reset"] = current_time
|
|
||||||
|
|
||||||
# Check if limit exceeded
|
|
||||||
if rate_limit["request_count"] >= rate_limit["requests_per_minute"]:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Increment counter
|
|
||||||
rate_limit["request_count"] += 1
|
|
||||||
return True
|
|
||||||
|
|
||||||
def _handle_azure_error(self, response_status: int, error_text: str) -> Exception:
|
|
||||||
"""Handle Azure API errors with specific error messages."""
|
|
||||||
if response_status == 401:
|
|
||||||
return Exception("Authentication failed. Please check your Azure Speech Services subscription key.")
|
|
||||||
elif response_status == 403:
|
|
||||||
return Exception("Access forbidden. Please check your Azure Speech Services permissions.")
|
|
||||||
elif response_status == 429:
|
|
||||||
return Exception("Rate limit exceeded. Please wait before making more requests.")
|
|
||||||
elif response_status == 400:
|
|
||||||
return Exception(f"Bad request: {error_text}")
|
|
||||||
elif response_status == 500:
|
|
||||||
return Exception("Azure Speech Services internal error. Please try again later.")
|
|
||||||
elif response_status == 503:
|
|
||||||
return Exception("Azure Speech Services temporarily unavailable. Please try again later.")
|
|
||||||
else:
|
|
||||||
return Exception(f"Azure API error {response_status}: {error_text}")
|
|
||||||
|
|
||||||
async def _make_request_with_retry(self, url: str, method: str = "GET", headers: Dict = None, data: bytes = None, params: Dict = None, max_retries: int = 3) -> Dict:
|
|
||||||
"""Make HTTP request with retry logic."""
|
|
||||||
if headers is None:
|
|
||||||
headers = {}
|
|
||||||
|
|
||||||
headers.update({
|
|
||||||
"Ocp-Apim-Subscription-Key": self.subscription_key,
|
|
||||||
"User-Agent": "PowerOn-Voice-Services/1.0"
|
|
||||||
})
|
|
||||||
|
|
||||||
# Debug: Log subscription key (masked for security)
|
|
||||||
logger.debug(f"Using subscription key: {self.subscription_key[:8]}...{self.subscription_key[-8:] if len(self.subscription_key) > 16 else 'SHORT'}")
|
|
||||||
logger.debug(f"Request URL: {url}")
|
|
||||||
logger.debug(f"Request params: {params}")
|
|
||||||
|
|
||||||
for attempt in range(max_retries):
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession(timeout=self.timeout) as session:
|
|
||||||
if method.upper() == "GET":
|
|
||||||
async with session.get(url, headers=headers, params=params) as response:
|
|
||||||
return await self._handle_response(response)
|
|
||||||
elif method.upper() == "POST":
|
|
||||||
async with session.post(url, headers=headers, data=data, params=params) as response:
|
|
||||||
return await self._handle_response(response)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unsupported HTTP method: {method}")
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
if attempt == max_retries - 1:
|
|
||||||
raise Exception("Request timeout after multiple retries")
|
|
||||||
logger.warning(f"Request timeout, retrying... (attempt {attempt + 1}/{max_retries})")
|
|
||||||
await asyncio.sleep(2 ** attempt) # Exponential backoff
|
|
||||||
except Exception as e:
|
|
||||||
if attempt == max_retries - 1:
|
|
||||||
raise
|
|
||||||
logger.warning(f"Request failed, retrying... (attempt {attempt + 1}/{max_retries}): {str(e)}")
|
|
||||||
await asyncio.sleep(2 ** attempt) # Exponential backoff
|
|
||||||
|
|
||||||
async def _make_request(self, url: str, method: str = "GET", headers: Dict = None, data: bytes = None) -> Dict:
|
|
||||||
"""Make HTTP request to Azure services."""
|
|
||||||
if headers is None:
|
|
||||||
headers = {}
|
|
||||||
|
|
||||||
headers.update({
|
|
||||||
"Ocp-Apim-Subscription-Key": self.subscription_key,
|
|
||||||
"User-Agent": "PowerOn-Voice-Services/1.0"
|
|
||||||
})
|
|
||||||
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
try:
|
|
||||||
if method.upper() == "GET":
|
|
||||||
async with session.get(url, headers=headers) as response:
|
|
||||||
return await self._handle_response(response)
|
|
||||||
elif method.upper() == "POST":
|
|
||||||
async with session.post(url, headers=headers, data=data) as response:
|
|
||||||
return await self._handle_response(response)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unsupported HTTP method: {method}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Request failed: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
async def _handle_response(self, response) -> Dict:
|
|
||||||
"""Handle HTTP response."""
|
|
||||||
if response.status == 200:
|
|
||||||
content_type = response.headers.get('content-type', '')
|
|
||||||
if 'application/json' in content_type:
|
|
||||||
return await response.json()
|
|
||||||
else:
|
|
||||||
# For audio responses, return binary data
|
|
||||||
return {"data": await response.read()}
|
|
||||||
else:
|
|
||||||
error_text = await response.text()
|
|
||||||
logger.error(f"API request failed: {response.status} - {error_text}")
|
|
||||||
raise self._handle_azure_error(response.status, error_text)
|
|
||||||
|
|
||||||
def _validate_audio_format(self, audio_content: bytes, expected_format: str = "wav") -> Dict[str, Any]:
|
|
||||||
"""Validate audio format and return format information."""
|
|
||||||
try:
|
|
||||||
# Try to detect format from content
|
|
||||||
if audio_content.startswith(b'RIFF') and b'WAVE' in audio_content[:12]:
|
|
||||||
format_type = "wav"
|
|
||||||
elif audio_content.startswith(b'\xff\xfb') or audio_content.startswith(b'ID3'):
|
|
||||||
format_type = "mp3"
|
|
||||||
elif audio_content.startswith(b'OggS'):
|
|
||||||
format_type = "ogg"
|
|
||||||
elif audio_content.startswith(b'fLaC'):
|
|
||||||
format_type = "flac"
|
|
||||||
else:
|
|
||||||
# If we can't detect format, assume it's raw audio or WAV without proper header
|
|
||||||
format_type = "wav" # Azure Speech Services can handle this
|
|
||||||
|
|
||||||
# Validate WAV format specifically
|
|
||||||
if format_type == "wav":
|
|
||||||
try:
|
|
||||||
with io.BytesIO(audio_content) as audio_io:
|
|
||||||
with wave.open(audio_io, 'rb') as wav_file:
|
|
||||||
sample_rate = wav_file.getframerate()
|
|
||||||
channels = wav_file.getnchannels()
|
|
||||||
sample_width = wav_file.getsampwidth()
|
|
||||||
|
|
||||||
return {
|
|
||||||
"valid": True,
|
|
||||||
"format": format_type,
|
|
||||||
"sample_rate": sample_rate,
|
|
||||||
"channels": channels,
|
|
||||||
"sample_width": sample_width,
|
|
||||||
"duration": wav_file.getnframes() / sample_rate
|
|
||||||
}
|
|
||||||
except Exception as e:
|
|
||||||
# If WAV validation fails, it might be raw audio data
|
|
||||||
# Azure Speech Services can handle raw audio, so we'll allow it
|
|
||||||
logger.info(f"WAV validation failed, treating as raw audio: {str(e)}")
|
|
||||||
return {
|
|
||||||
"valid": True,
|
|
||||||
"format": "raw_audio",
|
|
||||||
"sample_rate": 16000, # Default assumption for raw audio
|
|
||||||
"channels": 1, # Default assumption
|
|
||||||
"sample_width": 2, # Default assumption
|
|
||||||
"duration": len(audio_content) / (16000 * 2) # Rough estimate
|
|
||||||
}
|
|
||||||
|
|
||||||
# For other formats, assume valid if we can detect them
|
|
||||||
return {
|
|
||||||
"valid": True,
|
|
||||||
"format": format_type,
|
|
||||||
"sample_rate": 16000, # Default assumption
|
|
||||||
"channels": 1, # Default assumption
|
|
||||||
"sample_width": 2, # Default assumption
|
|
||||||
"duration": 0 # Unknown
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Audio validation failed: {str(e)}")
|
|
||||||
return {"valid": False, "error": str(e)}
|
|
||||||
|
|
||||||
def _convert_audio_to_wav(self, audio_content: bytes, target_sample_rate: int = 16000) -> bytes:
|
|
||||||
"""Convert audio to WAV format with specified sample rate."""
|
|
||||||
try:
|
|
||||||
# If it's already WAV, try to resample if needed
|
|
||||||
if audio_content.startswith(b'RIFF') and b'WAVE' in audio_content[:12]:
|
|
||||||
with io.BytesIO(audio_content) as audio_io:
|
|
||||||
with wave.open(audio_io, 'rb') as wav_file:
|
|
||||||
current_sample_rate = wav_file.getframerate()
|
|
||||||
|
|
||||||
# If sample rate matches, return as-is
|
|
||||||
if current_sample_rate == target_sample_rate:
|
|
||||||
return audio_content
|
|
||||||
|
|
||||||
# For now, return original (in production, implement resampling)
|
|
||||||
logger.warning(f"Audio sample rate {current_sample_rate} doesn't match target {target_sample_rate}")
|
|
||||||
return audio_content
|
|
||||||
|
|
||||||
# If it's raw audio data (no header), create a basic WAV header
|
|
||||||
elif not audio_content.startswith(b'RIFF'):
|
|
||||||
logger.info("Converting raw audio data to WAV format")
|
|
||||||
return self._create_wav_header(audio_content, target_sample_rate)
|
|
||||||
|
|
||||||
# For other formats, return as-is for now
|
|
||||||
# In production, implement proper conversion with pydub or ffmpeg
|
|
||||||
logger.info("Audio format conversion not fully implemented - returning original")
|
|
||||||
return audio_content
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Audio conversion failed: {str(e)}")
|
|
||||||
raise Exception(f"Audio conversion failed: {str(e)}")
|
|
||||||
|
|
||||||
def _create_wav_header(self, audio_data: bytes, sample_rate: int = 16000, channels: int = 1, sample_width: int = 2) -> bytes:
|
|
||||||
"""Create a WAV header for raw audio data."""
|
|
||||||
try:
|
|
||||||
import struct
|
|
||||||
|
|
||||||
# Calculate data size
|
|
||||||
data_size = len(audio_data)
|
|
||||||
file_size = 36 + data_size
|
|
||||||
|
|
||||||
# Create WAV header
|
|
||||||
header = struct.pack('<4sI4s4sIHHIIHH4sI',
|
|
||||||
b'RIFF', # Chunk ID
|
|
||||||
file_size, # Chunk size
|
|
||||||
b'WAVE', # Format
|
|
||||||
b'fmt ', # Subchunk1 ID
|
|
||||||
16, # Subchunk1 size
|
|
||||||
1, # Audio format (PCM)
|
|
||||||
channels, # Number of channels
|
|
||||||
sample_rate, # Sample rate
|
|
||||||
sample_rate * channels * sample_width, # Byte rate
|
|
||||||
channels * sample_width, # Block align
|
|
||||||
sample_width * 8, # Bits per sample
|
|
||||||
b'data', # Subchunk2 ID
|
|
||||||
data_size # Subchunk2 size
|
|
||||||
)
|
|
||||||
|
|
||||||
return header + audio_data
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to create WAV header: {str(e)}")
|
|
||||||
# Return original data if header creation fails
|
|
||||||
return audio_data
|
|
||||||
|
|
||||||
def _get_audio_content_type(self, audio_format: str) -> str:
|
|
||||||
"""Get MIME type for audio format."""
|
|
||||||
if audio_format in self.supported_stt_formats:
|
|
||||||
return self.supported_stt_formats[audio_format]["mime"]
|
|
||||||
return "audio/wav" # Default
|
|
||||||
|
|
||||||
async def speech_to_text(self, audio_content: bytes, language: str = "de-DE", format: str = "detailed", audio_format: str = "wav") -> Dict:
|
|
||||||
"""
|
|
||||||
Convert speech to text using Azure Speech Services.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
audio_content: Audio file content as bytes
|
|
||||||
language: Language code (e.g., "de-DE")
|
|
||||||
format: Response format ("simple" or "detailed")
|
|
||||||
audio_format: Audio format ("wav", "mp3", "ogg")
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dict with transcription results
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Check rate limit
|
|
||||||
if not self._check_rate_limit("stt"):
|
|
||||||
raise Exception("Rate limit exceeded for speech-to-text service. Please wait before making more requests.")
|
|
||||||
|
|
||||||
# Validate audio format
|
|
||||||
validation_result = self._validate_audio_format(audio_content, audio_format)
|
|
||||||
if not validation_result.get("valid", False):
|
|
||||||
raise Exception(f"Invalid audio format: {validation_result.get('error', 'Unknown error')}")
|
|
||||||
|
|
||||||
# Convert audio to required format if needed
|
|
||||||
processed_audio = self._convert_audio_to_wav(audio_content)
|
|
||||||
|
|
||||||
# Update audio_format based on validation result
|
|
||||||
detected_format = validation_result.get("format", audio_format)
|
|
||||||
if detected_format == "raw_audio":
|
|
||||||
audio_format = "wav" # Treat raw audio as WAV for Azure
|
|
||||||
|
|
||||||
url = f"{self.base_url}/speech/recognition/conversation/cognitiveservices/v1"
|
|
||||||
|
|
||||||
# Get appropriate content type
|
|
||||||
content_type = self._get_audio_content_type(audio_format)
|
|
||||||
if audio_format == "wav":
|
|
||||||
content_type = f"{content_type}; codecs=audio/pcm; samplerate=16000"
|
|
||||||
|
|
||||||
headers = {
|
|
||||||
"Content-Type": content_type,
|
|
||||||
"Accept": "application/json",
|
|
||||||
"Ocp-Apim-Subscription-Region": self.region
|
|
||||||
}
|
|
||||||
|
|
||||||
params = {
|
|
||||||
"language": language,
|
|
||||||
"format": "detailed" if format == "detailed" else "simple"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Make API call with retry logic
|
|
||||||
result = await self._make_request_with_retry(
|
|
||||||
url=url,
|
|
||||||
method="POST",
|
|
||||||
headers=headers,
|
|
||||||
data=processed_audio,
|
|
||||||
params=params
|
|
||||||
)
|
|
||||||
|
|
||||||
# Parse the response based on format
|
|
||||||
if format == "detailed":
|
|
||||||
return {
|
|
||||||
"text": result.get("DisplayText", ""),
|
|
||||||
"confidence": result.get("Confidence", 0.0),
|
|
||||||
"language": result.get("RecognitionStatus", language),
|
|
||||||
"format": format,
|
|
||||||
"audio_info": validation_result,
|
|
||||||
"raw_result": result
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
return {
|
|
||||||
"text": result.get("DisplayText", ""),
|
|
||||||
"confidence": 1.0, # Simple format doesn't provide confidence
|
|
||||||
"language": language,
|
|
||||||
"format": format,
|
|
||||||
"audio_info": validation_result
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Speech-to-text failed: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
async def text_to_speech(self, text: str, language: str = "de-DE", voice: str = "de-DE-KatjaNeural", format: str = "audio-16khz-128kbitrate-mono-mp3") -> bytes:
|
|
||||||
"""
|
|
||||||
Convert text to speech using Azure Speech Services.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
text: Text to convert to speech
|
|
||||||
language: Language code
|
|
||||||
voice: Voice name
|
|
||||||
format: Audio format
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Audio data as bytes
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Check rate limit
|
|
||||||
if not self._check_rate_limit("tts"):
|
|
||||||
raise Exception("Rate limit exceeded for text-to-speech service. Please wait before making more requests.")
|
|
||||||
|
|
||||||
# Validate format
|
|
||||||
if format not in self.supported_tts_formats:
|
|
||||||
raise Exception(f"Unsupported TTS format: {format}. Supported formats: {', '.join(self.supported_tts_formats)}")
|
|
||||||
|
|
||||||
url = f"https://{self.region}.tts.speech.microsoft.com/cognitiveservices/v1"
|
|
||||||
|
|
||||||
headers = {
|
|
||||||
"Content-Type": "application/ssml+xml",
|
|
||||||
"X-Microsoft-OutputFormat": format,
|
|
||||||
"Ocp-Apim-Subscription-Key": self.subscription_key,
|
|
||||||
"User-Agent": "PowerOn-Voice-Services/1.0"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Create SSML with proper escaping
|
|
||||||
escaped_text = text.replace("&", "&").replace("<", "<").replace(">", ">").replace('"', """).replace("'", "'")
|
|
||||||
ssml = f"""<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='{language}'>
|
|
||||||
<voice name='{voice}'>
|
|
||||||
{escaped_text}
|
|
||||||
</voice>
|
|
||||||
</speak>"""
|
|
||||||
|
|
||||||
# Make API call with retry logic
|
|
||||||
result = await self._make_request_with_retry(
|
|
||||||
url=url,
|
|
||||||
method="POST",
|
|
||||||
headers=headers,
|
|
||||||
data=ssml.encode('utf-8'),
|
|
||||||
params=None
|
|
||||||
)
|
|
||||||
|
|
||||||
# Return audio data
|
|
||||||
return result.get("data", b"")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Text-to-speech failed: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
async def translate_text(self, text: str, from_language: str, to_language: str) -> str:
|
|
||||||
"""
|
|
||||||
Translate text using Azure Translator.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
text: Text to translate
|
|
||||||
from_language: Source language code
|
|
||||||
to_language: Target language code
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Translated text
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Check if text is empty
|
|
||||||
if not text or not text.strip():
|
|
||||||
logger.debug("Empty text provided, returning original text")
|
|
||||||
return text
|
|
||||||
|
|
||||||
# Check rate limit
|
|
||||||
if not self._check_rate_limit("translation"):
|
|
||||||
raise Exception("Rate limit exceeded for translation service. Please wait before making more requests.")
|
|
||||||
|
|
||||||
url = f"{self.translator_url}/translate"
|
|
||||||
|
|
||||||
headers = {
|
|
||||||
"Ocp-Apim-Subscription-Key": self.subscription_key,
|
|
||||||
"Ocp-Apim-Subscription-Region": self.region,
|
|
||||||
"Content-Type": "application/json"
|
|
||||||
}
|
|
||||||
|
|
||||||
params = {
|
|
||||||
"api-version": "3.0",
|
|
||||||
"from": from_language,
|
|
||||||
"to": to_language
|
|
||||||
}
|
|
||||||
|
|
||||||
data = [{"text": text}]
|
|
||||||
|
|
||||||
# Debug: Log translation request details
|
|
||||||
logger.debug(f"Translation request - URL: {url}")
|
|
||||||
logger.debug(f"Translation request - Headers: {headers}")
|
|
||||||
logger.debug(f"Translation request - Data: {data}")
|
|
||||||
|
|
||||||
# Make API call with retry logic
|
|
||||||
result = await self._make_request_with_retry(
|
|
||||||
url=url,
|
|
||||||
method="POST",
|
|
||||||
headers=headers,
|
|
||||||
data=json.dumps(data).encode('utf-8'),
|
|
||||||
params=None
|
|
||||||
)
|
|
||||||
|
|
||||||
if result and len(result) > 0 and 'translations' in result[0]:
|
|
||||||
return result[0]['translations'][0]['text']
|
|
||||||
else:
|
|
||||||
logger.warning(f"Unexpected translation response format: {result}")
|
|
||||||
return text # Return original text if translation fails
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Translation failed: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
async def get_available_voices(self) -> List[Dict]:
    """Return the catalogue of Azure neural voices supported by this connector.

    Azure Speech Services does not expose a lightweight REST endpoint for
    enumerating voices, so a curated static list is returned instead.

    Returns:
        List of dicts, each with keys ``name``, ``language``, ``gender``,
        ``style`` and ``locale``.
    """
    # (locale, short name, gender) triples.  The full voice identifier is
    # "<locale>-<short name>Neural" and the style is always "Neural", so
    # generating the dicts from this table removes ~50 duplicated literals.
    # The previous try/except around the literal list was unreachable dead
    # code (a list literal cannot raise) and has been dropped.
    voice_specs = [
        # German voices
        ("de-DE", "Katja", "Female"),
        ("de-DE", "Conrad", "Male"),
        ("de-DE", "Amala", "Female"),
        ("de-DE", "Bernd", "Male"),
        ("de-DE", "Christoph", "Male"),
        ("de-DE", "Elke", "Female"),
        ("de-DE", "Gisela", "Female"),
        ("de-DE", "Joerg", "Male"),
        ("de-DE", "Kasper", "Male"),
        ("de-DE", "Killian", "Male"),
        ("de-DE", "Klaus", "Male"),
        ("de-DE", "Louisa", "Female"),
        ("de-DE", "Maja", "Female"),
        ("de-DE", "Ralf", "Male"),
        ("de-DE", "Tanja", "Female"),

        # English (US) voices
        ("en-US", "Aria", "Female"),
        ("en-US", "Davis", "Male"),
        ("en-US", "Guy", "Male"),
        ("en-US", "Jane", "Female"),
        ("en-US", "Jason", "Male"),
        ("en-US", "Jenny", "Female"),
        ("en-US", "Michelle", "Female"),
        ("en-US", "Ryan", "Male"),
        ("en-US", "Sara", "Female"),
        ("en-US", "Tony", "Male"),

        # English (UK) voices
        ("en-GB", "Libby", "Female"),
        ("en-GB", "Maisie", "Female"),
        ("en-GB", "Ryan", "Male"),
        ("en-GB", "Sonia", "Female"),
        ("en-GB", "Thomas", "Male"),

        # French voices
        ("fr-FR", "Denise", "Female"),
        ("fr-FR", "Henri", "Male"),
        ("fr-FR", "Ariane", "Female"),
        ("fr-FR", "Claude", "Male"),
        ("fr-FR", "Jacqueline", "Female"),
        ("fr-FR", "Jerome", "Male"),
        ("fr-FR", "Josephine", "Female"),
        ("fr-FR", "Maurice", "Male"),
        ("fr-FR", "Yvette", "Female"),

        # Spanish voices
        ("es-ES", "Elvira", "Female"),
        ("es-ES", "Alvaro", "Male"),
        ("es-ES", "Arnau", "Male"),
        ("es-ES", "Dario", "Male"),
        ("es-ES", "Elias", "Male"),
        ("es-ES", "Estrella", "Female"),
        ("es-ES", "Irene", "Female"),
        ("es-ES", "Laia", "Female"),
        ("es-ES", "Lia", "Female"),
        ("es-ES", "Nil", "Male"),
        ("es-ES", "Saul", "Male"),
        ("es-ES", "Teo", "Male"),
        ("es-ES", "Triana", "Female"),
        ("es-ES", "Vera", "Female"),
    ]

    return [
        {
            "name": f"{locale}-{short_name}Neural",
            "language": locale,
            "gender": gender,
            "style": "Neural",
            "locale": locale,
        }
        for locale, short_name, gender in voice_specs
    ]
|
|
||||||
|
|
||||||
async def get_available_languages(self) -> List[Dict]:
    """Return the list of languages supported by Azure Speech Services.

    Every catalogue entry advertises STT, TTS and translation support, so
    the dicts are generated from compact (code, name) pairs instead of
    repeating the three identical boolean flags ~100 times.  The previous
    try/except around the literal list was unreachable dead code (a list
    literal cannot raise) and has been dropped.

    Returns:
        List of dicts with keys ``code``, ``name``, ``stt``, ``tts``
        and ``translation``.
    """
    language_names = [
        # European languages
        ("de-DE", "German (Germany)"),
        ("en-US", "English (United States)"),
        ("en-GB", "English (United Kingdom)"),
        ("fr-FR", "French (France)"),
        ("es-ES", "Spanish (Spain)"),
        ("es-MX", "Spanish (Mexico)"),
        ("it-IT", "Italian (Italy)"),
        ("pt-BR", "Portuguese (Brazil)"),
        ("pt-PT", "Portuguese (Portugal)"),
        ("ru-RU", "Russian (Russia)"),
        ("nl-NL", "Dutch (Netherlands)"),
        ("sv-SE", "Swedish (Sweden)"),
        ("no-NO", "Norwegian (Norway)"),
        ("da-DK", "Danish (Denmark)"),
        ("fi-FI", "Finnish (Finland)"),
        ("pl-PL", "Polish (Poland)"),
        ("cs-CZ", "Czech (Czech Republic)"),
        ("hu-HU", "Hungarian (Hungary)"),
        ("ro-RO", "Romanian (Romania)"),
        ("bg-BG", "Bulgarian (Bulgaria)"),
        ("hr-HR", "Croatian (Croatia)"),
        ("sk-SK", "Slovak (Slovakia)"),
        ("sl-SI", "Slovenian (Slovenia)"),
        ("et-EE", "Estonian (Estonia)"),
        ("lv-LV", "Latvian (Latvia)"),
        ("lt-LT", "Lithuanian (Lithuania)"),
        ("mt-MT", "Maltese (Malta)"),
        ("ga-IE", "Irish (Ireland)"),
        ("cy-GB", "Welsh (United Kingdom)"),

        # Asian languages
        ("ja-JP", "Japanese (Japan)"),
        ("ko-KR", "Korean (Korea)"),
        ("zh-CN", "Chinese (Simplified)"),
        ("zh-TW", "Chinese (Traditional)"),
        ("zh-HK", "Chinese (Hong Kong)"),
        ("th-TH", "Thai (Thailand)"),
        ("vi-VN", "Vietnamese (Vietnam)"),
        ("id-ID", "Indonesian (Indonesia)"),
        ("ms-MY", "Malay (Malaysia)"),
        ("tl-PH", "Filipino (Philippines)"),

        # Middle Eastern and African languages
        ("ar-SA", "Arabic (Saudi Arabia)"),
        ("ar-EG", "Arabic (Egypt)"),
        ("ar-AE", "Arabic (UAE)"),
        ("ar-KW", "Arabic (Kuwait)"),
        ("ar-QA", "Arabic (Qatar)"),
        ("ar-BH", "Arabic (Bahrain)"),
        ("ar-OM", "Arabic (Oman)"),
        ("ar-JO", "Arabic (Jordan)"),
        ("ar-LB", "Arabic (Lebanon)"),
        ("ar-PS", "Arabic (Palestine)"),
        ("ar-SY", "Arabic (Syria)"),
        ("ar-IQ", "Arabic (Iraq)"),
        ("ar-MA", "Arabic (Morocco)"),
        ("ar-DZ", "Arabic (Algeria)"),
        ("ar-TN", "Arabic (Tunisia)"),
        ("ar-LY", "Arabic (Libya)"),
        ("ar-SD", "Arabic (Sudan)"),
        ("he-IL", "Hebrew (Israel)"),
        ("tr-TR", "Turkish (Turkey)"),
        ("fa-IR", "Persian (Iran)"),
        ("ur-PK", "Urdu (Pakistan)"),
        ("hi-IN", "Hindi (India)"),
        ("bn-BD", "Bengali (Bangladesh)"),
        ("ta-IN", "Tamil (India)"),
        ("te-IN", "Telugu (India)"),
        ("ml-IN", "Malayalam (India)"),
        ("kn-IN", "Kannada (India)"),
        ("gu-IN", "Gujarati (India)"),
        ("pa-IN", "Punjabi (India)"),
        ("mr-IN", "Marathi (India)"),
        ("ne-NP", "Nepali (Nepal)"),
        ("si-LK", "Sinhala (Sri Lanka)"),
        ("my-MM", "Burmese (Myanmar)"),
        ("km-KH", "Khmer (Cambodia)"),
        ("lo-LA", "Lao (Laos)"),

        # African languages
        ("sw-KE", "Swahili (Kenya)"),
        ("sw-TZ", "Swahili (Tanzania)"),
        ("am-ET", "Amharic (Ethiopia)"),
        ("zu-ZA", "Zulu (South Africa)"),
        ("af-ZA", "Afrikaans (South Africa)"),
        ("yo-NG", "Yoruba (Nigeria)"),
        ("ig-NG", "Igbo (Nigeria)"),
        ("ha-NG", "Hausa (Nigeria)"),

        # Other languages
        ("is-IS", "Icelandic (Iceland)"),
        ("mk-MK", "Macedonian (North Macedonia)"),
        ("sq-AL", "Albanian (Albania)"),
        ("sr-RS", "Serbian (Serbia)"),
        ("bs-BA", "Bosnian (Bosnia and Herzegovina)"),
        ("me-ME", "Montenegrin (Montenegro)"),
        ("uk-UA", "Ukrainian (Ukraine)"),
        ("be-BY", "Belarusian (Belarus)"),
        ("ka-GE", "Georgian (Georgia)"),
        ("hy-AM", "Armenian (Armenia)"),
        ("az-AZ", "Azerbaijani (Azerbaijan)"),
        ("kk-KZ", "Kazakh (Kazakhstan)"),
        ("ky-KG", "Kyrgyz (Kyrgyzstan)"),
        ("uz-UZ", "Uzbek (Uzbekistan)"),
        ("tg-TJ", "Tajik (Tajikistan)"),
        ("mn-MN", "Mongolian (Mongolia)"),
    ]

    return [
        {"code": code, "name": name, "stt": True, "tts": True, "translation": True}
        for code, name in language_names
    ]
|
|
||||||
|
|
||||||
async def test_connection(self) -> bool:
    """Probe Azure Speech Services with a tiny synthesis round-trip.

    Synthesizes the word "Test" with a known en-US voice and reports
    whether non-empty audio came back.  Any failure is logged and mapped
    to ``False``; this method never raises.
    """
    try:
        sample_audio = await self.text_to_speech("Test", "en-US", "en-US-AriaNeural")
        return len(sample_audio) > 0
    except Exception as exc:
        logger.error(f"Connection test failed: {str(exc)}")
        return False
|
|
||||||
|
|
||||||
async def stream_speech_to_text(self, audio_stream: AsyncGenerator[bytes, None], language: str = "de-DE", format: str = "detailed", audio_format: str = "wav") -> AsyncGenerator[Dict, None]:
    """
    Stream speech to text using Azure Speech Services.

    Each chunk from ``audio_stream`` is posted to the short-audio REST
    recognition endpoint independently; partial results are yielded as
    they arrive.  A failed chunk yields an error dict (``is_final`` True)
    but the loop keeps consuming the stream.

    Args:
        audio_stream: Async generator yielding audio chunks
        language: Language code (e.g., "de-DE")
        format: Response format ("simple" or "detailed")
        audio_format: Audio format ("wav", "mp3", "ogg")

    Yields:
        Dict with partial transcription results
    """
    try:
        # Check rate limit before opening any connections.
        if not self._check_rate_limit("stt"):
            raise Exception("Rate limit exceeded for speech-to-text service. Please wait before making more requests.")

        url = f"{self.base_url}/speech/recognition/conversation/cognitiveservices/v1"

        # Get appropriate content type; WAV additionally declares the raw
        # PCM codec and sample rate expected by the endpoint.
        content_type = self._get_audio_content_type(audio_format)
        if audio_format == "wav":
            content_type = f"{content_type}; codecs=audio/pcm; samplerate=16000"

        headers = {
            "Content-Type": content_type,
            "Accept": "application/json",
            "Ocp-Apim-Subscription-Key": self.subscription_key,
            "Ocp-Apim-Subscription-Region": self.region
        }

        params = {
            "language": language,
            "format": "detailed" if format == "detailed" else "simple"
        }

        # Process audio stream in chunks over a single HTTP session.
        async with aiohttp.ClientSession(timeout=self.timeout) as session:
            async for audio_chunk in audio_stream:
                try:
                    # Skip empty chunks rather than posting a zero-length body.
                    if not audio_chunk:
                        continue

                    # Make API call for this chunk
                    async with session.post(
                        url,
                        headers=headers,
                        params=params,
                        data=audio_chunk
                    ) as response:
                        if response.status == 200:
                            result = await response.json()

                            # Yield partial result
                            if format == "detailed":
                                yield {
                                    "text": result.get("DisplayText", ""),
                                    # NOTE(review): the detailed response nests
                                    # confidence under NBest; a top-level
                                    # "Confidence" key may always be absent —
                                    # confirm against the API response shape.
                                    "confidence": result.get("Confidence", 0.0),
                                    # BUG FIX: previously this reported the
                                    # RecognitionStatus value under "language".
                                    "language": language,
                                    "format": format,
                                    "is_final": result.get("RecognitionStatus") == "Success",
                                    "raw_result": result
                                }
                            else:
                                yield {
                                    "text": result.get("DisplayText", ""),
                                    "confidence": 1.0,
                                    "language": language,
                                    "format": format,
                                    "is_final": result.get("RecognitionStatus") == "Success"
                                }
                        else:
                            error_text = await response.text()
                            logger.error(f"Streaming STT API failed: {response.status} - {error_text}")
                            yield {
                                "error": f"API error {response.status}: {error_text}",
                                "is_final": True
                            }

                except Exception as e:
                    logger.error(f"Error processing audio chunk: {str(e)}")
                    yield {
                        "error": str(e),
                        "is_final": True
                    }

    except Exception as e:
        logger.error(f"Streaming speech-to-text failed: {str(e)}")
        yield {
            "error": str(e),
            "is_final": True
        }
|
|
||||||
|
|
||||||
async def stream_text_to_speech(self, text_stream: AsyncGenerator[str, None], language: str = "de-DE", voice: str = "de-DE-KatjaNeural", format: str = "audio-16khz-128kbitrate-mono-mp3") -> AsyncGenerator[bytes, None]:
    """
    Stream text to speech using Azure Speech Services.

    Each text chunk is wrapped in an SSML document and synthesized with a
    separate POST; audio bytes are yielded per chunk.  A failed chunk is
    logged and skipped so one bad chunk does not abort the whole stream.

    Args:
        text_stream: Async generator yielding text chunks
        language: Language code
        voice: Voice name
        format: Audio format (must be in ``self.supported_tts_formats``)

    Yields:
        Audio data chunks as bytes
    """
    # Local import: only needed for SSML escaping in this method.
    from xml.sax.saxutils import escape

    try:
        # Check rate limit
        if not self._check_rate_limit("tts"):
            raise Exception("Rate limit exceeded for text-to-speech service. Please wait before making more requests.")

        # Validate format
        if format not in self.supported_tts_formats:
            raise Exception(f"Unsupported TTS format: {format}. Supported formats: {', '.join(self.supported_tts_formats)}")

        url = f"https://{self.region}.tts.speech.microsoft.com/cognitiveservices/v1"

        headers = {
            "Content-Type": "application/ssml+xml",
            "X-Microsoft-OutputFormat": format,
            "Ocp-Apim-Subscription-Key": self.subscription_key,
            "User-Agent": "PowerOn-Voice-Services/1.0"
        }

        # Process text stream in chunks over a single HTTP session.
        async with aiohttp.ClientSession(timeout=self.timeout) as session:
            async for text_chunk in text_stream:
                try:
                    if not text_chunk.strip():
                        continue

                    # BUG FIX: the previous chained .replace() calls replaced
                    # each markup character with itself (a no-op), so raw
                    # & < > " ' leaked into the SSML document and produced
                    # invalid XML.  escape() emits proper entities, with the
                    # two quote characters added explicitly.
                    escaped_text = escape(text_chunk, {'"': "&quot;", "'": "&apos;"})
                    ssml = f"""<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='{language}'>
<voice name='{voice}'>
{escaped_text}
</voice>
</speak>"""

                    # Make API call for this chunk
                    async with session.post(
                        url,
                        headers=headers,
                        data=ssml.encode('utf-8')
                    ) as response:
                        if response.status == 200:
                            audio_data = await response.read()
                            if audio_data:
                                yield audio_data
                        else:
                            error_text = await response.text()
                            logger.error(f"Streaming TTS API failed: {response.status} - {error_text}")

                except Exception as e:
                    logger.error(f"Error processing text chunk: {str(e)}")

    except Exception as e:
        logger.error(f"Streaming text-to-speech failed: {str(e)}")
|
|
||||||
|
|
||||||
async def stream_realtime_interpreter(self, audio_stream: AsyncGenerator[bytes, None], from_language: str = "de-DE", to_language: str = "en-US") -> AsyncGenerator[Dict, None]:
    """
    Stream real-time interpreter: speech to translated text.

    Pipes the audio stream through streaming STT, then translates each
    recognized snippet when source and target languages differ.  STT error
    dicts are passed through unchanged; translation failures fall back to
    the untranslated text.

    Args:
        audio_stream: Async generator yielding audio chunks
        from_language: Source language code
        to_language: Target language code

    Yields:
        Dict with translation results
    """
    try:
        # Both downstream services must have budget before we start.
        if not self._check_rate_limit("stt") or not self._check_rate_limit("translation"):
            raise Exception("Rate limit exceeded for interpreter service. Please wait before making more requests.")

        async for recognition in self.stream_speech_to_text(audio_stream, from_language):
            # Forward STT errors verbatim and keep consuming the stream.
            if "error" in recognition:
                yield recognition
                continue

            source_text = recognition.get("text", "")
            target_text = source_text

            # Only call the translator for non-empty text crossing languages.
            if from_language != to_language and source_text.strip():
                try:
                    target_text = await self.translate_text(
                        text=source_text,
                        from_language=from_language,
                        to_language=to_language
                    )
                except Exception as translation_error:
                    logger.warning(f"Translation failed: {str(translation_error)}")
                    target_text = source_text

            yield {
                "original_text": source_text,
                "translated_text": target_text,
                "from_language": from_language,
                "to_language": to_language,
                "confidence": recognition.get("confidence", 0.0),
                "is_final": recognition.get("is_final", False)
            }

    except Exception as e:
        logger.error(f"Streaming realtime interpreter failed: {str(e)}")
        yield {
            "error": str(e),
            "is_final": True
        }
|
|
||||||
285
modules/connectors/connectorGoogleSpeech.py
Normal file
285
modules/connectors/connectorGoogleSpeech.py
Normal file
|
|
@ -0,0 +1,285 @@
|
||||||
|
"""
|
||||||
|
Google Cloud Speech-to-Text and Translation Connector
|
||||||
|
Replaces Azure Speech Services with Google Cloud APIs
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import io
|
||||||
|
import logging
|
||||||
|
import asyncio
|
||||||
|
from typing import Dict, Optional, Any
|
||||||
|
from google.cloud import speech
|
||||||
|
from google.cloud import translate_v2 as translate
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class ConnectorGoogleSpeech:
|
||||||
|
"""
|
||||||
|
Google Cloud Speech-to-Text and Translation connector.
|
||||||
|
Handles audio processing, speech recognition, and translation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, credentials_path: Optional[str] = None):
    """
    Initialize Google Cloud Speech and Translation clients.

    Args:
        credentials_path: Path to Google Cloud service account JSON file.
            When provided it is exported via GOOGLE_APPLICATION_CREDENTIALS
            so both client libraries pick it up; otherwise the libraries
            fall back to whatever ambient credentials are configured.

    Raises:
        Exception: any client construction failure is logged and re-raised
            so callers never hold a half-initialized connector.
    """
    try:
        # Set up authentication
        if credentials_path:
            # NOTE: mutates process-wide environment; affects every Google
            # client created afterwards in this process.
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path

        # Initialize clients
        self.speech_client = speech.SpeechClient()
        self.translate_client = translate.Client()

        logger.info("✅ Google Cloud Speech and Translation clients initialized successfully")

    except Exception as e:
        logger.error(f"❌ Failed to initialize Google Cloud clients: {e}")
        raise
|
||||||
|
|
||||||
|
async def speech_to_text(self, audio_content: bytes, language: str = "de-DE",
                         sample_rate: int = 16000, channels: int = 1) -> Dict:
    """
    Convert speech to text using Google Cloud Speech-to-Text API.

    Args:
        audio_content: Raw audio data (PCM format)
        language: Language code (e.g., 'de-DE', 'en-US')
        sample_rate: Audio sample rate (default: 16000 Hz)
        channels: Number of audio channels (default: 1)

    Returns:
        Dict containing transcribed text, confidence, and metadata.  On
        failure the dict carries ``success=False`` and an ``error`` string
        instead of raising.
    """
    try:
        logger.info(f"🎤 Processing audio with Google Cloud Speech-to-Text")
        logger.info(f"📊 Audio: {len(audio_content)} bytes, {sample_rate}Hz, {channels}ch")

        # Configure audio settings
        audio = speech.RecognitionAudio(content=audio_content)
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=sample_rate,
            audio_channel_count=channels,
            language_code=language,
            enable_automatic_punctuation=True,
            model="latest_long"  # Use the latest model
        )

        # Perform speech recognition
        logger.info("🔄 Sending audio to Google Cloud Speech-to-Text...")
        # BUG FIX: recognize() is a blocking RPC; calling it directly inside
        # this coroutine stalls the entire event loop for the duration of
        # the request.  Run it on a worker thread instead.
        response = await asyncio.to_thread(
            self.speech_client.recognize, config=config, audio=audio
        )

        # Process results — only the first result/alternative is used.
        # NOTE(review): long audio may yield multiple results; confirm
        # whether callers expect them concatenated.
        if response.results:
            result = response.results[0]
            if result.alternatives:
                alternative = result.alternatives[0]
                transcribed_text = alternative.transcript
                confidence = alternative.confidence

                logger.info(f"✅ Transcription successful: '{transcribed_text}' (confidence: {confidence:.2f})")

                return {
                    "success": True,
                    "text": transcribed_text,
                    "confidence": confidence,
                    "language": language,
                    "raw_result": {
                        "transcript": transcribed_text,
                        "confidence": confidence,
                        "language_code": language
                    }
                }
            else:
                logger.warning("⚠️ No transcription alternatives found")
                return {
                    "success": False,
                    "text": "",
                    "confidence": 0.0,
                    "error": "No transcription alternatives found"
                }
        else:
            logger.warning("⚠️ No recognition results from Google Cloud")
            return {
                "success": False,
                "text": "",
                "confidence": 0.0,
                "error": "No recognition results"
            }

    except Exception as e:
        logger.error(f"❌ Google Cloud Speech-to-Text error: {e}")
        return {
            "success": False,
            "text": "",
            "confidence": 0.0,
            "error": str(e)
        }
|
||||||
|
|
||||||
|
async def translate_text(self, text: str, target_language: str = "en",
                         source_language: str = "de") -> Dict:
    """Translate *text* with the Google Cloud Translation API.

    Args:
        text: Text to translate.
        target_language: Target language code (e.g. 'en', 'de').
        source_language: Source language code (e.g. 'de', 'en').

    Returns:
        Dict with "success" plus either the translated text and language
        metadata or an "error" message.
    """
    try:
        # Guard: nothing to translate.
        if not text.strip():
            logger.warning("⚠️ Empty text provided for translation")
            return {
                "success": False,
                "translated_text": "",
                "error": "Empty text provided",
            }

        logger.info(f"🌐 Translating: '{text}' ({source_language} -> {target_language})")

        api_result = self.translate_client.translate(
            text,
            source_language=source_language,
            target_language=target_language,
        )

        translation = api_result['translatedText']
        # The API may report the language it actually detected; fall back
        # to the caller-supplied source language otherwise.
        detected = api_result.get('detectedSourceLanguage', source_language)

        logger.info(f"✅ Translation successful: '{translation}'")

        return {
            "success": True,
            "translated_text": translation,
            "source_language": detected,
            "target_language": target_language,
            "original_text": text,
        }

    except Exception as e:
        logger.error(f"❌ Google Cloud Translation error: {e}")
        return {
            "success": False,
            "translated_text": "",
            "error": str(e),
        }
|
async def speech_to_translated_text(self, audio_content: bytes,
                                    from_language: str = "de-DE",
                                    to_language: str = "en") -> Dict:
    """Run the full pipeline: speech recognition followed by translation.

    Args:
        audio_content: Raw audio data.
        from_language: Source language for speech recognition (BCP-47, e.g. 'de-DE').
        to_language: Target language for translation.

    Returns:
        Dict with the recognized text, its translation and metadata, or an
        "error" entry describing which stage failed.
    """
    try:
        logger.info(f"🔄 Starting speech-to-translation pipeline: {from_language} -> {to_language}")

        # Stage 1: recognize speech.
        recognition = await self.speech_to_text(
            audio_content=audio_content,
            language=from_language,
        )
        if not recognition["success"]:
            return {
                "success": False,
                "original_text": "",
                "translated_text": "",
                "error": f"Speech recognition failed: {recognition.get('error', 'Unknown error')}",
            }

        recognized_text = recognition["text"]

        # Stage 2: translate. The translation API expects bare ISO codes,
        # so strip any region suffix ('de-DE' -> 'de', 'en-US' -> 'en').
        translation = await self.translate_text(
            text=recognized_text,
            source_language=from_language.split('-')[0],
            target_language=to_language.split('-')[0],
        )
        if not translation["success"]:
            return {
                "success": False,
                "original_text": recognized_text,
                "translated_text": "",
                "error": f"Translation failed: {translation.get('error', 'Unknown error')}",
            }

        final_text = translation["translated_text"]

        logger.info(f"✅ Complete pipeline successful:")
        logger.info(f"   Original: '{recognized_text}'")
        logger.info(f"   Translated: '{final_text}'")

        return {
            "success": True,
            "original_text": recognized_text,
            "translated_text": final_text,
            "confidence": recognition["confidence"],
            "source_language": from_language,
            "target_language": to_language,
        }

    except Exception as e:
        logger.error(f"❌ Speech-to-translation pipeline error: {e}")
        return {
            "success": False,
            "original_text": "",
            "translated_text": "",
            "error": str(e),
        }
|
def validate_audio_format(self, audio_content: bytes) -> Dict:
    """Run lightweight sanity checks on raw audio before recognition.

    Only structural checks are performed (minimum size and 16-bit sample
    alignment); Google Cloud Speech-to-Text itself accepts many formats.

    Args:
        audio_content: Raw audio data.

    Returns:
        Dict with "valid" plus either format metadata or an "error" string.
    """
    try:
        size = len(audio_content)

        # Reject payloads too small to contain meaningful audio.
        if size < 100:
            return {
                "valid": False,
                "error": "Audio too short (less than 100 bytes)",
            }

        # 16-bit PCM samples occupy two bytes each, so a valid buffer
        # always has an even length.
        if size % 2 != 0:
            return {
                "valid": False,
                "error": "Audio data length is odd (not 16-bit PCM)",
            }

        return {
            "valid": True,
            "format": "pcm",
            "size": size,
            # Rough duration assuming 16 kHz mono, 16-bit samples.
            "estimated_duration": size / (16000 * 2),
        }

    except Exception as e:
        return {
            "valid": False,
            "error": f"Validation error: {e}",
        }
|
@ -1,813 +0,0 @@
|
||||||
"""
|
|
||||||
Azure Voice Services Route
|
|
||||||
Provides endpoints for Azure Speech Services integration including:
|
|
||||||
- Speech-to-Text (STT)
|
|
||||||
- Text-to-Speech (TTS)
|
|
||||||
- Translation services
|
|
||||||
- Real-time conversation
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import asyncio
|
|
||||||
import json
|
|
||||||
from typing import Dict, Any, Optional
|
|
||||||
from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form
|
|
||||||
from fastapi.responses import StreamingResponse
|
|
||||||
from pydantic import BaseModel
|
|
||||||
import io
|
|
||||||
import base64
|
|
||||||
import asyncio
|
|
||||||
from typing import AsyncGenerator
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from modules.interfaces.interfaceAppObjects import getRootInterface
|
|
||||||
from modules.interfaces.interfaceAppModel import UserInDB
|
|
||||||
from modules.security.auth import getCurrentUser
|
|
||||||
from modules.connectors.connectorAzureSpeech import ConnectorAzureSpeech
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Router exposing all voice endpoints under /api/voice; included by the app entry point.
router = APIRouter(prefix="/api/voice", tags=["voice"])
|
|
||||||
|
|
||||||
# Pydantic models for request/response
|
|
||||||
class SpeechToTextRequest(BaseModel):
    """Request body for speech recognition: language and result verbosity."""
    language: str = "de-DE"
    format: str = "detailed"  # "simple" or "detailed"
|
||||||
|
|
||||||
class TextToSpeechRequest(BaseModel):
    """Request body for speech synthesis."""
    text: str
    language: str = "de-DE"
    # Azure neural voice identifier.
    voice: str = "de-DE-KatjaNeural"
    # Azure output format identifier.
    format: str = "audio-16khz-128kbitrate-mono-mp3"
|
||||||
|
|
||||||
class TranslationRequest(BaseModel):
    """Request body for text translation between two languages."""
    text: str
    from_language: str = "de-DE"
    to_language: str = "en-US"
|
||||||
|
|
||||||
class ConversationRequest(BaseModel):
    """Request body for a conversational turn answered with synthesized speech."""
    message: str
    language: str = "de-DE"
    response_voice: str = "de-DE-KatjaNeural"
|
||||||
|
|
||||||
class VoiceSettingsRequest(BaseModel):
    """Per-user voice preferences: STT/TTS languages, voice, translation target."""
    stt_language: str = "de-DE"
    tts_language: str = "de-DE"
    tts_voice: str = "de-DE-KatjaNeural"
    translation_enabled: bool = True
    target_language: str = "en-US"
|
||||||
|
|
||||||
# Get Azure Speech connector for current user
|
|
||||||
async def get_azure_speech_connector(current_user: UserInDB = Depends(getCurrentUser), connection_id: Optional[str] = None) -> ConnectorAzureSpeech:
    """Get Azure Speech connector for the current user.

    Resolves the user's Microsoft ("msft") connection — a specific one when
    *connection_id* is given, otherwise the first — verifies a connection
    token exists, then builds the connector from the subscription key and
    region configured in config.ini.

    Raises:
        HTTPException: 400 when no usable connection exists, 500 when the
            subscription key is missing or initialization fails.
    """
    try:
        root_interface = getRootInterface()

        # Locate the user's Microsoft connection.
        user_connections = root_interface.getUserConnections(current_user.id)
        azure_connection = None
        for connection in user_connections:
            if connection.authority != "msft":
                continue
            # Either match the requested id, or take the first msft connection.
            if connection_id is None or connection.id == connection_id:
                azure_connection = connection
                break

        if not azure_connection:
            if connection_id:
                raise HTTPException(
                    status_code=400,
                    detail=f"Azure connection with ID '{connection_id}' not found."
                )
            raise HTTPException(
                status_code=400,
                detail="No Azure connection found. Please connect your Microsoft account first."
            )

        # A token must exist even though Azure Speech itself authenticates
        # with a subscription key — it proves the account link is healthy.
        connection_token = root_interface.getConnectionToken(azure_connection.id)
        if not connection_token:
            raise HTTPException(
                status_code=400,
                detail="Azure connection token not found. Please reconnect your Microsoft account."
            )

        # Azure Speech Services needs a subscription key, not the OAuth
        # access token; it comes from application configuration.
        from modules.shared.configuration import APP_CONFIG
        subscription_key = APP_CONFIG.get("Connector_AzureSpeech_SUBSCRIPTION_KEY")

        # Validate BEFORE touching the value: the old code sliced the key
        # for a debug log first, which raised TypeError when it was None.
        if not subscription_key or subscription_key == "your-azure-speech-subscription-key-here":
            raise HTTPException(
                status_code=500,
                detail="Azure Speech Services subscription key not configured. Please set Connector_AzureSpeech_SUBSCRIPTION_KEY in config.ini"
            )

        # Never log key material; the length is enough for diagnostics.
        logger.debug(f"Azure Speech subscription key loaded (length: {len(subscription_key)})")

        region = APP_CONFIG.get("Connector_AzureSpeech_REGION", "westeurope")

        return ConnectorAzureSpeech(subscription_key=subscription_key, region=region)

    except HTTPException:
        # Preserve intentional client-facing errors instead of rewrapping
        # them as a generic 500 below (the old broad except did exactly that).
        raise
    except Exception as e:
        logger.error(f"Error getting Azure Speech connector: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to initialize Azure Speech connector: {str(e)}")
|
|
||||||
|
|
||||||
@router.get("/connections")
async def get_user_connections(current_user: UserInDB = Depends(getCurrentUser)):
    """Get all Microsoft connections for the current user."""
    try:
        root_interface = getRootInterface()
        user_connections = root_interface.getUserConnections(current_user.id)

        def _has_speech_key(connection) -> bool:
            # A connection advertises speech support through its metadata,
            # either an explicit key or a service-type marker. Metadata may
            # be a JSON string or an already-parsed dict.
            if not (hasattr(connection, 'metadata') and connection.metadata):
                return False
            try:
                meta = json.loads(connection.metadata) if isinstance(connection.metadata, str) else connection.metadata
                return bool(meta.get('speech_subscription_key') or meta.get('service_type') == 'speech_services')
            except (json.JSONDecodeError, AttributeError):
                return False

        msft_connections = [
            {
                "id": connection.id,
                "externalUsername": connection.externalUsername,
                "authority": connection.authority,
                "isActive": connection.status == "active",
                "hasSpeechKey": _has_speech_key(connection),
                "connectedAt": connection.connectedAt,
                "lastChecked": connection.lastChecked,
            }
            for connection in user_connections
            if connection.authority == "msft"
        ]

        return {
            "success": True,
            "connections": msft_connections,
            "count": len(msft_connections),
        }

    except Exception as e:
        logger.error(f"Error getting user connections: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
class SpeechSubscriptionRequest(BaseModel):
    """Request body for registering an Azure Speech subscription key."""
    subscription_key: str
    # Azure region hosting the Speech resource.
    region: str = "westeurope"
    # Optional: target a specific Microsoft connection instead of the first one.
    connection_id: Optional[str] = None
|
||||||
|
|
||||||
@router.post("/subscription")
async def set_speech_subscription(
    request: SpeechSubscriptionRequest,
    current_user: UserInDB = Depends(getCurrentUser)
):
    """Set Azure Speech Services subscription key for the user.

    Validates the key format and region, verifies the key with a minimal
    live TTS request, then merges it into the user's Microsoft connection
    metadata.

    NOTE(review): the updated metadata is built but never written back to
    storage (see the comment before the return) — the endpoint reports
    success without persisting. Confirm whether persistence exists elsewhere.

    Raises:
        HTTPException: 400 for invalid key/region or missing connection,
            500 for unexpected failures.
    """
    try:
        root_interface = getRootInterface()

        # Validate subscription key format (basic validation)
        if not request.subscription_key or len(request.subscription_key) < 32:
            raise HTTPException(
                status_code=400,
                detail="Invalid subscription key format. Please provide a valid Azure Speech Services subscription key."
            )

        # Validate region against the known Azure regions.
        valid_regions = ["westeurope", "eastus", "westus2", "eastus2", "southeastasia", "westcentralus", "eastasia", "northeurope", "southcentralus", "centralus", "australiaeast", "brazilsouth", "canadacentral", "centralindia", "francecentral", "germanywestcentral", "japaneast", "koreacentral", "norwayeast", "southafricanorth", "switzerlandnorth", "uaenorth", "uksouth", "westus3"]
        if request.region not in valid_regions:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid region. Supported regions: {', '.join(valid_regions)}"
            )

        # Test the subscription key by creating a temporary connector and
        # running a minimal TTS request against the live service.
        try:
            test_connector = ConnectorAzureSpeech(subscription_key=request.subscription_key, region=request.region)
            # Test with a simple TTS request
            test_audio = await test_connector.text_to_speech("Test", "en-US", "en-US-AriaNeural")
            if len(test_audio) == 0:
                raise Exception("Subscription key test failed")
        except Exception as e:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid subscription key or region. Test failed: {str(e)}"
            )

        # Get user connections
        user_connections = root_interface.getUserConnections(current_user.id)

        # Find the target connection: a specific one when connection_id is
        # provided, otherwise the user's first Microsoft connection.
        target_connection = None
        if request.connection_id:
            # Use specific connection
            for connection in user_connections:
                if connection.id == request.connection_id and connection.authority == "msft":
                    target_connection = connection
                    break
            if not target_connection:
                raise HTTPException(
                    status_code=400,
                    detail=f"Connection with ID '{request.connection_id}' not found."
                )
        else:
            # Use first Microsoft connection or create a new one
            target_connection = None
            for connection in user_connections:
                if connection.authority == "msft":
                    target_connection = connection
                    break

            if not target_connection:
                # Create a new connection for speech services
                # This would require implementing connection creation in the interface
                raise HTTPException(
                    status_code=400,
                    detail="No Microsoft connection found. Please connect your Microsoft account first."
                )

        # Update connection metadata with speech subscription key.
        # Metadata may be stored as a JSON string or an already-parsed dict.
        metadata = {}
        if hasattr(target_connection, 'metadata') and target_connection.metadata:
            try:
                metadata = json.loads(target_connection.metadata) if isinstance(target_connection.metadata, str) else target_connection.metadata
            except (json.JSONDecodeError, AttributeError):
                metadata = {}

        # Add speech services information
        metadata['speech_subscription_key'] = request.subscription_key
        metadata['speech_region'] = request.region
        metadata['speech_configured_at'] = datetime.now().isoformat()

        # Update the connection (this would require implementing update in the interface)
        # For now, we'll just return success - the actual update would need to be implemented
        # in the database interface

        return {
            "success": True,
            "message": "Azure Speech Services subscription key configured successfully",
            "connection_id": target_connection.id,
            "region": request.region,
            "configured_at": metadata['speech_configured_at']
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error setting speech subscription: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
@router.get("/subscription")
async def get_speech_subscription(
    current_user: UserInDB = Depends(getCurrentUser)
):
    """Get Azure Speech Services subscription information for the user."""
    try:
        # The key and region come from application configuration, not from
        # the user's stored connections.
        from modules.shared.configuration import APP_CONFIG
        subscription_key = APP_CONFIG.get("Connector_AzureSpeech_SUBSCRIPTION_KEY")
        region = APP_CONFIG.get("Connector_AzureSpeech_REGION", "westeurope")

        if not subscription_key or subscription_key == "your-azure-speech-subscription-key-here":
            return {
                "success": True,
                "has_subscription": False,
                "message": "Azure Speech Services subscription key not configured. Please set Connector_AzureSpeech_SUBSCRIPTION_KEY in config.ini"
            }

        # Expose only a masked fragment of the key.
        masked_key = subscription_key[:8] + "..." + subscription_key[-8:]

        # Probe the service; a failed probe is reported, not raised.
        try:
            connector = ConnectorAzureSpeech(subscription_key=subscription_key, region=region)
            probe_result = await connector.test_connection()
            subscription_info = {
                "subscription_key": masked_key,
                "region": region,
                "connection_test": probe_result,
                "source": "config.ini"
            }
        except Exception as test_error:
            subscription_info = {
                "subscription_key": masked_key,
                "region": region,
                "connection_test": False,
                "error": str(test_error),
                "source": "config.ini"
            }

        return {
            "success": True,
            "has_subscription": True,
            "subscription": subscription_info
        }

    except Exception as e:
        logger.error(f"Error getting speech subscription: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
@router.get("/settings")
async def get_voice_settings(
    current_user: UserInDB = Depends(getCurrentUser),
    connection_id: Optional[str] = None
):
    """Get available voice settings and languages.

    Returns the voices/languages offered by Azure Speech together with the
    user's stored preferences (or the defaults when none are stored yet).

    Raises:
        HTTPException: 500 on any failure (connector, interface, lookup).
    """
    try:
        connector = await get_azure_speech_connector(current_user, connection_id)

        # Catalog data from Azure Speech.
        voices = await connector.get_available_voices()
        languages = await connector.get_available_languages()

        # The user's saved settings from the database.
        from modules.interfaces.interfaceComponentObjects import getInterface
        component_interface = getInterface(current_user)
        user_settings = component_interface.getVoiceSettings(current_user.id)

        # Single source of truth for defaults (previously duplicated).
        default_settings = {
            "sttLanguage": "de-DE",
            "ttsLanguage": "de-DE",
            "ttsVoice": "de-DE-KatjaNeural",
            "translationEnabled": True,
            "targetLanguage": "en-US"
        }

        # If no settings exist, fall back to the defaults.
        if not user_settings:
            user_settings = dict(default_settings)

        def _setting(name: str):
            # Stored settings may be an ORM-style object (attribute access)
            # or a plain dict (key access); support both uniformly instead
            # of repeating the hasattr/get dance per field.
            if hasattr(user_settings, name):
                return getattr(user_settings, name)
            return user_settings.get(name)

        return {
            "voices": voices,
            "languages": languages,
            "user_settings": {
                "sttLanguage": _setting("sttLanguage"),
                "ttsLanguage": _setting("ttsLanguage"),
                "ttsVoice": _setting("ttsVoice"),
                "translationEnabled": _setting("translationEnabled"),
                "targetLanguage": _setting("targetLanguage")
            },
            "default_settings": default_settings
        }

    except Exception as e:
        logger.error(f"Error getting voice settings: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
@router.post("/settings")
async def save_voice_settings(
    settings: dict,
    current_user: UserInDB = Depends(getCurrentUser)
):
    """Save voice settings for the current user."""
    try:
        from modules.interfaces.interfaceComponentObjects import getInterface
        component_interface = getInterface(current_user)

        # Update when settings already exist, otherwise create them.
        if component_interface.getVoiceSettings(current_user.id):
            stored = component_interface.updateVoiceSettings(
                current_user.id,
                settings
            )
            return {
                "success": True,
                "message": "Voice settings saved successfully",
                "settings": stored
            }

        # First save: stamp ownership before persisting.
        settings["userId"] = current_user.id
        settings["mandateId"] = current_user.mandateId
        stored = component_interface.createVoiceSettings(settings)
        return {
            "success": True,
            "message": "Voice settings created successfully",
            "settings": stored
        }

    except Exception as e:
        logger.error(f"Error saving voice settings: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
@router.post("/speech-to-text")
async def speech_to_text(
    audio_file: UploadFile = File(...),
    language: str = Form("de-DE"),
    format: str = Form("detailed"),
    connection_id: str = Form(None),
    current_user: UserInDB = Depends(getCurrentUser)
):
    """Convert speech to text using Azure Speech Services."""
    try:
        connector = await get_azure_speech_connector(current_user, connection_id)

        # The whole upload is buffered in memory before recognition.
        audio_content = await audio_file.read()

        recognition = await connector.speech_to_text(
            audio_content=audio_content,
            language=language,
            format=format
        )

        return {
            "success": True,
            "text": recognition.get("text", ""),
            "confidence": recognition.get("confidence", 0.0),
            "language": recognition.get("language", language),
            "format": format
        }

    except Exception as e:
        logger.error(f"Error in speech-to-text: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
@router.post("/text-to-speech")
async def text_to_speech(
    request: TextToSpeechRequest,
    current_user: UserInDB = Depends(getCurrentUser),
    connection_id: Optional[str] = None
):
    """Convert text to speech using Azure Speech Services."""
    try:
        connector = await get_azure_speech_connector(current_user, connection_id)

        synthesized = await connector.text_to_speech(
            text=request.text,
            language=request.language,
            voice=request.voice,
            format=request.format
        )

        # Audio travels to the client as base64 inside the JSON body.
        encoded_audio = base64.b64encode(synthesized).decode('utf-8')

        return {
            "success": True,
            "audio_data": encoded_audio,
            "format": request.format,
            "voice": request.voice,
            "language": request.language
        }

    except Exception as e:
        logger.error(f"Error in text-to-speech: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
@router.post("/translate")
async def translate_text(
    request: TranslationRequest,
    current_user: UserInDB = Depends(getCurrentUser),
    connection_id: Optional[str] = None
):
    """Translate text using Azure Translator."""
    try:
        connector = await get_azure_speech_connector(current_user, connection_id)

        translation = await connector.translate_text(
            text=request.text,
            from_language=request.from_language,
            to_language=request.to_language
        )

        # Echo the original alongside the translation for the client.
        return {
            "success": True,
            "original_text": request.text,
            "translated_text": translation,
            "from_language": request.from_language,
            "to_language": request.to_language
        }

    except Exception as e:
        logger.error(f"Error in translation: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
@router.post("/conversation")
async def conversation(
    request: ConversationRequest,
    current_user: UserInDB = Depends(getCurrentUser),
    connection_id: Optional[str] = None
):
    """Handle conversation with voice response."""
    try:
        connector = await get_azure_speech_connector(current_user, connection_id)

        # Produce the assistant's text reply (rule-based placeholder — in
        # production this would call an AI service such as Azure OpenAI).
        reply = await _generate_ai_response(request.message, request.language)

        # Synthesize the reply in the requested voice.
        spoken_reply = await connector.text_to_speech(
            text=reply,
            language=request.language,
            voice=request.response_voice,
            format="audio-16khz-128kbitrate-mono-mp3"
        )

        # Deliver both the text and the base64-encoded audio.
        return {
            "success": True,
            "response_text": reply,
            "audio_data": base64.b64encode(spoken_reply).decode('utf-8'),
            "voice": request.response_voice,
            "language": request.language
        }

    except Exception as e:
        logger.error(f"Error in conversation: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
async def _generate_ai_response(message: str, language: str) -> str:
|
|
||||||
"""Generate AI response for conversation."""
|
|
||||||
try:
|
|
||||||
# Simple rule-based responses for demonstration
|
|
||||||
# In production, this would call an AI service like OpenAI or Azure OpenAI
|
|
||||||
|
|
||||||
message_lower = message.lower()
|
|
||||||
|
|
||||||
# German responses
|
|
||||||
if language.startswith("de"):
|
|
||||||
if any(word in message_lower for word in ["hallo", "hi", "hey"]):
|
|
||||||
return "Hallo! Wie kann ich Ihnen heute helfen?"
|
|
||||||
elif any(word in message_lower for word in ["wie geht", "wie gehts"]):
|
|
||||||
return "Mir geht es gut, danke der Nachfrage! Wie geht es Ihnen?"
|
|
||||||
elif any(word in message_lower for word in ["danke", "dankeschön"]):
|
|
||||||
return "Gern geschehen! Gibt es noch etwas, womit ich helfen kann?"
|
|
||||||
elif any(word in message_lower for word in ["zeit", "uhr"]):
|
|
||||||
from datetime import datetime
|
|
||||||
current_time = datetime.now().strftime("%H:%M")
|
|
||||||
return f"Es ist jetzt {current_time} Uhr."
|
|
||||||
elif any(word in message_lower for word in ["wetter", "temperatur"]):
|
|
||||||
return "Ich kann leider keine aktuellen Wetterdaten abrufen. Bitte schauen Sie in eine Wetter-App."
|
|
||||||
else:
|
|
||||||
return f"Das ist interessant: '{message}'. Können Sie mir mehr darüber erzählen?"
|
|
||||||
|
|
||||||
# English responses
|
|
||||||
elif language.startswith("en"):
|
|
||||||
if any(word in message_lower for word in ["hello", "hi", "hey"]):
|
|
||||||
return "Hello! How can I help you today?"
|
|
||||||
elif any(word in message_lower for word in ["how are you", "how's it going"]):
|
|
||||||
return "I'm doing well, thank you for asking! How are you?"
|
|
||||||
elif any(word in message_lower for word in ["thank you", "thanks"]):
|
|
||||||
return "You're welcome! Is there anything else I can help you with?"
|
|
||||||
elif any(word in message_lower for word in ["time", "clock"]):
|
|
||||||
from datetime import datetime
|
|
||||||
current_time = datetime.now().strftime("%H:%M")
|
|
||||||
return f"It's currently {current_time}."
|
|
||||||
elif any(word in message_lower for word in ["weather", "temperature"]):
|
|
||||||
return "I'm sorry, I can't retrieve current weather data. Please check a weather app."
|
|
||||||
else:
|
|
||||||
return f"That's interesting: '{message}'. Can you tell me more about that?"
|
|
||||||
|
|
||||||
# Default response
|
|
||||||
else:
|
|
||||||
return f"I heard: '{message}'. How can I help you with that?"
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error generating AI response: {str(e)}")
|
|
||||||
return "I'm sorry, I didn't understand that. Could you please repeat?"
|
|
||||||
|
|
||||||
@router.post("/realtime-interpreter")
async def realtime_interpreter(
    audio_file: UploadFile = File(...),
    from_language: str = Form("de-DE"),
    to_language: str = Form("en-US"),
    connection_id: str = Form(None),
    current_user: UserInDB = Depends(getCurrentUser)
):
    """Real-time interpreter: speech to translated text.

    Recognizes the uploaded audio in *from_language*, then translates the
    transcript into *to_language* when the two differ. A translation
    failure degrades gracefully to the untranslated transcript.
    """
    try:
        logger.info(f"Realtime interpreter called with from_language='{from_language}', to_language='{to_language}'")

        connector = await get_azure_speech_connector(current_user, connection_id)

        # Read audio file (the whole upload is buffered in memory).
        audio_content = await audio_file.read()

        # Convert speech to text
        stt_result = await connector.speech_to_text(
            audio_content=audio_content,
            language=from_language,
            format="detailed"
        )

        original_text = stt_result.get("text", "")

        # Translate text if different languages
        translated_text = original_text
        if from_language != to_language:
            try:
                translated_text = await connector.translate_text(
                    text=original_text,
                    from_language=from_language,
                    to_language=to_language
                )
            except Exception as e:
                # Best-effort: fall back to the untranslated transcript.
                logger.warning(f"Translation failed, using original text: {str(e)}")
                translated_text = original_text

        return {
            "success": True,
            "original_text": original_text,
            "translated_text": translated_text,
            "from_language": from_language,
            "to_language": to_language,
            "confidence": stt_result.get("confidence", 0.0)
        }

    except Exception as e:
        logger.error(f"Error in realtime interpreter: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
@router.post("/stream/speech-to-text")
|
|
||||||
async def stream_speech_to_text(
|
|
||||||
audio_file: UploadFile = File(...),
|
|
||||||
language: str = Form("de-DE"),
|
|
||||||
format: str = Form("detailed"),
|
|
||||||
audio_format: str = Form("wav"),
|
|
||||||
connection_id: str = Form(None),
|
|
||||||
current_user: UserInDB = Depends(getCurrentUser)
|
|
||||||
):
|
|
||||||
"""Stream speech to text using Azure Speech Services."""
|
|
||||||
try:
|
|
||||||
connector = await get_azure_speech_connector(current_user, connection_id)
|
|
||||||
|
|
||||||
async def audio_chunk_generator():
|
|
||||||
"""Generate audio chunks from uploaded file."""
|
|
||||||
chunk_size = 4096 # 4KB chunks
|
|
||||||
while True:
|
|
||||||
chunk = await audio_file.read(chunk_size)
|
|
||||||
if not chunk:
|
|
||||||
break
|
|
||||||
yield chunk
|
|
||||||
|
|
||||||
async def response_generator():
|
|
||||||
"""Generate streaming responses."""
|
|
||||||
try:
|
|
||||||
async for result in connector.stream_speech_to_text(
|
|
||||||
audio_chunk_generator(),
|
|
||||||
language=language,
|
|
||||||
format=format,
|
|
||||||
audio_format=audio_format
|
|
||||||
):
|
|
||||||
yield f"data: {json.dumps(result)}\n\n"
|
|
||||||
except Exception as e:
|
|
||||||
error_result = {"error": str(e), "is_final": True}
|
|
||||||
yield f"data: {json.dumps(error_result)}\n\n"
|
|
||||||
|
|
||||||
return StreamingResponse(
|
|
||||||
response_generator(),
|
|
||||||
media_type="text/plain",
|
|
||||||
headers={
|
|
||||||
"Cache-Control": "no-cache",
|
|
||||||
"Connection": "keep-alive",
|
|
||||||
"Content-Type": "text/event-stream"
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in streaming speech-to-text: {str(e)}")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
@router.post("/stream/text-to-speech")
|
|
||||||
async def stream_text_to_speech(
|
|
||||||
request: TextToSpeechRequest,
|
|
||||||
current_user: UserInDB = Depends(getCurrentUser),
|
|
||||||
connection_id: Optional[str] = None
|
|
||||||
):
|
|
||||||
"""Stream text to speech using Azure Speech Services."""
|
|
||||||
try:
|
|
||||||
connector = await get_azure_speech_connector(current_user, connection_id)
|
|
||||||
|
|
||||||
async def text_chunk_generator():
|
|
||||||
"""Generate text chunks from the request."""
|
|
||||||
# Split text into sentences for better streaming
|
|
||||||
sentences = request.text.split('. ')
|
|
||||||
for sentence in sentences:
|
|
||||||
if sentence.strip():
|
|
||||||
yield sentence.strip() + '. '
|
|
||||||
|
|
||||||
async def audio_generator():
|
|
||||||
"""Generate streaming audio data."""
|
|
||||||
try:
|
|
||||||
async for audio_chunk in connector.stream_text_to_speech(
|
|
||||||
text_chunk_generator(),
|
|
||||||
language=request.language,
|
|
||||||
voice=request.voice,
|
|
||||||
format=request.format
|
|
||||||
):
|
|
||||||
yield audio_chunk
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in audio generation: {str(e)}")
|
|
||||||
# Return error as audio (could be a beep or silence)
|
|
||||||
yield b""
|
|
||||||
|
|
||||||
return StreamingResponse(
|
|
||||||
audio_generator(),
|
|
||||||
media_type="audio/mpeg",
|
|
||||||
headers={
|
|
||||||
"Cache-Control": "no-cache",
|
|
||||||
"Connection": "keep-alive",
|
|
||||||
"Content-Type": "audio/mpeg"
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in streaming text-to-speech: {str(e)}")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
@router.post("/stream/realtime-interpreter")
|
|
||||||
async def stream_realtime_interpreter(
|
|
||||||
audio_file: UploadFile = File(...),
|
|
||||||
from_language: str = Form("de-DE"),
|
|
||||||
to_language: str = Form("en-US"),
|
|
||||||
connection_id: str = Form(None),
|
|
||||||
current_user: UserInDB = Depends(getCurrentUser)
|
|
||||||
):
|
|
||||||
"""Stream real-time interpreter: speech to translated text."""
|
|
||||||
try:
|
|
||||||
connector = await get_azure_speech_connector(current_user, connection_id)
|
|
||||||
|
|
||||||
async def audio_chunk_generator():
|
|
||||||
"""Generate audio chunks from uploaded file."""
|
|
||||||
chunk_size = 4096 # 4KB chunks
|
|
||||||
while True:
|
|
||||||
chunk = await audio_file.read(chunk_size)
|
|
||||||
if not chunk:
|
|
||||||
break
|
|
||||||
yield chunk
|
|
||||||
|
|
||||||
async def response_generator():
|
|
||||||
"""Generate streaming translation responses."""
|
|
||||||
try:
|
|
||||||
async for result in connector.stream_realtime_interpreter(
|
|
||||||
audio_chunk_generator(),
|
|
||||||
from_language=from_language,
|
|
||||||
to_language=to_language
|
|
||||||
):
|
|
||||||
yield f"data: {json.dumps(result)}\n\n"
|
|
||||||
except Exception as e:
|
|
||||||
error_result = {"error": str(e), "is_final": True}
|
|
||||||
yield f"data: {json.dumps(error_result)}\n\n"
|
|
||||||
|
|
||||||
return StreamingResponse(
|
|
||||||
response_generator(),
|
|
||||||
media_type="text/plain",
|
|
||||||
headers={
|
|
||||||
"Cache-Control": "no-cache",
|
|
||||||
"Connection": "keep-alive",
|
|
||||||
"Content-Type": "text/event-stream"
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in streaming realtime interpreter: {str(e)}")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
@router.get("/health")
|
|
||||||
async def health_check():
|
|
||||||
"""Health check endpoint for voice services."""
|
|
||||||
return {
|
|
||||||
"status": "healthy",
|
|
||||||
"service": "azure-voice",
|
|
||||||
"endpoints": [
|
|
||||||
"speech-to-text",
|
|
||||||
"text-to-speech",
|
|
||||||
"translate",
|
|
||||||
"conversation",
|
|
||||||
"realtime-interpreter",
|
|
||||||
"stream/speech-to-text",
|
|
||||||
"stream/text-to-speech",
|
|
||||||
"stream/realtime-interpreter",
|
|
||||||
"subscription",
|
|
||||||
"connections"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
268
modules/routes/routeVoiceGoogle.py
Normal file
268
modules/routes/routeVoiceGoogle.py
Normal file
|
|
@ -0,0 +1,268 @@
|
||||||
|
"""
|
||||||
|
Google Cloud Voice Services Routes
|
||||||
|
Replaces Azure voice services with Google Cloud Speech-to-Text and Translation
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException
|
||||||
|
from typing import Optional
|
||||||
|
from modules.connectors.connectorGoogleSpeech import ConnectorGoogleSpeech
|
||||||
|
from modules.security.auth import getCurrentUser
|
||||||
|
from modules.interfaces.interfaceAppModel import User
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
router = APIRouter(prefix="/voice-google", tags=["voice-google"])
|
||||||
|
|
||||||
|
# Global connector instance
|
||||||
|
_google_speech_connector = None
|
||||||
|
|
||||||
|
def get_google_speech_connector() -> ConnectorGoogleSpeech:
    """Get or create the process-wide Google Cloud Speech connector.

    Lazily initializes a module-level singleton. Credentials are resolved
    from the GOOGLE_APPLICATION_CREDENTIALS environment variable first,
    then from a few conventional file locations in the project tree.

    Raises:
        HTTPException: 500 when credentials cannot be found or the
            connector fails to initialize.
    """
    global _google_speech_connector

    if _google_speech_connector is None:
        try:
            # Get credentials path from environment or config.
            credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
            if not credentials_path:
                # Try to find credentials in common locations.
                possible_paths = [
                    "credentials/google-service-account.json",
                    "config/google-credentials.json",
                    "google-credentials.json"
                ]
                for path in possible_paths:
                    if os.path.exists(path):
                        credentials_path = path
                        break

            if not credentials_path:
                raise HTTPException(
                    status_code=500,
                    detail="Google Cloud credentials not found. Please set GOOGLE_APPLICATION_CREDENTIALS environment variable or place credentials file in project directory."
                )

            _google_speech_connector = ConnectorGoogleSpeech(credentials_path)
            logger.info("✅ Google Cloud Speech connector initialized")

        except HTTPException:
            # Propagate the deliberate "credentials not found" error as-is.
            # Previously it was caught by the blanket handler below and
            # re-wrapped, mangling its detail message.
            raise
        except Exception as e:
            logger.error(f"❌ Failed to initialize Google Cloud Speech connector: {e}")
            raise HTTPException(
                status_code=500,
                detail=f"Failed to initialize Google Cloud Speech connector: {str(e)}"
            )

    return _google_speech_connector
|
||||||
|
|
||||||
|
@router.post("/speech-to-text")
|
||||||
|
async def speech_to_text(
|
||||||
|
audio_file: UploadFile = File(...),
|
||||||
|
language: str = Form("de-DE"),
|
||||||
|
current_user: User = Depends(getCurrentUser)
|
||||||
|
):
|
||||||
|
"""Convert speech to text using Google Cloud Speech-to-Text API."""
|
||||||
|
try:
|
||||||
|
logger.info(f"🎤 Speech-to-text request: {audio_file.filename}, language: {language}")
|
||||||
|
|
||||||
|
# Read audio file
|
||||||
|
audio_content = await audio_file.read()
|
||||||
|
logger.info(f"📊 Audio file size: {len(audio_content)} bytes")
|
||||||
|
|
||||||
|
# Validate audio format
|
||||||
|
connector = get_google_speech_connector()
|
||||||
|
validation = connector.validate_audio_format(audio_content)
|
||||||
|
|
||||||
|
if not validation["valid"]:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail=f"Invalid audio format: {validation.get('error', 'Unknown error')}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Perform speech recognition
|
||||||
|
result = await connector.speech_to_text(
|
||||||
|
audio_content=audio_content,
|
||||||
|
language=language
|
||||||
|
)
|
||||||
|
|
||||||
|
if result["success"]:
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"text": result["text"],
|
||||||
|
"confidence": result["confidence"],
|
||||||
|
"language": result["language"],
|
||||||
|
"audio_info": {
|
||||||
|
"size": len(audio_content),
|
||||||
|
"format": validation["format"],
|
||||||
|
"estimated_duration": validation.get("estimated_duration", 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail=f"Speech recognition failed: {result.get('error', 'Unknown error')}"
|
||||||
|
)
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"❌ Speech-to-text error: {e}")
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500,
|
||||||
|
detail=f"Speech-to-text processing failed: {str(e)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
@router.post("/translate")
|
||||||
|
async def translate_text(
|
||||||
|
text: str = Form(...),
|
||||||
|
source_language: str = Form("de"),
|
||||||
|
target_language: str = Form("en"),
|
||||||
|
current_user: User = Depends(getCurrentUser)
|
||||||
|
):
|
||||||
|
"""Translate text using Google Cloud Translation API."""
|
||||||
|
try:
|
||||||
|
logger.info(f"🌐 Translation request: '{text}' ({source_language} -> {target_language})")
|
||||||
|
|
||||||
|
if not text.strip():
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail="Empty text provided for translation"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Perform translation
|
||||||
|
connector = get_google_speech_connector()
|
||||||
|
result = await connector.translate_text(
|
||||||
|
text=text,
|
||||||
|
source_language=source_language,
|
||||||
|
target_language=target_language
|
||||||
|
)
|
||||||
|
|
||||||
|
if result["success"]:
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"original_text": result["original_text"],
|
||||||
|
"translated_text": result["translated_text"],
|
||||||
|
"source_language": result["source_language"],
|
||||||
|
"target_language": result["target_language"]
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail=f"Translation failed: {result.get('error', 'Unknown error')}"
|
||||||
|
)
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"❌ Translation error: {e}")
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500,
|
||||||
|
detail=f"Translation processing failed: {str(e)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
@router.post("/realtime-interpreter")
|
||||||
|
async def realtime_interpreter(
|
||||||
|
audio_file: UploadFile = File(...),
|
||||||
|
from_language: str = Form("de-DE"),
|
||||||
|
to_language: str = Form("en-US"),
|
||||||
|
connection_id: str = Form(None),
|
||||||
|
current_user: User = Depends(getCurrentUser)
|
||||||
|
):
|
||||||
|
"""Real-time interpreter: speech to translated text using Google Cloud APIs."""
|
||||||
|
try:
|
||||||
|
logger.info(f"🔄 Real-time interpreter request: {audio_file.filename}")
|
||||||
|
logger.info(f" From: {from_language} -> To: {to_language}")
|
||||||
|
|
||||||
|
# Read audio file
|
||||||
|
audio_content = await audio_file.read()
|
||||||
|
logger.info(f"📊 Audio file size: {len(audio_content)} bytes")
|
||||||
|
|
||||||
|
# Save audio file for debugging
|
||||||
|
debug_filename = f"debug_audio/audio_google_{audio_file.filename}"
|
||||||
|
os.makedirs("debug_audio", exist_ok=True)
|
||||||
|
with open(debug_filename, "wb") as f:
|
||||||
|
f.write(audio_content)
|
||||||
|
logger.info(f"💾 Saved audio file for debugging: {debug_filename}")
|
||||||
|
|
||||||
|
# Validate audio format
|
||||||
|
connector = get_google_speech_connector()
|
||||||
|
validation = connector.validate_audio_format(audio_content)
|
||||||
|
|
||||||
|
if not validation["valid"]:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail=f"Invalid audio format: {validation.get('error', 'Unknown error')}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Perform complete pipeline: Speech-to-Text + Translation
|
||||||
|
result = await connector.speech_to_translated_text(
|
||||||
|
audio_content=audio_content,
|
||||||
|
from_language=from_language,
|
||||||
|
to_language=to_language
|
||||||
|
)
|
||||||
|
|
||||||
|
if result["success"]:
|
||||||
|
logger.info(f"✅ Real-time interpreter successful:")
|
||||||
|
logger.info(f" Original: '{result['original_text']}'")
|
||||||
|
logger.info(f" Translated: '{result['translated_text']}'")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"original_text": result["original_text"],
|
||||||
|
"translated_text": result["translated_text"],
|
||||||
|
"confidence": result["confidence"],
|
||||||
|
"source_language": result["source_language"],
|
||||||
|
"target_language": result["target_language"],
|
||||||
|
"audio_info": {
|
||||||
|
"size": len(audio_content),
|
||||||
|
"format": validation["format"],
|
||||||
|
"estimated_duration": validation.get("estimated_duration", 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail=f"Real-time interpreter failed: {result.get('error', 'Unknown error')}"
|
||||||
|
)
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"❌ Real-time interpreter error: {e}")
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500,
|
||||||
|
detail=f"Real-time interpreter processing failed: {str(e)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
@router.get("/health")
|
||||||
|
async def health_check(current_user: User = Depends(getCurrentUser)):
|
||||||
|
"""Health check for Google Cloud voice services."""
|
||||||
|
try:
|
||||||
|
connector = get_google_speech_connector()
|
||||||
|
|
||||||
|
# Test with a simple translation
|
||||||
|
test_result = await connector.translate_text(
|
||||||
|
text="Hello",
|
||||||
|
source_language="en",
|
||||||
|
target_language="de"
|
||||||
|
)
|
||||||
|
|
||||||
|
if test_result["success"]:
|
||||||
|
return {
|
||||||
|
"status": "healthy",
|
||||||
|
"service": "Google Cloud Speech-to-Text & Translation",
|
||||||
|
"test_translation": test_result["translated_text"]
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
"status": "unhealthy",
|
||||||
|
"error": test_result.get("error", "Unknown error")
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"❌ Health check failed: {e}")
|
||||||
|
return {
|
||||||
|
"status": "unhealthy",
|
||||||
|
"error": str(e)
|
||||||
|
}
|
||||||
|
|
@ -1,494 +0,0 @@
|
||||||
"""
|
|
||||||
Azure Voice Services WebSocket Routes
|
|
||||||
Provides real-time WebSocket endpoints for:
|
|
||||||
- Live microphone audio streaming
|
|
||||||
- Real-time speech-to-text
|
|
||||||
- Real-time translation
|
|
||||||
- Real-time text-to-speech
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import asyncio
|
|
||||||
import json
|
|
||||||
from typing import Dict, Any, Optional, List
|
|
||||||
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Depends, Query
|
|
||||||
from fastapi.websockets import WebSocketState
|
|
||||||
import base64
|
|
||||||
import io
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from modules.interfaces.interfaceAppObjects import getRootInterface
|
|
||||||
from modules.interfaces.interfaceAppModel import UserInDB
|
|
||||||
from modules.security.auth import getCurrentUser
|
|
||||||
from modules.connectors.connectorAzureSpeech import ConnectorAzureSpeech
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
router = APIRouter(prefix="/api/voice/ws", tags=["voice-websocket"])
|
|
||||||
|
|
||||||
class ConnectionManager:
    """Manages WebSocket connections for real-time voice services.

    Keeps three parallel maps: connection_id -> live socket,
    user_id -> connection_id, and connection_id -> per-connection metadata.
    """

    def __init__(self):
        # connection_id -> live socket
        self.active_connections: "Dict[str, WebSocket]" = {}
        # user_id -> connection_id
        self.user_connections: "Dict[str, str]" = {}
        # connection_id -> arbitrary per-connection metadata
        self.connection_metadata: "Dict[str, Dict]" = {}

    async def connect(self, websocket: "WebSocket", connection_id: str, user_id: str, metadata: "Dict" = None):
        """Accept a WebSocket connection and register it under its ids."""
        await websocket.accept()
        self.active_connections[connection_id] = websocket
        self.user_connections[user_id] = connection_id
        self.connection_metadata[connection_id] = metadata or {}
        logger.info(f"WebSocket connected: {connection_id} for user {user_id}")

    def disconnect(self, connection_id: str):
        """Drop every record of a connection; a no-op if it is unknown."""
        if connection_id not in self.active_connections:
            return
        del self.active_connections[connection_id]
        # Reverse-lookup the owning user and clear that mapping too.
        owner = next(
            (uid for uid, cid in self.user_connections.items() if cid == connection_id),
            None,
        )
        if owner is not None:
            del self.user_connections[owner]
        self.connection_metadata.pop(connection_id, None)
        logger.info(f"WebSocket disconnected: {connection_id}")

    async def send_message(self, connection_id: str, message: "Dict"):
        """Send a JSON message to one connection, pruning it on failure."""
        websocket = self.active_connections.get(connection_id)
        if websocket is None:
            return
        try:
            if websocket.client_state == WebSocketState.CONNECTED:
                await websocket.send_text(json.dumps(message))
            else:
                # Socket is gone; drop our bookkeeping for it.
                self.disconnect(connection_id)
        except Exception as e:
            logger.error(f"Error sending message to {connection_id}: {str(e)}")
            self.disconnect(connection_id)

    async def send_error(self, connection_id: str, error: str, error_code: str = "GENERAL_ERROR"):
        """Send a structured error payload to a connection."""
        await self.send_message(connection_id, {
            "type": "error",
            "error": error,
            "error_code": error_code,
            "timestamp": datetime.now().isoformat()
        })

    def get_connection_metadata(self, connection_id: str) -> "Dict":
        """Return the metadata registered for a connection (empty if unknown)."""
        return self.connection_metadata.get(connection_id, {})
|
|
||||||
|
|
||||||
# Global connection manager
|
|
||||||
manager = ConnectionManager()
|
|
||||||
|
|
||||||
async def get_azure_speech_connector_ws(user_id: str) -> Optional[ConnectorAzureSpeech]:
    """Build an Azure Speech connector for a WebSocket user.

    Returns None (rather than raising) on any failure, since WebSocket
    handlers report errors over the socket instead of via HTTP responses.
    """
    try:
        root_interface = getRootInterface()

        # The user must exist...
        user = root_interface.getUser(user_id)
        if not user:
            logger.error(f"User with ID {user_id} not found")
            return None

        # ...and have at least one Microsoft connection on file.
        connections = root_interface.getUserConnections(user_id)
        azure_connection = next(
            (conn for conn in connections if conn.authority == "msft"),
            None,
        )
        if not azure_connection:
            logger.error(f"No Azure connection found for user {user_id}")
            return None

        # The connection must carry a token.
        if not root_interface.getConnectionToken(azure_connection.id):
            logger.error(f"No connection token found for user {user_id}")
            return None

        # Speech Services auth comes from app configuration, not the token.
        from modules.shared.configuration import APP_CONFIG
        subscription_key = APP_CONFIG.get("Connector_AzureSpeech_SUBSCRIPTION_KEY")
        if not subscription_key or subscription_key == "your-azure-speech-subscription-key-here":
            logger.error("Azure Speech Services subscription key not configured")
            return None

        region = APP_CONFIG.get("Connector_AzureSpeech_REGION", "westeurope")
        return ConnectorAzureSpeech(subscription_key=subscription_key, region=region)

    except Exception as e:
        logger.error(f"Error getting Azure Speech connector for WebSocket: {str(e)}")
        return None
|
|
||||||
|
|
||||||
@router.websocket("/realtime-interpreter")
|
|
||||||
async def websocket_realtime_interpreter(
|
|
||||||
websocket: WebSocket,
|
|
||||||
user_id: str = Query(...),
|
|
||||||
from_language: str = Query("de-DE"),
|
|
||||||
to_language: str = Query("en-US"),
|
|
||||||
audio_format: str = Query("wav")
|
|
||||||
):
|
|
||||||
"""WebSocket endpoint for real-time interpreter with live audio streaming."""
|
|
||||||
connection_id = f"interpreter_{user_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Connect WebSocket
|
|
||||||
await manager.connect(websocket, connection_id, user_id, {
|
|
||||||
"service": "realtime_interpreter",
|
|
||||||
"from_language": from_language,
|
|
||||||
"to_language": to_language,
|
|
||||||
"audio_format": audio_format
|
|
||||||
})
|
|
||||||
|
|
||||||
# Get Azure Speech connector
|
|
||||||
connector = await get_azure_speech_connector_ws(user_id)
|
|
||||||
if not connector:
|
|
||||||
await manager.send_error(connection_id, "Azure Speech Services not configured", "NO_CONNECTOR")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Send connection confirmation
|
|
||||||
await manager.send_message(connection_id, {
|
|
||||||
"type": "connected",
|
|
||||||
"connection_id": connection_id,
|
|
||||||
"service": "realtime_interpreter",
|
|
||||||
"from_language": from_language,
|
|
||||||
"to_language": to_language,
|
|
||||||
"timestamp": datetime.now().isoformat()
|
|
||||||
})
|
|
||||||
|
|
||||||
# Process incoming audio chunks
|
|
||||||
audio_buffer = io.BytesIO()
|
|
||||||
chunk_count = 0
|
|
||||||
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
# Receive message from client
|
|
||||||
data = await websocket.receive_text()
|
|
||||||
message = json.loads(data)
|
|
||||||
|
|
||||||
if message.get("type") == "audio_chunk":
|
|
||||||
# Decode base64 audio data
|
|
||||||
audio_data = base64.b64decode(message.get("data", ""))
|
|
||||||
audio_buffer.write(audio_data)
|
|
||||||
chunk_count += 1
|
|
||||||
|
|
||||||
# Process every 10 chunks or when buffer is large enough
|
|
||||||
if chunk_count % 10 == 0 or len(audio_data) > 8192:
|
|
||||||
audio_buffer.seek(0)
|
|
||||||
audio_content = audio_buffer.read()
|
|
||||||
|
|
||||||
if len(audio_content) > 0:
|
|
||||||
try:
|
|
||||||
# Convert speech to text
|
|
||||||
stt_result = await connector.speech_to_text(
|
|
||||||
audio_content=audio_content,
|
|
||||||
language=from_language,
|
|
||||||
format="detailed",
|
|
||||||
audio_format=audio_format
|
|
||||||
)
|
|
||||||
|
|
||||||
original_text = stt_result.get("text", "")
|
|
||||||
|
|
||||||
# Translate if different languages
|
|
||||||
translated_text = original_text
|
|
||||||
if from_language != to_language and original_text.strip():
|
|
||||||
try:
|
|
||||||
translated_text = await connector.translate_text(
|
|
||||||
text=original_text,
|
|
||||||
from_language=from_language,
|
|
||||||
to_language=to_language
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Translation failed: {str(e)}")
|
|
||||||
translated_text = original_text
|
|
||||||
|
|
||||||
# Send result back to client
|
|
||||||
await manager.send_message(connection_id, {
|
|
||||||
"type": "translation_result",
|
|
||||||
"original_text": original_text,
|
|
||||||
"translated_text": translated_text,
|
|
||||||
"from_language": from_language,
|
|
||||||
"to_language": to_language,
|
|
||||||
"confidence": stt_result.get("confidence", 0.0),
|
|
||||||
"timestamp": datetime.now().isoformat()
|
|
||||||
})
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error processing audio chunk: {str(e)}")
|
|
||||||
await manager.send_error(connection_id, f"Audio processing failed: {str(e)}", "PROCESSING_ERROR")
|
|
||||||
|
|
||||||
# Reset buffer
|
|
||||||
audio_buffer = io.BytesIO()
|
|
||||||
chunk_count = 0
|
|
||||||
|
|
||||||
elif message.get("type") == "ping":
|
|
||||||
# Respond to ping with pong
|
|
||||||
await manager.send_message(connection_id, {
|
|
||||||
"type": "pong",
|
|
||||||
"timestamp": datetime.now().isoformat()
|
|
||||||
})
|
|
||||||
|
|
||||||
elif message.get("type") == "disconnect":
|
|
||||||
# Client requested disconnect
|
|
||||||
break
|
|
||||||
|
|
||||||
except WebSocketDisconnect:
|
|
||||||
break
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
await manager.send_error(connection_id, "Invalid JSON message", "INVALID_JSON")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error processing WebSocket message: {str(e)}")
|
|
||||||
await manager.send_error(connection_id, f"Message processing failed: {str(e)}", "MESSAGE_ERROR")
|
|
||||||
|
|
||||||
except WebSocketDisconnect:
|
|
||||||
pass
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"WebSocket error: {str(e)}")
|
|
||||||
try:
|
|
||||||
await manager.send_error(connection_id, f"Connection error: {str(e)}", "CONNECTION_ERROR")
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
finally:
|
|
||||||
manager.disconnect(connection_id)
|
|
||||||
|
|
||||||
@router.websocket("/speech-to-text")
|
|
||||||
async def websocket_speech_to_text(
|
|
||||||
websocket: WebSocket,
|
|
||||||
user_id: str = Query(...),
|
|
||||||
language: str = Query("de-DE"),
|
|
||||||
audio_format: str = Query("wav")
|
|
||||||
):
|
|
||||||
"""WebSocket endpoint for real-time speech-to-text with live audio streaming."""
|
|
||||||
connection_id = f"stt_{user_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Connect WebSocket
|
|
||||||
await manager.connect(websocket, connection_id, user_id, {
|
|
||||||
"service": "speech_to_text",
|
|
||||||
"language": language,
|
|
||||||
"audio_format": audio_format
|
|
||||||
})
|
|
||||||
|
|
||||||
# Get Azure Speech connector
|
|
||||||
connector = await get_azure_speech_connector_ws(user_id)
|
|
||||||
if not connector:
|
|
||||||
await manager.send_error(connection_id, "Azure Speech Services not configured", "NO_CONNECTOR")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Send connection confirmation
|
|
||||||
await manager.send_message(connection_id, {
|
|
||||||
"type": "connected",
|
|
||||||
"connection_id": connection_id,
|
|
||||||
"service": "speech_to_text",
|
|
||||||
"language": language,
|
|
||||||
"timestamp": datetime.now().isoformat()
|
|
||||||
})
|
|
||||||
|
|
||||||
# Process incoming audio chunks
|
|
||||||
audio_buffer = io.BytesIO()
|
|
||||||
chunk_count = 0
|
|
||||||
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
# Receive message from client
|
|
||||||
data = await websocket.receive_text()
|
|
||||||
message = json.loads(data)
|
|
||||||
|
|
||||||
if message.get("type") == "audio_chunk":
|
|
||||||
# Decode base64 audio data
|
|
||||||
audio_data = base64.b64decode(message.get("data", ""))
|
|
||||||
audio_buffer.write(audio_data)
|
|
||||||
chunk_count += 1
|
|
||||||
|
|
||||||
# Process every 10 chunks or when buffer is large enough
|
|
||||||
if chunk_count % 10 == 0 or len(audio_data) > 8192:
|
|
||||||
audio_buffer.seek(0)
|
|
||||||
audio_content = audio_buffer.read()
|
|
||||||
|
|
||||||
if len(audio_content) > 0:
|
|
||||||
try:
|
|
||||||
# Convert speech to text
|
|
||||||
stt_result = await connector.speech_to_text(
|
|
||||||
audio_content=audio_content,
|
|
||||||
language=language,
|
|
||||||
format="detailed",
|
|
||||||
audio_format=audio_format
|
|
||||||
)
|
|
||||||
|
|
||||||
# Send result back to client
|
|
||||||
await manager.send_message(connection_id, {
|
|
||||||
"type": "transcription_result",
|
|
||||||
"text": stt_result.get("text", ""),
|
|
||||||
"confidence": stt_result.get("confidence", 0.0),
|
|
||||||
"language": stt_result.get("language", language),
|
|
||||||
"is_final": stt_result.get("RecognitionStatus") == "Success",
|
|
||||||
"timestamp": datetime.now().isoformat()
|
|
||||||
})
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error processing audio chunk: {str(e)}")
|
|
||||||
await manager.send_error(connection_id, f"Audio processing failed: {str(e)}", "PROCESSING_ERROR")
|
|
||||||
|
|
||||||
# Reset buffer
|
|
||||||
audio_buffer = io.BytesIO()
|
|
||||||
chunk_count = 0
|
|
||||||
|
|
||||||
elif message.get("type") == "ping":
|
|
||||||
# Respond to ping with pong
|
|
||||||
await manager.send_message(connection_id, {
|
|
||||||
"type": "pong",
|
|
||||||
"timestamp": datetime.now().isoformat()
|
|
||||||
})
|
|
||||||
|
|
||||||
elif message.get("type") == "disconnect":
|
|
||||||
# Client requested disconnect
|
|
||||||
break
|
|
||||||
|
|
||||||
except WebSocketDisconnect:
|
|
||||||
break
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
await manager.send_error(connection_id, "Invalid JSON message", "INVALID_JSON")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error processing WebSocket message: {str(e)}")
|
|
||||||
await manager.send_error(connection_id, f"Message processing failed: {str(e)}", "MESSAGE_ERROR")
|
|
||||||
|
|
||||||
except WebSocketDisconnect:
|
|
||||||
pass
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"WebSocket error: {str(e)}")
|
|
||||||
try:
|
|
||||||
await manager.send_error(connection_id, f"Connection error: {str(e)}", "CONNECTION_ERROR")
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
finally:
|
|
||||||
manager.disconnect(connection_id)
|
|
||||||
|
|
||||||
@router.websocket("/text-to-speech")
|
|
||||||
async def websocket_text_to_speech(
|
|
||||||
websocket: WebSocket,
|
|
||||||
user_id: str = Query(...),
|
|
||||||
language: str = Query("de-DE"),
|
|
||||||
voice: str = Query("de-DE-KatjaNeural"),
|
|
||||||
audio_format: str = Query("audio-16khz-128kbitrate-mono-mp3")
|
|
||||||
):
|
|
||||||
"""WebSocket endpoint for real-time text-to-speech streaming."""
|
|
||||||
connection_id = f"tts_{user_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Connect WebSocket
|
|
||||||
await manager.connect(websocket, connection_id, user_id, {
|
|
||||||
"service": "text_to_speech",
|
|
||||||
"language": language,
|
|
||||||
"voice": voice,
|
|
||||||
"audio_format": audio_format
|
|
||||||
})
|
|
||||||
|
|
||||||
# Get Azure Speech connector
|
|
||||||
connector = await get_azure_speech_connector_ws(user_id)
|
|
||||||
if not connector:
|
|
||||||
await manager.send_error(connection_id, "Azure Speech Services not configured", "NO_CONNECTOR")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Send connection confirmation
|
|
||||||
await manager.send_message(connection_id, {
|
|
||||||
"type": "connected",
|
|
||||||
"connection_id": connection_id,
|
|
||||||
"service": "text_to_speech",
|
|
||||||
"language": language,
|
|
||||||
"voice": voice,
|
|
||||||
"timestamp": datetime.now().isoformat()
|
|
||||||
})
|
|
||||||
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
# Receive message from client
|
|
||||||
data = await websocket.receive_text()
|
|
||||||
message = json.loads(data)
|
|
||||||
|
|
||||||
if message.get("type") == "text_to_speak":
|
|
||||||
text = message.get("text", "")
|
|
||||||
|
|
||||||
if text.strip():
|
|
||||||
try:
|
|
||||||
# Convert text to speech
|
|
||||||
audio_data = await connector.text_to_speech(
|
|
||||||
text=text,
|
|
||||||
language=language,
|
|
||||||
voice=voice,
|
|
||||||
format=audio_format
|
|
||||||
)
|
|
||||||
|
|
||||||
# Send audio data back to client
|
|
||||||
audio_base64 = base64.b64encode(audio_data).decode('utf-8')
|
|
||||||
await manager.send_message(connection_id, {
|
|
||||||
"type": "audio_data",
|
|
||||||
"audio_data": audio_base64,
|
|
||||||
"format": audio_format,
|
|
||||||
"voice": voice,
|
|
||||||
"text": text,
|
|
||||||
"timestamp": datetime.now().isoformat()
|
|
||||||
})
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error converting text to speech: {str(e)}")
|
|
||||||
await manager.send_error(connection_id, f"TTS failed: {str(e)}", "TTS_ERROR")
|
|
||||||
|
|
||||||
elif message.get("type") == "ping":
|
|
||||||
# Respond to ping with pong
|
|
||||||
await manager.send_message(connection_id, {
|
|
||||||
"type": "pong",
|
|
||||||
"timestamp": datetime.now().isoformat()
|
|
||||||
})
|
|
||||||
|
|
||||||
elif message.get("type") == "disconnect":
|
|
||||||
# Client requested disconnect
|
|
||||||
break
|
|
||||||
|
|
||||||
except WebSocketDisconnect:
|
|
||||||
break
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
await manager.send_error(connection_id, "Invalid JSON message", "INVALID_JSON")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error processing WebSocket message: {str(e)}")
|
|
||||||
await manager.send_error(connection_id, f"Message processing failed: {str(e)}", "MESSAGE_ERROR")
|
|
||||||
|
|
||||||
except WebSocketDisconnect:
|
|
||||||
pass
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"WebSocket error: {str(e)}")
|
|
||||||
try:
|
|
||||||
await manager.send_error(connection_id, f"Connection error: {str(e)}", "CONNECTION_ERROR")
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
finally:
|
|
||||||
manager.disconnect(connection_id)
|
|
||||||
|
|
||||||
@router.get("/status")
|
|
||||||
async def websocket_status():
|
|
||||||
"""Get WebSocket connection status."""
|
|
||||||
return {
|
|
||||||
"active_connections": len(manager.active_connections),
|
|
||||||
"connected_users": len(manager.user_connections),
|
|
||||||
"services": {
|
|
||||||
"realtime_interpreter": len([c for c in manager.connection_metadata.values() if c.get("service") == "realtime_interpreter"]),
|
|
||||||
"speech_to_text": len([c for c in manager.connection_metadata.values() if c.get("service") == "speech_to_text"]),
|
|
||||||
"text_to_speech": len([c for c in manager.connection_metadata.values() if c.get("service") == "text_to_speech"])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -57,18 +57,23 @@ class ManagerSyncDelta:
|
||||||
JIRA_ISSUE_TYPE = "Task"
|
JIRA_ISSUE_TYPE = "Task"
|
||||||
|
|
||||||
# Task sync definition for field mapping (like original synchronizer)
|
# Task sync definition for field mapping (like original synchronizer)
|
||||||
TASK_SYNC_DEFINITION = {
|
|
||||||
"ID": ["get", ["key"]],
|
task_sync_definition={
|
||||||
"Summary": ["get", ["fields", "summary"]],
|
#key=excel-header, [get:jira>excel | put: excel>jira, jira-xml-field-list]
|
||||||
"Status": ["get", ["fields", "status", "name"]],
|
'ID': ['get', ['key']],
|
||||||
"Assignee": ["get", ["fields", "assignee", "displayName"]],
|
'Module Category': ['get', ['fields', 'customfield_10058', 'value']],
|
||||||
"Reporter": ["get", ["fields", "reporter", "displayName"]],
|
'Summary': ['get', ['fields', 'summary']],
|
||||||
"Created": ["get", ["fields", "created"]],
|
'Description': ['get', ['fields', 'description']],
|
||||||
"Updated": ["get", ["fields", "updated"]],
|
'References': ['get', ['fields', 'customfield_10066']],
|
||||||
"Priority": ["get", ["fields", "priority", "name"]],
|
'Priority': ['get', ['fields', 'priority', 'name']],
|
||||||
"IssueType": ["get", ["fields", "issuetype", "name"]],
|
'Issue Status': ['get', ['fields', 'customfield_10062']],
|
||||||
"Project": ["get", ["fields", "project", "name"]],
|
'Assignee': ['get', ['fields', 'assignee', 'displayName']],
|
||||||
"Description": ["get", ["fields", "description"]],
|
'Issue Created': ['get', ['fields', 'created']],
|
||||||
|
'Due Date': ['get', ['fields', 'duedate']],
|
||||||
|
'DELTA Comments': ['get', ['fields', 'customfield_10060']],
|
||||||
|
'SELISE Ticket References': ['put', ['fields', 'customfield_10067']],
|
||||||
|
'SELISE Status Values': ['put', ['fields', 'customfield_10065']],
|
||||||
|
'SELISE Comments': ['put', ['fields', 'customfield_10064']],
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,9 @@ Office365-REST-Python-Client==2.6.2 # Easy Sharepoint integration
|
||||||
## Image Processing
|
## Image Processing
|
||||||
Pillow>=10.0.0 # Für Bildverarbeitung (als PIL importiert)
|
Pillow>=10.0.0 # Für Bildverarbeitung (als PIL importiert)
|
||||||
|
|
||||||
|
## Audio Processing
|
||||||
|
# Audio format conversion handled by pure Python implementation
|
||||||
|
|
||||||
## Utilities & Timezone Support
|
## Utilities & Timezone Support
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
python-dotenv==1.0.0
|
python-dotenv==1.0.0
|
||||||
|
|
@ -56,6 +59,10 @@ pytz>=2023.3 # For timezone handling and UTC operations
|
||||||
## Dependencies for trio (used by httpx)
|
## Dependencies for trio (used by httpx)
|
||||||
sortedcontainers>=2.4.0 # Required by trio
|
sortedcontainers>=2.4.0 # Required by trio
|
||||||
|
|
||||||
|
## Google Cloud Integration
|
||||||
|
google-cloud-speech==2.21.0
|
||||||
|
google-cloud-translate==3.11.1
|
||||||
|
|
||||||
## MSFT Integration
|
## MSFT Integration
|
||||||
msal==1.24.1
|
msal==1.24.1
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue