From 75e07743a6743123f8bd214d992e1583e3267868 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 11 May 2026 21:26:20 +0200 Subject: [PATCH] google keys transferred to account poweron.center.ai --- env-gateway-dev.env | 10 +- env-gateway-int.env | 10 +- env-gateway-prod-forgejo.env | 8 +- env-gateway-prod.env | 10 +- modules/connectors/connectorVoiceGoogle.py | 103 +++++++++++++++--- .../features/commcoach/serviceCommcoach.py | 2 + .../features/teamsbot/datamodelTeamsbot.py | 13 +++ modules/features/teamsbot/mainTeamsbot.py | 13 +++ .../features/teamsbot/routeFeatureTeamsbot.py | 54 +++++++++ modules/features/teamsbot/service.py | 1 + modules/interfaces/interfaceVoiceObjects.py | 23 +++- modules/routes/routeVoiceGoogle.py | 30 ++++- .../test_connectorVoiceGoogle_sttHelpers.py | 23 ++++ 13 files changed, 257 insertions(+), 43 deletions(-) create mode 100644 tests/unit/connectors/test_connectorVoiceGoogle_sttHelpers.py diff --git a/env-gateway-dev.env b/env-gateway-dev.env index 3709b0d8..158e00aa 100644 --- a/env-gateway-dev.env +++ b/env-gateway-dev.env @@ -19,7 +19,7 @@ APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2Z APP_TOKEN_EXPIRY=300 # CORS Configuration -APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss +APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net # Logging configuration APP_LOGGING_LOG_LEVEL = DEBUG @@ -39,11 +39,11 @@ Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk= Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback -Service_GOOGLE_AUTH_CLIENT_ID = 
354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM= +Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_AUTH_CLIENT_SECRET = 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 Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback -Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM= +Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_DATA_CLIENT_SECRET = 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 Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. 
diff --git a/env-gateway-int.env b/env-gateway-int.env index d22b7d2a..33b21f1f 100644 --- a/env-gateway-int.env +++ b/env-gateway-int.env @@ -21,7 +21,7 @@ APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZ APP_TOKEN_EXPIRY=300 # CORS Configuration -APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net +APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net # Logging configuration APP_LOGGING_LOG_LEVEL = DEBUG @@ -41,11 +41,11 @@ Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk= Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback -Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo= +Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_AUTH_CLIENT_SECRET = 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 Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback -Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_DATA_CLIENT_SECRET = 
INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo= +Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_DATA_CLIENT_SECRET = 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 Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. diff --git a/env-gateway-prod-forgejo.env b/env-gateway-prod-forgejo.env index e0ab455b..cc35f9c1 100644 --- a/env-gateway-prod-forgejo.env +++ b/env-gateway-prod-forgejo.env @@ -39,11 +39,11 @@ Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4= Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback -Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o= +Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_AUTH_CLIENT_SECRET = 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 Service_GOOGLE_AUTH_REDIRECT_URI = -Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_DATA_CLIENT_SECRET = 
PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o= +Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_DATA_CLIENT_SECRET = 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 Service_GOOGLE_DATA_REDIRECT_URI = # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. diff --git a/env-gateway-prod.env b/env-gateway-prod.env index 0183ae1f..6c840977 100644 --- a/env-gateway-prod.env +++ b/env-gateway-prod.env @@ -20,7 +20,7 @@ APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUl APP_TOKEN_EXPIRY=300 # CORS Configuration -APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net +APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net # Logging configuration APP_LOGGING_LOG_LEVEL = DEBUG @@ -40,11 +40,11 @@ Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4= Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback -Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = 
PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o= +Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_AUTH_CLIENT_SECRET = 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 Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback -Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o= +Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_DATA_CLIENT_SECRET = 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 Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. 
diff --git a/modules/connectors/connectorVoiceGoogle.py b/modules/connectors/connectorVoiceGoogle.py index f875c72c..3dd3221d 100644 --- a/modules/connectors/connectorVoiceGoogle.py +++ b/modules/connectors/connectorVoiceGoogle.py @@ -19,6 +19,30 @@ from modules.shared.voiceCatalog import getDefaultVoice as _catalogDefaultVoice logger = logging.getLogger(__name__) + +def _buildPrimarySttRecognitionFields( + *, + model: str, + lightweight: bool, +) -> Dict[str, Any]: + """Shared fields for batch + streaming primary RecognitionConfig.""" + base: Dict[str, Any] = { + "enable_automatic_punctuation": True, + "model": model, + } + if lightweight: + base["enable_word_time_offsets"] = False + base["enable_word_confidence"] = False + base["max_alternatives"] = 1 + base["use_enhanced"] = False + else: + base["enable_word_time_offsets"] = True + base["enable_word_confidence"] = True + base["max_alternatives"] = 3 + base["use_enhanced"] = True + return base + + # Gemini-TTS speaker IDs from voices.list use short names (e.g. "Kore") and require # SynthesisInput.prompt + VoiceSelectionParams.model_name (google-cloud-texttospeech >= 2.24.0). _GEMINI_TTS_DEFAULT_MODEL = "gemini-2.5-flash-tts" @@ -73,7 +97,10 @@ class ConnectorGoogleSpeech: sampleRate: int = None, channels: int = None, skipFallbacks: bool = False, phraseHints: Optional[list] = None, - alternativeLanguages: Optional[list] = None) -> Dict: + alternativeLanguages: Optional[list] = None, + model: str = "latest_long", + lightweight: bool = False, + audioFormat: Optional[str] = None) -> Dict: """ Convert speech to text using Google Cloud Speech-to-Text API. @@ -82,6 +109,9 @@ class ConnectorGoogleSpeech: language: Language code (e.g., 'de-DE', 'en-US') sample_rate: Audio sample rate (auto-detected if None) channels: Number of audio channels (auto-detected if None) + model: Google recognition model (e.g. 
latest_long, latest_short) + lightweight: If True, omit word timings/confidence, single alternative, no enhanced model + audioFormat: If set (webm_opus, linear16, mp3, flac, wav), skip auto-detection Returns: Dict containing transcribed text, confidence, and metadata @@ -92,8 +122,24 @@ class ConnectorGoogleSpeech: logger.warning(f"Invalid sampleRate={sampleRate}, treating as unknown for auto-detection") sampleRate = None - # Auto-detect audio format if not provided - if sampleRate is None or channels is None: + explicitFormat = (audioFormat or "").strip().lower() or None + if explicitFormat: + if channels is None: + channels = 1 + if sampleRate is None: + if explicitFormat == "webm_opus": + sampleRate = 48000 + elif explicitFormat == "linear16": + sampleRate = 16000 + elif explicitFormat in ("mp3", "flac"): + sampleRate = 44100 + elif explicitFormat == "wav": + sampleRate = 16000 + else: + sampleRate = 16000 + audioFormat = explicitFormat + logger.info(f"STT explicit format: {audioFormat}, {sampleRate}Hz, {channels}ch") + elif sampleRate is None or channels is None: validation = self.validateAudioFormat(audioContent) if not validation["valid"]: return { @@ -156,12 +202,7 @@ class ConnectorGoogleSpeech: "encoding": encoding, "audio_channel_count": channels, "language_code": language, - "enable_automatic_punctuation": True, - "model": "latest_long", - "enable_word_time_offsets": True, - "enable_word_confidence": True, - "max_alternatives": 3, - "use_enhanced": True, + **_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight), } if phraseHints: @@ -205,8 +246,7 @@ class ConnectorGoogleSpeech: sample_rate_hertz=16000, audio_channel_count=1, language_code=language, - enable_automatic_punctuation=True, - model="latest_long" + **_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight), ) try: response = await asyncio.to_thread( @@ -343,7 +383,7 @@ class ConnectorGoogleSpeech: "error": "No recognition results (silence or unclear audio)" } - 
models = ["latest_long", "phone_call", "latest_short"] + models = list(dict.fromkeys([model, "latest_long", "phone_call", "latest_short"])) for fallback_config in fallback_configs: for model in models: @@ -419,6 +459,9 @@ class ConnectorGoogleSpeech: audioQueue: asyncio.Queue, language: str = "de-DE", phraseHints: Optional[list] = None, + model: str = "latest_long", + lightweight: bool = False, + singleUtterance: bool = False, ) -> AsyncGenerator[Dict[str, Any], None]: """ Stream audio chunks to Google Cloud Speech-to-Text Streaming API. @@ -429,9 +472,13 @@ class ConnectorGoogleSpeech: Send (b"", True) to signal end of stream. language: Language code phraseHints: Optional boost phrases + model: Google recognition model (e.g. latest_long, latest_short) + lightweight: If True, use non-enhanced primary config (lower latency) + singleUtterance: If True, end stream after first utterance (client should reconnect) Yields: - Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec + Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec; + optionally endOfSingleUtterance, reconnectRequired """ STREAM_LIMIT_SEC = 290 streamStartTs = time.time() @@ -442,9 +489,7 @@ class ConnectorGoogleSpeech: "sample_rate_hertz": 48000, "audio_channel_count": 1, "language_code": language, - "enable_automatic_punctuation": True, - "model": "latest_long", - "use_enhanced": True, + **_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight), } if phraseHints: configParams["speech_contexts"] = [speech.SpeechContext(phrases=phraseHints, boost=15.0)] @@ -453,7 +498,7 @@ class ConnectorGoogleSpeech: streamingConfig = speech.StreamingRecognitionConfig( config=recognitionConfig, interim_results=True, - single_utterance=False, + single_utterance=singleUtterance, ) import queue as threadQueue @@ -490,7 +535,22 @@ class ConnectorGoogleSpeech: ) for response in responseStream: elapsed = time.time() - streamStartTs - 
estimatedDurationSec = totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0 + + durationFromResults = 0.0 + for result in response.results: + rt = getattr(result, "result_end_time", None) + if rt is None: + continue + if hasattr(rt, "total_seconds"): + durationFromResults = max(durationFromResults, float(rt.total_seconds())) + else: + durationFromResults = max( + durationFromResults, + float(getattr(rt, "seconds", 0)) + float(getattr(rt, "nanos", 0)) * 1e-9, + ) + estimatedDurationSec = durationFromResults if durationFromResults > 0 else ( + totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0.0 + ) finalTexts = [] interimTexts = [] @@ -524,6 +584,13 @@ class ConnectorGoogleSpeech: "stabilityScore": 0.0, "audioDurationSec": estimatedDurationSec, }), loop) + + speechEvt = getattr(response, "speech_event_type", None) + if speechEvt and "END_OF_SINGLE_UTTERANCE" in str(speechEvt): + asyncio.run_coroutine_threadsafe(resultOutQ.put({ + "endOfSingleUtterance": True, + "audioDurationSec": estimatedDurationSec, + }), loop) if elapsed >= STREAM_LIMIT_SEC: logger.info("Streaming STT approaching 5-min limit, client should reconnect") asyncio.run_coroutine_threadsafe(resultOutQ.put({ diff --git a/modules/features/commcoach/serviceCommcoach.py b/modules/features/commcoach/serviceCommcoach.py index 39b96b55..5ac3af23 100644 --- a/modules/features/commcoach/serviceCommcoach.py +++ b/modules/features/commcoach/serviceCommcoach.py @@ -1080,6 +1080,8 @@ class CommcoachService: audioContent=audioContent, language=language, skipFallbacks=True, + model="latest_short", + lightweight=True, ) transcribedText = "" diff --git a/modules/features/teamsbot/datamodelTeamsbot.py b/modules/features/teamsbot/datamodelTeamsbot.py index a7a22c9b..076b0eda 100644 --- a/modules/features/teamsbot/datamodelTeamsbot.py +++ b/modules/features/teamsbot/datamodelTeamsbot.py @@ -111,6 +111,14 @@ class TeamsbotMeetingModule(PowerOnModel): defaultDirectorPrompts: Optional[str] = 
Field(default=None, description="JSON list of default director prompts") goals: Optional[str] = Field(default=None, description="Free-text goals") kpiTargets: Optional[str] = Field(default=None, description="JSON object with structured KPI targets") + defaultMeetingLink: Optional[str] = Field( + default=None, + description="Default Teams meeting URL for new sessions in this module (user can override)", + ) + defaultBotName: Optional[str] = Field( + default=None, + description="Default display name for the bot when starting a session from this module", + ) status: TeamsbotModuleStatus = Field(default=TeamsbotModuleStatus.ACTIVE) @@ -257,6 +265,7 @@ class TeamsbotStartSessionRequest(BaseModel): """Request to start a new Teams Bot session.""" meetingLink: str = Field(description="Teams meeting join link (e.g., https://teams.microsoft.com/l/meetup-join/...)") botName: Optional[str] = Field(default=None, description="Override bot name for this session") + moduleId: Optional[str] = Field(default=None, description="Optional MeetingModule to attach this session to") connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access") joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.") sessionContext: Optional[str] = Field(default=None, description="Custom context/knowledge to provide to the bot for this session (e.g. 
meeting agenda, documents, background info)") @@ -277,6 +286,8 @@ class CreateMeetingModuleRequest(BaseModel): defaultDirectorPrompts: Optional[str] = None goals: Optional[str] = None kpiTargets: Optional[str] = None + defaultMeetingLink: Optional[str] = None + defaultBotName: Optional[str] = None class UpdateMeetingModuleRequest(BaseModel): @@ -287,6 +298,8 @@ class UpdateMeetingModuleRequest(BaseModel): defaultDirectorPrompts: Optional[str] = None goals: Optional[str] = None kpiTargets: Optional[str] = None + defaultMeetingLink: Optional[str] = None + defaultBotName: Optional[str] = None status: Optional[TeamsbotModuleStatus] = None diff --git a/modules/features/teamsbot/mainTeamsbot.py b/modules/features/teamsbot/mainTeamsbot.py index 66bc9247..850135d6 100644 --- a/modules/features/teamsbot/mainTeamsbot.py +++ b/modules/features/teamsbot/mainTeamsbot.py @@ -290,6 +290,19 @@ def _runMigrations(): migrated = False + # M2: MeetingModule default meeting link / bot name (additive columns) + if _tableExists("TeamsbotMeetingModule"): + for col, sqlType in ( + ("defaultMeetingLink", "TEXT"), + ("defaultBotName", "TEXT"), + ): + if not _columnExists("TeamsbotMeetingModule", col): + cur.execute( + f'ALTER TABLE "TeamsbotMeetingModule" ADD COLUMN "{col}" {sqlType} NULL', + ) + logger.info(f"Migration M2: Added TeamsbotMeetingModule.{col}") + migrated = True + # M1: Create default Adhoc modules for orphaned sessions # (only runs if TeamsbotSession table exists with moduleId column # and there are sessions without a moduleId) diff --git a/modules/features/teamsbot/routeFeatureTeamsbot.py b/modules/features/teamsbot/routeFeatureTeamsbot.py index ab42db22..b3088f8e 100644 --- a/modules/features/teamsbot/routeFeatureTeamsbot.py +++ b/modules/features/teamsbot/routeFeatureTeamsbot.py @@ -280,6 +280,11 @@ async def startSession( mandateId = _validateInstanceAccess(instanceId, context) interface = _getInterface(context, instanceId) config = _getInstanceConfig(instanceId) + + if 
body.moduleId: + mod = interface.getModule(body.moduleId) + if not mod or str(mod.get("instanceId") or "") != str(instanceId): + raise HTTPException(status_code=400, detail="Invalid moduleId for this instance") # Extract and validate meeting URL from user input (handles SafeLinks, invitation text, etc.) cleanMeetingUrl = _extractTeamsMeetingUrl(body.meetingLink) @@ -288,6 +293,7 @@ async def startSession( sessionData = TeamsbotSession( instanceId=instanceId, mandateId=mandateId, + moduleId=body.moduleId, meetingLink=cleanMeetingUrl, botName=body.botName or config.botName, sessionContext=body.sessionContext, @@ -426,6 +432,54 @@ async def listSessions( return {"sessions": sessions} +@router.get("/{instanceId}/dashboard/stream") +@limiter.limit("60/minute") +async def streamDashboard( + request: Request, + instanceId: str, + context: RequestContext = Depends(getRequestContext), +): + """ + SSE channel for the Teamsbot dashboard: repeated snapshots of sessions and meeting modules. + Push interval: 3s while any own session is pending/joining/active, otherwise 20s. + Same session visibility rules as GET /sessions (own sessions unless platform admin). 
+ """ + _validateInstanceAccess(instanceId, context) + interface = _getInterface(context, instanceId) + userId = None if context.isPlatformAdmin else str(context.user.id) + activeStatuses = { + TeamsbotSessionStatus.PENDING.value, + TeamsbotSessionStatus.JOINING.value, + TeamsbotSessionStatus.ACTIVE.value, + } + + async def eventGenerator(): + while True: + sessionRows = [] + try: + sessionRows = interface.getSessions(instanceId, includeEnded=True, userId=userId) + moduleRows = interface.getModules(instanceId) + payload = {"type": "dashboardState", "sessions": sessionRows, "modules": moduleRows} + yield f"data: {json.dumps(payload, default=str)}\n\n" + except asyncio.CancelledError: + raise + except Exception as ex: + logger.warning("dashboard stream tick failed: %s", ex) + yield f"data: {json.dumps({'type': 'error', 'message': 'dashboard_tick_failed'})}\n\n" + hasActive = any((s.get("status") in activeStatuses) for s in sessionRows) + await asyncio.sleep(3.0 if hasActive else 20.0) + + return StreamingResponse( + eventGenerator(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) + + @router.get("/{instanceId}/sessions/{sessionId}") @limiter.limit("30/minute") async def getSession( diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index fe0d6c34..d520bf49 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -1225,6 +1225,7 @@ class TeamsbotService: skipFallbacks=True, phraseHints=phraseHints if phraseHints else None, alternativeLanguages=["en-US"], + audioFormat="linear16", ) if sttResult and sttResult.get("success") and sttResult.get("text"): diff --git a/modules/interfaces/interfaceVoiceObjects.py b/modules/interfaces/interfaceVoiceObjects.py index d0b6f461..03729f86 100644 --- a/modules/interfaces/interfaceVoiceObjects.py +++ b/modules/interfaces/interfaceVoiceObjects.py @@ -69,7 +69,10 
@@ class VoiceObjects: sampleRate: int = None, channels: int = None, skipFallbacks: bool = False, phraseHints: list = None, - alternativeLanguages: list = None) -> Dict[str, Any]: + alternativeLanguages: list = None, + model: str = "latest_long", + lightweight: bool = False, + audioFormat: Optional[str] = None) -> Dict[str, Any]: """ Convert speech to text using Google Cloud Speech-to-Text API. @@ -81,6 +84,9 @@ class VoiceObjects: skipFallbacks: If True, skip fallback attempts (use when audio format is known) phraseHints: Optional list of phrases to boost recognition (names, terms) alternativeLanguages: Optional list of additional language codes for multi-language + model: Google STT model (e.g. latest_long, latest_short) + lightweight: If True, omit word-level features and enhanced model + audioFormat: If set (webm_opus, linear16, ...), skip format auto-detection Returns: Dict containing transcribed text, confidence, and metadata @@ -97,6 +103,9 @@ class VoiceObjects: skipFallbacks=skipFallbacks, phraseHints=phraseHints, alternativeLanguages=alternativeLanguages, + model=model, + lightweight=lightweight, + audioFormat=audioFormat, ) if result["success"]: @@ -120,13 +129,23 @@ class VoiceObjects: audioQueue: asyncio.Queue, language: str = "de-DE", phraseHints: Optional[list] = None, + model: str = "latest_long", + lightweight: bool = False, + singleUtterance: bool = False, ) -> AsyncGenerator[Dict[str, Any], None]: """ Stream audio to Google Streaming STT and yield interim/final results. Billing is recorded for each final result. 
""" connector = self._getGoogleSpeechConnector() - async for event in connector.streamingRecognize(audioQueue, language, phraseHints): + async for event in connector.streamingRecognize( + audioQueue, + language, + phraseHints, + model=model, + lightweight=lightweight, + singleUtterance=singleUtterance, + ): if event.get("isFinal") and self.billingCallback: durationSec = event.get("audioDurationSec", 0) priceCHF = connector.calculateSttCostCHF(durationSec) diff --git a/modules/routes/routeVoiceGoogle.py b/modules/routes/routeVoiceGoogle.py index 8987e73f..10185cc2 100644 --- a/modules/routes/routeVoiceGoogle.py +++ b/modules/routes/routeVoiceGoogle.py @@ -155,12 +155,13 @@ async def sttStream( Protocol: Client sends JSON: - {"type": "open", "language": "de-DE"} + {"type": "open", "language": "de-DE", "model": "latest_short", "lightweight": true, "singleUtterance": true} {"type": "audio", "chunk": ""} {"type": "close"} Server sends JSON: {"type": "interim", "text": "..."} {"type": "final", "text": "...", "confidence": 0.95} + {"type": "end_of_single_utterance", "audioDurationSec": 0.0} {"type": "error", "message": "..."} {"type": "closed"} """ @@ -205,7 +206,12 @@ async def sttStream( logger.warning(f"STT billing pre-flight skipped: {e}") audioQueue: asyncio.Queue = asyncio.Queue() - language = "de-DE" + sttOpenOptions: Dict[str, Any] = { + "language": "de-DE", + "model": "latest_long", + "lightweight": False, + "singleUtterance": False, + } streamingTask: Optional[asyncio.Task] = None voiceInterface: Optional[VoiceObjects] = None @@ -233,10 +239,23 @@ async def sttStream( voiceInterface.billingCallback = _billingCb try: - async for event in voiceInterface.streamingSpeechToText(audioQueue, language): + async for event in voiceInterface.streamingSpeechToText( + audioQueue, + sttOpenOptions["language"], + phraseHints=None, + model=sttOpenOptions["model"], + lightweight=sttOpenOptions["lightweight"], + singleUtterance=sttOpenOptions["singleUtterance"], + ): if 
event.get("reconnectRequired"): await _sendJson({"type": "reconnect_required"}) return + if event.get("endOfSingleUtterance"): + await _sendJson({ + "type": "end_of_single_utterance", + "audioDurationSec": event.get("audioDurationSec", 0.0), + }) + continue if event.get("isFinal"): if event.get("transcript"): await _sendJson({"type": "final", "text": event["transcript"], "confidence": event.get("confidence", 0.0)}) @@ -258,7 +277,10 @@ async def sttStream( msgType = (msg.get("type") or "").strip() if msgType == "open": - language = msg.get("language") or "de-DE" + sttOpenOptions["language"] = msg.get("language") or "de-DE" + sttOpenOptions["model"] = msg.get("model") or "latest_long" + sttOpenOptions["lightweight"] = bool(msg.get("lightweight")) + sttOpenOptions["singleUtterance"] = bool(msg.get("singleUtterance")) if streamingTask and not streamingTask.done(): await audioQueue.put((b"", True)) streamingTask.cancel() diff --git a/tests/unit/connectors/test_connectorVoiceGoogle_sttHelpers.py b/tests/unit/connectors/test_connectorVoiceGoogle_sttHelpers.py new file mode 100644 index 00000000..258dc0db --- /dev/null +++ b/tests/unit/connectors/test_connectorVoiceGoogle_sttHelpers.py @@ -0,0 +1,23 @@ +# Copyright (c) 2025 Patrick Motsch +"""Unit tests for Google STT helper config (no API calls).""" + +from modules.connectors.connectorVoiceGoogle import _buildPrimarySttRecognitionFields + + +def test_buildPrimaryStt_lightweight_stripsHeavyFeatures(): + d = _buildPrimarySttRecognitionFields(model="latest_short", lightweight=True) + assert d["model"] == "latest_short" + assert d["enable_word_time_offsets"] is False + assert d["enable_word_confidence"] is False + assert d["max_alternatives"] == 1 + assert d["use_enhanced"] is False + assert d["enable_automatic_punctuation"] is True + + +def test_buildPrimaryStt_full_matchesLegacyDefaults(): + d = _buildPrimarySttRecognitionFields(model="latest_long", lightweight=False) + assert d["model"] == "latest_long" + assert 
d["enable_word_time_offsets"] is True + assert d["enable_word_confidence"] is True + assert d["max_alternatives"] == 3 + assert d["use_enhanced"] is True