From fa530bfd4c99a47a57c294685480c75b6dc68b53 Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Fri, 13 Feb 2026 13:12:21 +0100 Subject: [PATCH 01/11] fixed route to media center --- .../features/teamsbot/routeFeatureTeamsbot.py | 47 ++++++++++--------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/modules/features/teamsbot/routeFeatureTeamsbot.py b/modules/features/teamsbot/routeFeatureTeamsbot.py index 4c6a7877..84f601fc 100644 --- a/modules/features/teamsbot/routeFeatureTeamsbot.py +++ b/modules/features/teamsbot/routeFeatureTeamsbot.py @@ -362,26 +362,31 @@ async def bridgeStatusCallback( logger.info(f"Bridge status callback: session={sessionId}, status={status}") - # Update session status - from modules.datamodels.datamodelUam import User - systemUser = User(id="system", email="system@internal") - interface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId) - - updates = {"status": status} - if errorMessage: - updates["errorMessage"] = errorMessage - if status == TeamsbotSessionStatus.ACTIVE.value: - from modules.shared.timeUtils import getUtcTimestamp - updates["startedAt"] = getUtcTimestamp() - elif status in [TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value]: - from modules.shared.timeUtils import getUtcTimestamp - updates["endedAt"] = getUtcTimestamp() - - interface.updateSession(sessionId, updates) - - # Emit SSE event - from .service import _emitSessionEvent - await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage}) + try: + # Update session status (bridge callbacks have no user context) + from modules.datamodels.datamodelUam import User + systemUser = User(id="system", username="system", email="system@internal") + interface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId) + + updates = {"status": status} + if errorMessage: + updates["errorMessage"] = errorMessage + if status == TeamsbotSessionStatus.ACTIVE.value: + from modules.shared.timeUtils import getUtcTimestamp + updates["startedAt"] = getUtcTimestamp() + elif status in [TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value]: + from modules.shared.timeUtils import getUtcTimestamp + updates["endedAt"] = getUtcTimestamp() + + interface.updateSession(sessionId, updates) + + # Emit SSE event + from .service import _emitSessionEvent + await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage}) + + except Exception as e: + logger.error(f"Bridge status callback processing failed: session={sessionId}, error={e}") + # Still return 200 so the bridge doesn't retry endlessly return {"received": True} @@ -405,7 +410,7 @@ async def bridgeAudioWebsocket( config = _getInstanceConfig(instanceId) from modules.datamodels.datamodelUam import User - systemUser = User(id="system", email="system@internal") + systemUser = User(id="system", username="system", email="system@internal") service = TeamsbotService(systemUser, None, instanceId, config) try: From 3a9812a9e2a9d44caa15eac3033f7c41db187c17 Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Fri, 13 Feb 2026 13:35:38 +0100 Subject: [PATCH 02/11] test websocket --- .../features/teamsbot/routeFeatureTeamsbot.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/modules/features/teamsbot/routeFeatureTeamsbot.py b/modules/features/teamsbot/routeFeatureTeamsbot.py index 84f601fc..e568e173 100644 --- a/modules/features/teamsbot/routeFeatureTeamsbot.py +++ b/modules/features/teamsbot/routeFeatureTeamsbot.py @@ -403,21 +403,23 @@ async def bridgeAudioWebsocket( Gateway sends: TTS audio responses """ await websocket.accept() - logger.info(f"Bridge audio WebSocket connected: session={sessionId}") + logger.info(f"Bridge audio WebSocket connected: session={sessionId}, instance={instanceId}") # TODO: Validate bridge API key from headers/query params - config = _getInstanceConfig(instanceId) - - from modules.datamodels.datamodelUam import User - systemUser = User(id="system", username="system", email="system@internal") - service = TeamsbotService(systemUser, None, instanceId, config) - try: + config = _getInstanceConfig(instanceId) + logger.info(f"Bridge audio WebSocket config loaded: session={sessionId}") + + from modules.datamodels.datamodelUam import User + systemUser = User(id="system", username="system", email="system@internal") + service = TeamsbotService(systemUser, None, instanceId, config) + logger.info(f"Bridge audio WebSocket service created: session={sessionId}") + await service.handleAudioStream(websocket, sessionId) except WebSocketDisconnect: logger.info(f"Bridge audio WebSocket disconnected: session={sessionId}") except Exception as e: - logger.error(f"Bridge audio WebSocket error: session={sessionId}, error={e}") + logger.error(f"Bridge audio WebSocket error: session={sessionId}, error={e}", exc_info=True) finally: logger.info(f"Bridge audio WebSocket closed: session={sessionId}") From e85aa8366dd9a9f8b418df3cd64672951c17d7f7 Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Fri, 13 Feb 2026 13:51:05 +0100 Subject: [PATCH 03/11] fix route mail --- modules/features/teamsbot/routeFeatureTeamsbot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/features/teamsbot/routeFeatureTeamsbot.py b/modules/features/teamsbot/routeFeatureTeamsbot.py index e568e173..fa5e951c 100644 --- a/modules/features/teamsbot/routeFeatureTeamsbot.py +++ b/modules/features/teamsbot/routeFeatureTeamsbot.py @@ -365,7 +365,7 @@ async def bridgeStatusCallback( try: # Update session status (bridge callbacks have no user context) from modules.datamodels.datamodelUam import User - systemUser = User(id="system", username="system", email="system@internal") + systemUser = User(id="system", username="system", email="system@internal.local") interface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId) updates = {"status": status} @@ -412,7 +412,7 @@ async def bridgeAudioWebsocket( logger.info(f"Bridge audio WebSocket config loaded: session={sessionId}") from modules.datamodels.datamodelUam import User - systemUser = User(id="system", username="system", email="system@internal") + systemUser = User(id="system", username="system", email="system@internal.local") service = TeamsbotService(systemUser, None, instanceId, config) logger.info(f"Bridge audio WebSocket service created: session={sessionId}") From 3c566b58d0c6ead7abfcad5a53f3543e3f1bae27 Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Fri, 13 Feb 2026 14:01:37 +0100 Subject: [PATCH 04/11] mail for websocket fixed --- modules/features/teamsbot/routeFeatureTeamsbot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/features/teamsbot/routeFeatureTeamsbot.py b/modules/features/teamsbot/routeFeatureTeamsbot.py index fa5e951c..17e81dd3 100644 --- a/modules/features/teamsbot/routeFeatureTeamsbot.py +++ b/modules/features/teamsbot/routeFeatureTeamsbot.py @@ -365,7 +365,7 @@ async def bridgeStatusCallback( try: # Update session status (bridge callbacks have no user context) from modules.datamodels.datamodelUam import User - systemUser = User(id="system", username="system", email="system@internal.local") + systemUser = User(id="system", username="system", email="p.motsch@poweron.swiss") interface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId) updates = {"status": status} @@ -412,7 +412,7 @@ async def bridgeAudioWebsocket( logger.info(f"Bridge audio WebSocket config loaded: session={sessionId}") from modules.datamodels.datamodelUam import User - systemUser = User(id="system", username="system", email="system@internal.local") + systemUser = User(id="system", username="system", email="p.motsch@poweron.swiss") service = TeamsbotService(systemUser, None, instanceId, config) logger.info(f"Bridge audio WebSocket service created: session={sessionId}") From 92ee130c0a36f1e928ba0c5bf285a9830de8106f Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Fri, 13 Feb 2026 14:13:25 +0100 Subject: [PATCH 05/11] fix: convert timestamp to string for Pydantic v2 validation in teamsbot service Co-authored-by: Cursor --- modules/features/teamsbot/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index af3f0686..a3d9bef6 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -281,7 +281,7 @@ class TeamsbotService: sessionId=sessionId, speaker=speaker, text=transcriptText, - timestamp=getUtcTimestamp(), + timestamp=str(getUtcTimestamp()), confidence=confidence, language=self.config.language, isFinal=True, @@ -476,7 +476,7 @@ class TeamsbotService: modelName=response.modelName, processingTime=response.processingTime, priceCHF=response.priceCHF, - timestamp=getUtcTimestamp(), + timestamp=str(getUtcTimestamp()), ).model_dump() createdResponse = interface.createBotResponse(botResponseData) From d88bacc4a91abcbbfc51421e0be69fa9e2b1221f Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Fri, 13 Feb 2026 14:43:45 +0100 Subject: [PATCH 06/11] billing fix --- .../features/teamsbot/routeFeatureTeamsbot.py | 12 ++++++--- modules/features/teamsbot/service.py | 25 ++++++++++++++++--- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/modules/features/teamsbot/routeFeatureTeamsbot.py b/modules/features/teamsbot/routeFeatureTeamsbot.py index 17e81dd3..258c535c 100644 --- a/modules/features/teamsbot/routeFeatureTeamsbot.py +++ b/modules/features/teamsbot/routeFeatureTeamsbot.py @@ -412,9 +412,15 @@ async def bridgeAudioWebsocket( logger.info(f"Bridge audio WebSocket config loaded: session={sessionId}") from modules.datamodels.datamodelUam import User - systemUser = User(id="system", username="system", email="p.motsch@poweron.swiss") - service = TeamsbotService(systemUser, None, instanceId, config) - logger.info(f"Bridge audio WebSocket service created: session={sessionId}") + systemUser = User(id="system", username="system", email="system@internal.local") + + # Look up mandateId from the session record (needed for AI billing context) + sessionInterface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId) + session = sessionInterface.getSession(sessionId) + mandateId = session.get("mandateId") if session else None + + service = TeamsbotService(systemUser, mandateId, instanceId, config) + logger.info(f"Bridge audio WebSocket service created: session={sessionId}, mandateId={mandateId}") await service.handleAudioStream(websocket, sessionId) except WebSocketDisconnect: diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index a3d9bef6..813edf44 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -31,6 +31,22 @@ from .bridgeConnector import BridgeConnector logger = logging.getLogger(__name__) + +# ========================================================================= +# Minimal Service Context (for AI billing in bridge callbacks) +# ========================================================================= + +class _ServiceContext: + """Minimal context providing user/mandate info for AiService billing. + Used by bridge callbacks where a full Services instance is not available.""" + + def __init__(self, user, mandateId, featureInstanceId=None): + self.user = user + self.mandateId = mandateId + self.featureInstanceId = featureInstanceId + self.featureCode = "teamsbot" + + # ========================================================================= # Session Event Queues (for SSE streaming to frontend) # ========================================================================= @@ -377,9 +393,9 @@ class TeamsbotService: try: from modules.services.serviceAi.mainServiceAi import AiService - # Create AiService with service center context - # Note: In production, serviceCenter should be passed properly - aiService = AiService(serviceCenter=None) + # Create minimal service context for AI billing + serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId) + aiService = AiService(serviceCenter=serviceContext) await aiService.ensureAiObjectsInitialized() request = AiCallRequest( @@ -528,7 +544,8 @@ class TeamsbotService: from modules.services.serviceAi.mainServiceAi import AiService - aiService = AiService(serviceCenter=None) + serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId) + aiService = AiService(serviceCenter=serviceContext) await aiService.ensureAiObjectsInitialized() request = AiCallRequest( From 77cbc5803ae025b65e65c973767c28002707f003 Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Fri, 13 Feb 2026 17:12:04 +0100 Subject: [PATCH 07/11] fix: use poweron.swiss domain for system user email (Pydantic rejects .local TLD) Co-authored-by: Cursor --- modules/features/teamsbot/routeFeatureTeamsbot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/features/teamsbot/routeFeatureTeamsbot.py b/modules/features/teamsbot/routeFeatureTeamsbot.py index 258c535c..19d495a5 100644 --- a/modules/features/teamsbot/routeFeatureTeamsbot.py +++ b/modules/features/teamsbot/routeFeatureTeamsbot.py @@ -365,7 +365,7 @@ async def bridgeStatusCallback( try: # Update session status (bridge callbacks have no user context) from modules.datamodels.datamodelUam import User - systemUser = User(id="system", username="system", email="p.motsch@poweron.swiss") + systemUser = User(id="system", username="system", email="system@poweron.swiss") interface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId) updates = {"status": status} @@ -412,7 +412,7 @@ async def bridgeAudioWebsocket( logger.info(f"Bridge audio WebSocket config loaded: session={sessionId}") from modules.datamodels.datamodelUam import User - systemUser = User(id="system", username="system", email="system@internal.local") + systemUser = User(id="system", username="system", email="system@poweron.swiss") # Look up mandateId from the session record (needed for AI billing context) sessionInterface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId) From 65128db7133338c81b80e19048b5203ffd054189 Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Fri, 13 Feb 2026 17:26:56 +0100 Subject: [PATCH 08/11] fix: use original session user for bridge callbacks instead of system user (RBAC) Co-authored-by: Cursor --- .../features/teamsbot/routeFeatureTeamsbot.py | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/modules/features/teamsbot/routeFeatureTeamsbot.py b/modules/features/teamsbot/routeFeatureTeamsbot.py index 19d495a5..9821fc6c 100644 --- a/modules/features/teamsbot/routeFeatureTeamsbot.py +++ b/modules/features/teamsbot/routeFeatureTeamsbot.py @@ -363,11 +363,22 @@ async def bridgeStatusCallback( logger.info(f"Bridge status callback: session={sessionId}, status={status}") try: - # Update session status (bridge callbacks have no user context) + # Load the original user who started the session (has RBAC roles in mandate) from modules.datamodels.datamodelUam import User + from modules.interfaces.interfaceDbApp import getRootInterface + systemUser = User(id="system", username="system", email="system@poweron.swiss") interface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId) + # Look up original user from session for consistent context + session = interface.getSession(sessionId) + startedByUserId = session.get("startedByUserId") if session else None + if startedByUserId: + rootInterface = getRootInterface() + originalUser = rootInterface.getUser(startedByUserId) + if originalUser: + interface = interfaceDb.getInterface(originalUser, featureInstanceId=instanceId) + updates = {"status": status} if errorMessage: updates["errorMessage"] = errorMessage @@ -411,16 +422,27 @@ async def bridgeAudioWebsocket( config = _getInstanceConfig(instanceId) logger.info(f"Bridge audio WebSocket config loaded: session={sessionId}") + # Load the original user who started the session (has RBAC roles in mandate) + # Bridge callbacks have no HTTP auth, so we reconstruct the user context from the session record. from modules.datamodels.datamodelUam import User - systemUser = User(id="system", username="system", email="system@poweron.swiss") + from modules.interfaces.interfaceDbApp import getRootInterface - # Look up mandateId from the session record (needed for AI billing context) + systemUser = User(id="system", username="system", email="system@poweron.swiss") sessionInterface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId) session = sessionInterface.getSession(sessionId) mandateId = session.get("mandateId") if session else None + startedByUserId = session.get("startedByUserId") if session else None - service = TeamsbotService(systemUser, mandateId, instanceId, config) - logger.info(f"Bridge audio WebSocket service created: session={sessionId}, mandateId={mandateId}") + # Look up the original user (getRootInterface uses admin context, can load any user) + rootInterface = getRootInterface() + originalUser = rootInterface.getUser(startedByUserId) if startedByUserId else None + + if not originalUser: + logger.warning(f"Could not load original user {startedByUserId}, falling back to system user") + originalUser = systemUser + + service = TeamsbotService(originalUser, mandateId, instanceId, config) + logger.info(f"Bridge audio WebSocket service created: session={sessionId}, mandateId={mandateId}, user={originalUser.id}") await service.handleAudioStream(websocket, sessionId) except WebSocketDisconnect: From 367edd83e28ab4f90f8a955f102685928e93505a Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Fri, 13 Feb 2026 17:46:02 +0100 Subject: [PATCH 09/11] fix: add missing await on speechToText and textToSpeech async calls Co-authored-by: Cursor --- modules/features/teamsbot/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index 813edf44..a7a266ec 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -275,7 +275,7 @@ class TeamsbotService: # Step 1: STT -- convert audio to text try: - sttResult = voiceInterface.speechToText( + sttResult = await voiceInterface.speechToText( audioContent=audioBytes, language=self.config.language, sampleRate=16000, @@ -462,7 +462,7 @@ class TeamsbotService: # 4a: TTS -> Audio to bridge try: - ttsResult = voiceInterface.textToSpeech( + ttsResult = await voiceInterface.textToSpeech( text=speechResult.responseText, languageCode=self.config.language, voiceName=self.config.voiceId From 77151df0f433c02e7914bddc863d1406719f3674 Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Fri, 13 Feb 2026 18:03:23 +0100 Subject: [PATCH 10/11] fix: STT audioFormat scoping, automation import name, orphan FK cleanup Co-authored-by: Cursor --- modules/connectors/connectorVoiceGoogle.py | 3 +++ modules/features/automation/mainAutomation.py | 4 ++-- modules/shared/dbMultiTenantOptimizations.py | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/modules/connectors/connectorVoiceGoogle.py b/modules/connectors/connectorVoiceGoogle.py index b9d9bf2c..c32b7fa4 100644 --- a/modules/connectors/connectorVoiceGoogle.py +++ b/modules/connectors/connectorVoiceGoogle.py @@ -86,6 +86,9 @@ class ConnectorGoogleSpeech: channels = validation["channels"] audioFormat = validation["format"] logger.info(f"Auto-detected audio: {audioFormat}, {sampleRate}Hz, {channels}ch") + else: + # When sampleRate and channels are explicitly provided, assume raw PCM (LINEAR16) + audioFormat = "linear16" logger.info(f"Processing audio with Google Cloud Speech-to-Text") logger.info(f"Audio: {len(audioContent)} bytes, {sampleRate}Hz, {channels}ch") diff --git a/modules/features/automation/mainAutomation.py b/modules/features/automation/mainAutomation.py index 503e7b41..e23b320f 100644 --- a/modules/features/automation/mainAutomation.py +++ b/modules/features/automation/mainAutomation.py @@ -301,12 +301,12 @@ def _migrateExistingTemplates() -> None: as system templates (isSystem=True). This runs idempotently during feature registration. """ try: - from modules.features.automation.interfaceFeatureAutomation import getAutomationInterface + from modules.features.automation.interfaceFeatureAutomation import getInterface from modules.security.rootAccess import getRootUser from modules.features.automation.datamodelFeatureAutomation import AutomationTemplate rootUser = getRootUser() - automationInterface = getAutomationInterface(rootUser) + automationInterface = getInterface(rootUser) # Get all templates from DB allTemplates = automationInterface.db.getRecordset(AutomationTemplate) diff --git a/modules/shared/dbMultiTenantOptimizations.py b/modules/shared/dbMultiTenantOptimizations.py index ff8d7e8f..f3c2de98 100644 --- a/modules/shared/dbMultiTenantOptimizations.py +++ b/modules/shared/dbMultiTenantOptimizations.py @@ -390,6 +390,22 @@ def _applyForeignKeys(cursor, tables: Optional[List[str]]) -> int: logger.warning(f"Failed to drop FK {constraintName}: {e}") continue + # Clean up orphaned rows before applying FK constraint + try: + cursor.execute(f""" + DELETE FROM "{tableName}" + WHERE "{column}" IS NOT NULL + AND "{column}" NOT IN (SELECT "id" FROM "{refTable}") + """) + orphanCount = cursor.rowcount + if orphanCount > 0: + logger.info( + f"Cleaned {orphanCount} orphaned row(s) from {tableName} " + f"(missing {refTable} reference via {column})" + ) + except Exception as e: + logger.warning(f"Failed to clean orphans for FK {constraintName}: {e}") + try: cursor.execute(f""" ALTER TABLE "{tableName}" From ae4dc9fa48e4ff06590fc6a0bea581ce04ba42a2 Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Fri, 13 Feb 2026 18:17:47 +0100 Subject: [PATCH 11/11] fix: skip STT fallbacks for teamsbot, run audio processing in background Co-authored-by: Cursor --- modules/connectors/connectorVoiceGoogle.py | 12 ++++++- modules/features/teamsbot/service.py | 37 +++++++++++++++------ modules/interfaces/interfaceVoiceObjects.py | 7 ++-- 3 files changed, 43 insertions(+), 13 deletions(-) diff --git a/modules/connectors/connectorVoiceGoogle.py b/modules/connectors/connectorVoiceGoogle.py index c32b7fa4..85a19bf7 100644 --- a/modules/connectors/connectorVoiceGoogle.py +++ b/modules/connectors/connectorVoiceGoogle.py @@ -58,7 +58,8 @@ class ConnectorGoogleSpeech: raise async def speechToText(self, audioContent: bytes, language: str = "de-DE", - sampleRate: int = None, channels: int = None) -> Dict: + sampleRate: int = None, channels: int = None, + skipFallbacks: bool = False) -> Dict: """ Convert speech to text using Google Cloud Speech-to-Text API. @@ -234,6 +235,15 @@ class ConnectorGoogleSpeech: "error": f"Google Cloud error: {response.error}" } + # Skip fallbacks when format is known (e.g. teamsbot with explicit LINEAR16 16kHz) + if skipFallbacks: + return { + "success": False, + "text": "", + "confidence": 0.0, + "error": "No recognition results (silence or unclear audio)" + } + # Try multiple fallback approaches fallback_configs = [] diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index a7a266ec..ea10fddd 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -198,12 +198,15 @@ class TeamsbotService: audioBuffer = bytearray() bufferDurationMs = 0 - targetBufferMs = 1500 # Buffer 1.5 seconds of audio before STT + targetBufferMs = 3000 # Buffer 3 seconds of audio before STT # PCM16 at 16kHz mono = 32000 bytes/second bytesPerSecond = 32000 bytesPerMs = bytesPerSecond / 1000 + # Track background STT/AI tasks so they don't block the WebSocket loop + backgroundTasks: list[asyncio.Task] = [] + logger.info(f"Audio processing started for session {sessionId}") try: @@ -235,18 +238,26 @@ class TeamsbotService: audioBuffer.extend(audioChunk) bufferDurationMs = len(audioBuffer) / bytesPerMs - # Process when buffer has enough audio + # Process when buffer has enough audio - run in background to not block WebSocket if bufferDurationMs >= targetBufferMs: - await self._processAudioBuffer( - bytes(audioBuffer), - sessionId, - interface, - voiceInterface, - websocket, - ) + chunkBytes = bytes(audioBuffer) audioBuffer.clear() bufferDurationMs = 0 + task = asyncio.create_task( + self._processAudioBuffer( + chunkBytes, + sessionId, + interface, + voiceInterface, + websocket, + ) + ) + backgroundTasks.append(task) + + # Clean up completed tasks + backgroundTasks = [t for t in backgroundTasks if not t.done()] + except Exception as e: if "disconnect" not in str(e).lower(): logger.error(f"Audio stream error for session {sessionId}: {e}") @@ -261,6 +272,10 @@ class TeamsbotService: websocket, ) + # Wait for any remaining background tasks + if backgroundTasks: + await asyncio.gather(*backgroundTasks, return_exceptions=True) + logger.info(f"Audio processing ended for session {sessionId}") async def _processAudioBuffer( @@ -274,12 +289,14 @@ class TeamsbotService: """Process a buffered audio chunk through the STT -> AI -> TTS pipeline.""" # Step 1: STT -- convert audio to text + # skipFallbacks=True because we know the exact format (LINEAR16, 16kHz, mono from Teams) try: sttResult = await voiceInterface.speechToText( audioContent=audioBytes, language=self.config.language, sampleRate=16000, - channels=1 + channels=1, + skipFallbacks=True ) except Exception as e: logger.warning(f"STT failed for session {sessionId}: {e}") diff --git a/modules/interfaces/interfaceVoiceObjects.py b/modules/interfaces/interfaceVoiceObjects.py index 0c28f81d..cccebce4 100644 --- a/modules/interfaces/interfaceVoiceObjects.py +++ b/modules/interfaces/interfaceVoiceObjects.py @@ -66,7 +66,8 @@ class VoiceObjects: # Speech-to-Text Operations async def speechToText(self, audioContent: bytes, language: str = "de-DE", - sampleRate: int = None, channels: int = None) -> Dict[str, Any]: + sampleRate: int = None, channels: int = None, + skipFallbacks: bool = False) -> Dict[str, Any]: """ Convert speech to text using Google Cloud Speech-to-Text API. @@ -75,6 +76,7 @@ class VoiceObjects: language: Language code (e.g., 'de-DE', 'en-US') sampleRate: Audio sample rate (auto-detected if None) channels: Number of audio channels (auto-detected if None) + skipFallbacks: If True, skip fallback attempts (use when audio format is known) Returns: Dict containing transcribed text, confidence, and metadata @@ -87,7 +89,8 @@ class VoiceObjects: audioContent=audioContent, language=language, sampleRate=sampleRate, - channels=channels + channels=channels, + skipFallbacks=skipFallbacks ) if result["success"]: