From ef813a930471a02da180e8deff65fa962f60e930 Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Mon, 16 Feb 2026 00:07:42 +0100 Subject: [PATCH] feat(teamsbot): chat message handling, response channel (voice/chat/both), join mode (systemBot/anonymous/userAccount) Co-authored-by: Cursor --- .../features/teamsbot/datamodelTeamsbot.py | 19 +++- .../features/teamsbot/routeFeatureTeamsbot.py | 30 +++++- modules/features/teamsbot/service.py | 95 +++++++++++++------ 3 files changed, 115 insertions(+), 29 deletions(-) diff --git a/modules/features/teamsbot/datamodelTeamsbot.py b/modules/features/teamsbot/datamodelTeamsbot.py index 3dee59d6..5bfaf69c 100644 --- a/modules/features/teamsbot/datamodelTeamsbot.py +++ b/modules/features/teamsbot/datamodelTeamsbot.py @@ -46,6 +46,20 @@ class TeamsbotResponseMode(str, Enum): TRANSCRIBE_ONLY = "transcribeOnly" # Only transcribe, no AI responses +class TeamsbotResponseChannel(str, Enum): + """Channel for bot responses.""" + VOICE = "voice" # Bot responds only via voice (TTS) + CHAT = "chat" # Bot responds only via chat message + BOTH = "both" # Bot responds via voice AND chat + + +class TeamsbotJoinMode(str, Enum): + """How the bot joins the meeting.""" + SYSTEM_BOT = "systemBot" # Join with system bot account (backend-managed credentials) + ANONYMOUS = "anonymous" # Join as anonymous guest + USER_ACCOUNT = "userAccount" # Join with user's own Microsoft account (OAuth) + + # ============================================================================ # Database Models (stored in PostgreSQL) # ============================================================================ @@ -106,7 +120,7 @@ class TeamsbotBotResponse(BaseModel): # ============================================================================ class TeamsbotConfig(BaseModel): - """Configuration for a Teams Bot feature instance.""" + """Configuration for a Teams Bot feature instance (serves as default template for new users).""" botName: str = Field(default="AI Assistant", description="Default bot display name") backgroundImageUrl: Optional[str] = Field(default=None, description="Default background image URL") aiSystemPrompt: str = Field( @@ -114,6 +128,7 @@ class TeamsbotConfig(BaseModel): description="Custom system prompt for the AI analysis" ) responseMode: TeamsbotResponseMode = Field(default=TeamsbotResponseMode.AUTO, description="How the bot responds") + responseChannel: TeamsbotResponseChannel = Field(default=TeamsbotResponseChannel.VOICE, description="Channel for bot responses: voice, chat, or both") language: str = Field(default="de-DE", description="Primary language for STT/TTS") voiceId: Optional[str] = Field(default=None, description="Google TTS voice ID (e.g., de-DE-Standard-A)") browserBotUrl: Optional[str] = Field(default=None, description="URL of the Browser Bot service. Falls back to TEAMSBOT_BROWSER_BOT_URL env variable if not set per-instance.") @@ -141,6 +156,7 @@ class TeamsbotStartSessionRequest(BaseModel): botName: Optional[str] = Field(default=None, description="Override bot name for this session") backgroundImageUrl: Optional[str] = Field(default=None, description="Override background image for this session") connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access") + joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.") class TeamsbotSessionResponse(BaseModel): @@ -156,6 +172,7 @@ class TeamsbotConfigUpdateRequest(BaseModel): backgroundImageUrl: Optional[str] = None aiSystemPrompt: Optional[str] = None responseMode: Optional[TeamsbotResponseMode] = None + responseChannel: Optional[TeamsbotResponseChannel] = None language: Optional[str] = None voiceId: Optional[str] = None browserBotUrl: Optional[str] = None diff --git a/modules/features/teamsbot/routeFeatureTeamsbot.py b/modules/features/teamsbot/routeFeatureTeamsbot.py index a6bf5fcf..59b009be 100644 --- a/modules/features/teamsbot/routeFeatureTeamsbot.py +++ b/modules/features/teamsbot/routeFeatureTeamsbot.py @@ -30,6 +30,7 @@ from .datamodelTeamsbot import ( TeamsbotSessionResponse, TeamsbotConfigUpdateRequest, TeamsbotConfig, + TeamsbotJoinMode, ) # Import service @@ -186,8 +187,35 @@ async def startSession( appApiUrl = APP_CONFIG.get("APP_API_URL", "") gatewayBaseUrl = appApiUrl.rstrip("/") if appApiUrl else str(request.base_url).rstrip("/") + # Determine effective join mode + joinMode = body.joinMode + if not joinMode: + # Default: use system bot if credentials are configured, otherwise anonymous + if config.botAccountEmail and config.botAccountPassword: + joinMode = TeamsbotJoinMode.SYSTEM_BOT + else: + joinMode = TeamsbotJoinMode.ANONYMOUS + + # Resolve credentials based on join mode + effectiveEmail = None + effectivePassword = None + if joinMode == TeamsbotJoinMode.SYSTEM_BOT: + effectiveEmail = config.botAccountEmail + effectivePassword = config.botAccountPassword + elif joinMode == TeamsbotJoinMode.USER_ACCOUNT: + # TODO: Resolve OAuth token from user's Microsoft connection + logger.info(f"User account join mode requested but not yet implemented - falling back to anonymous") + joinMode = TeamsbotJoinMode.ANONYMOUS + # ANONYMOUS mode: no credentials + + # Temporarily override config credentials for this session's join mode + sessionConfig = config.model_copy(update={ + "botAccountEmail": effectiveEmail, + "botAccountPassword": effectivePassword, + }) + # Start the bot in background (join meeting via bridge) - service = TeamsbotService(context.user, mandateId, instanceId, config) + service = TeamsbotService(context.user, mandateId, instanceId, sessionConfig) asyncio.create_task( service.joinMeeting(sessionId, cleanMeetingUrl, body.connectionId, gatewayBaseUrl) ) diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index ad52e0c5..6cd1fa1d 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -25,6 +25,7 @@ from .datamodelTeamsbot import ( TeamsbotResponseType, TeamsbotConfig, TeamsbotResponseMode, + TeamsbotResponseChannel, SpeechTeamsResponse, ) from .browserBotConnector import BrowserBotConnector @@ -238,6 +239,20 @@ class TeamsbotService: websocket=websocket, ) + elif msgType == "chatMessage": + chat = message.get("chat", {}) + logger.info(f"[WS-DEBUG] Chat message received: speaker={chat.get('speaker')}, text={chat.get('text', '')[:60]}...") + await self._processTranscript( + sessionId=sessionId, + speaker=chat.get("speaker", "Unknown"), + text=chat.get("text", ""), + isFinal=True, + interface=interface, + voiceInterface=voiceInterface, + websocket=websocket, + source="chat", + ) + elif msgType == "status": status = message.get("status") errorMessage = message.get("message") @@ -300,8 +315,9 @@ class TeamsbotService: interface, voiceInterface, websocket: WebSocket, + source: str = "caption", ): - """Process a transcript segment from the browser bot's caption scraping.""" + """Process a transcript segment from captions or chat messages.""" text = text.strip() if not text: @@ -325,11 +341,12 @@ class TeamsbotService: createdTranscript = interface.createTranscript(transcriptData) - # Update context buffer + # Update context buffer (mark source for chat messages) self._contextBuffer.append({ "speaker": speaker or "Unknown", "text": text, "timestamp": getUtcTimestamp(), + "source": source, }) # Keep only last N segments maxSegments = self.config.contextWindowSegments @@ -442,13 +459,17 @@ class TeamsbotService: self._lastAiCallTime = time.time() # Build transcript context from buffer. - # Mark bot's own utterances so the AI knows what it already said. + # Mark bot's own utterances and chat messages for the AI. contextLines = [] for segment in self._contextBuffer: speaker = segment.get("speaker", "Unknown") text = segment.get("text", "") + segSource = segment.get("source", "caption") + prefix = "Chat" if segSource == "chat" else "" if self._isBotSpeaker(speaker): contextLines.append(f"[YOU ({self.config.botName})]: {text}") + elif prefix: + contextLines.append(f"[{prefix}: {speaker}]: {text}") else: contextLines.append(f"[{speaker}]: {text}") @@ -522,35 +543,55 @@ class TeamsbotService: }) return - # Auto mode: send voice + chat response + # Determine response channel (voice, chat, or both) + channel = self.config.responseChannel responseType = TeamsbotResponseType.BOTH + + if channel == TeamsbotResponseChannel.VOICE: + responseType = TeamsbotResponseType.AUDIO + elif channel == TeamsbotResponseChannel.CHAT: + responseType = TeamsbotResponseType.CHAT + else: + responseType = TeamsbotResponseType.BOTH - # 4a: TTS -> Audio to bridge - try: - ttsResult = await voiceInterface.textToSpeech( - text=speechResult.responseText, - languageCode=self.config.language, - voiceName=self.config.voiceId - ) - - if ttsResult and isinstance(ttsResult, dict): - audioContent = ttsResult.get("audioContent") - if audioContent and websocket: - # Send TTS audio to bot via WebSocket - # Bot expects: {type: "playAudio", sessionId, audio: {data, format}} + # 4a: Voice response (TTS -> Audio to bot) + if channel in (TeamsbotResponseChannel.VOICE, TeamsbotResponseChannel.BOTH): + try: + ttsResult = await voiceInterface.textToSpeech( + text=speechResult.responseText, + languageCode=self.config.language, + voiceName=self.config.voiceId + ) + + if ttsResult and isinstance(ttsResult, dict): + audioContent = ttsResult.get("audioContent") + if audioContent and websocket: + await websocket.send_text(json.dumps({ + "type": "playAudio", + "sessionId": sessionId, + "audio": { + "data": base64.b64encode(audioContent if isinstance(audioContent, bytes) else audioContent.encode()).decode(), + "format": "mp3", + }, + })) + elif audioContent and not websocket: + logger.info(f"TTS audio generated for session {sessionId} (HTTP mode - no WebSocket for playback)") + except Exception as ttsErr: + logger.warning(f"TTS failed for session {sessionId}: {ttsErr}") + if responseType == TeamsbotResponseType.AUDIO: + responseType = TeamsbotResponseType.CHAT # Fallback to chat only + + # 4b: Chat response (send text message to meeting chat) + if channel in (TeamsbotResponseChannel.CHAT, TeamsbotResponseChannel.BOTH): + try: + if websocket: await websocket.send_text(json.dumps({ - "type": "playAudio", + "type": "sendChatMessage", "sessionId": sessionId, - "audio": { - "data": base64.b64encode(audioContent if isinstance(audioContent, bytes) else audioContent.encode()).decode(), - "format": "mp3", - }, + "text": speechResult.responseText, })) - elif audioContent and not websocket: - logger.info(f"TTS audio generated for session {sessionId} (HTTP mode - no WebSocket for playback)") - except Exception as ttsErr: - logger.warning(f"TTS failed for session {sessionId}: {ttsErr}") - responseType = TeamsbotResponseType.CHAT # Fallback to chat only + except Exception as chatErr: + logger.warning(f"Chat message send failed for session {sessionId}: {chatErr}") # 4b: Store bot response botResponseData = TeamsbotBotResponse(