From ef813a930471a02da180e8deff65fa962f60e930 Mon Sep 17 00:00:00 2001
From: patrick-motsch
Date: Mon, 16 Feb 2026 00:07:42 +0100
Subject: [PATCH] feat(teamsbot): chat message handling, response channel
(voice/chat/both), join mode (systemBot/anonymous/userAccount)
Co-authored-by: Cursor
---
.../features/teamsbot/datamodelTeamsbot.py | 19 +++-
.../features/teamsbot/routeFeatureTeamsbot.py | 30 +++++-
modules/features/teamsbot/service.py | 95 +++++++++++++------
3 files changed, 115 insertions(+), 29 deletions(-)
diff --git a/modules/features/teamsbot/datamodelTeamsbot.py b/modules/features/teamsbot/datamodelTeamsbot.py
index 3dee59d6..5bfaf69c 100644
--- a/modules/features/teamsbot/datamodelTeamsbot.py
+++ b/modules/features/teamsbot/datamodelTeamsbot.py
@@ -46,6 +46,20 @@ class TeamsbotResponseMode(str, Enum):
TRANSCRIBE_ONLY = "transcribeOnly" # Only transcribe, no AI responses
+class TeamsbotResponseChannel(str, Enum):
+ """Output channel(s) the bot uses to deliver an AI response to the meeting."""
+ VOICE = "voice" # Bot responds only via voice (TTS); this is the TeamsbotConfig.responseChannel default
+ CHAT = "chat" # Bot responds only via a chat message in the meeting chat
+ BOTH = "both" # Bot responds via voice AND chat
+
+
+class TeamsbotJoinMode(str, Enum):
+ """Identity the bot uses to join the meeting; optional on the start-session request (startSession picks a default when unset)."""
+ SYSTEM_BOT = "systemBot" # Join with system bot account (backend-managed credentials: botAccountEmail/botAccountPassword)
+ ANONYMOUS = "anonymous" # Join as anonymous guest (no credentials are passed to the service)
+ USER_ACCOUNT = "userAccount" # Join with user's own Microsoft account (OAuth); not yet implemented - startSession falls back to ANONYMOUS
+
+
# ============================================================================
# Database Models (stored in PostgreSQL)
# ============================================================================
@@ -106,7 +120,7 @@ class TeamsbotBotResponse(BaseModel):
# ============================================================================
class TeamsbotConfig(BaseModel):
- """Configuration for a Teams Bot feature instance."""
+ """Configuration for a Teams Bot feature instance (serves as default template for new users)."""
botName: str = Field(default="AI Assistant", description="Default bot display name")
backgroundImageUrl: Optional[str] = Field(default=None, description="Default background image URL")
aiSystemPrompt: str = Field(
@@ -114,6 +128,7 @@ class TeamsbotConfig(BaseModel):
description="Custom system prompt for the AI analysis"
)
responseMode: TeamsbotResponseMode = Field(default=TeamsbotResponseMode.AUTO, description="How the bot responds")
+ responseChannel: TeamsbotResponseChannel = Field(default=TeamsbotResponseChannel.VOICE, description="Channel for bot responses: voice, chat, or both")
language: str = Field(default="de-DE", description="Primary language for STT/TTS")
voiceId: Optional[str] = Field(default=None, description="Google TTS voice ID (e.g., de-DE-Standard-A)")
browserBotUrl: Optional[str] = Field(default=None, description="URL of the Browser Bot service. Falls back to TEAMSBOT_BROWSER_BOT_URL env variable if not set per-instance.")
@@ -141,6 +156,7 @@ class TeamsbotStartSessionRequest(BaseModel):
botName: Optional[str] = Field(default=None, description="Override bot name for this session")
backgroundImageUrl: Optional[str] = Field(default=None, description="Override background image for this session")
connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access")
+ joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.")
class TeamsbotSessionResponse(BaseModel):
@@ -156,6 +172,7 @@ class TeamsbotConfigUpdateRequest(BaseModel):
backgroundImageUrl: Optional[str] = None
aiSystemPrompt: Optional[str] = None
responseMode: Optional[TeamsbotResponseMode] = None
+ responseChannel: Optional[TeamsbotResponseChannel] = None
language: Optional[str] = None
voiceId: Optional[str] = None
browserBotUrl: Optional[str] = None
diff --git a/modules/features/teamsbot/routeFeatureTeamsbot.py b/modules/features/teamsbot/routeFeatureTeamsbot.py
index a6bf5fcf..59b009be 100644
--- a/modules/features/teamsbot/routeFeatureTeamsbot.py
+++ b/modules/features/teamsbot/routeFeatureTeamsbot.py
@@ -30,6 +30,7 @@ from .datamodelTeamsbot import (
TeamsbotSessionResponse,
TeamsbotConfigUpdateRequest,
TeamsbotConfig,
+ TeamsbotJoinMode,
)
# Import service
@@ -186,8 +187,35 @@ async def startSession(
appApiUrl = APP_CONFIG.get("APP_API_URL", "")
gatewayBaseUrl = appApiUrl.rstrip("/") if appApiUrl else str(request.base_url).rstrip("/")
+ # Determine effective join mode
+ joinMode = body.joinMode
+ if not joinMode:
+ # Default: use system bot if credentials are configured, otherwise anonymous
+ if config.botAccountEmail and config.botAccountPassword:
+ joinMode = TeamsbotJoinMode.SYSTEM_BOT
+ else:
+ joinMode = TeamsbotJoinMode.ANONYMOUS
+
+ # Resolve credentials based on join mode
+ effectiveEmail = None
+ effectivePassword = None
+ if joinMode == TeamsbotJoinMode.SYSTEM_BOT:
+ effectiveEmail = config.botAccountEmail
+ effectivePassword = config.botAccountPassword
+ elif joinMode == TeamsbotJoinMode.USER_ACCOUNT:
+ # TODO: Resolve OAuth token from user's Microsoft connection
+ logger.info(f"User account join mode requested but not yet implemented - falling back to anonymous")
+ joinMode = TeamsbotJoinMode.ANONYMOUS
+ # ANONYMOUS mode: no credentials
+
+ # Build a per-session copy of the config that carries only the credentials for the chosen join mode (the instance config itself is not modified)
+ sessionConfig = config.model_copy(update={
+ "botAccountEmail": effectiveEmail,
+ "botAccountPassword": effectivePassword,
+ })
+
# Start the bot in background (join meeting via bridge)
- service = TeamsbotService(context.user, mandateId, instanceId, config)
+ service = TeamsbotService(context.user, mandateId, instanceId, sessionConfig)
asyncio.create_task(
service.joinMeeting(sessionId, cleanMeetingUrl, body.connectionId, gatewayBaseUrl)
)
diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py
index ad52e0c5..6cd1fa1d 100644
--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@@ -25,6 +25,7 @@ from .datamodelTeamsbot import (
TeamsbotResponseType,
TeamsbotConfig,
TeamsbotResponseMode,
+ TeamsbotResponseChannel,
SpeechTeamsResponse,
)
from .browserBotConnector import BrowserBotConnector
@@ -238,6 +239,20 @@ class TeamsbotService:
websocket=websocket,
)
+ elif msgType == "chatMessage":
+ chat = message.get("chat", {})
+ logger.info(f"[WS-DEBUG] Chat message received: speaker={chat.get('speaker')}, text={chat.get('text', '')[:60]}...")
+ await self._processTranscript(
+ sessionId=sessionId,
+ speaker=chat.get("speaker", "Unknown"),
+ text=chat.get("text", ""),
+ isFinal=True,
+ interface=interface,
+ voiceInterface=voiceInterface,
+ websocket=websocket,
+ source="chat",
+ )
+
elif msgType == "status":
status = message.get("status")
errorMessage = message.get("message")
@@ -300,8 +315,9 @@ class TeamsbotService:
interface,
voiceInterface,
websocket: WebSocket,
+ source: str = "caption",
):
- """Process a transcript segment from the browser bot's caption scraping."""
+ """Process a transcript segment from captions or chat messages."""
text = text.strip()
if not text:
@@ -325,11 +341,12 @@ class TeamsbotService:
createdTranscript = interface.createTranscript(transcriptData)
- # Update context buffer
+ # Update context buffer (mark source for chat messages)
self._contextBuffer.append({
"speaker": speaker or "Unknown",
"text": text,
"timestamp": getUtcTimestamp(),
+ "source": source,
})
# Keep only last N segments
maxSegments = self.config.contextWindowSegments
@@ -442,13 +459,17 @@ class TeamsbotService:
self._lastAiCallTime = time.time()
# Build transcript context from buffer.
- # Mark bot's own utterances so the AI knows what it already said.
+ # Mark bot's own utterances and chat messages for the AI.
contextLines = []
for segment in self._contextBuffer:
speaker = segment.get("speaker", "Unknown")
text = segment.get("text", "")
+ segSource = segment.get("source", "caption")
+ prefix = "Chat" if segSource == "chat" else ""
if self._isBotSpeaker(speaker):
contextLines.append(f"[YOU ({self.config.botName})]: {text}")
+ elif prefix:
+ contextLines.append(f"[{prefix}: {speaker}]: {text}")
else:
contextLines.append(f"[{speaker}]: {text}")
@@ -522,35 +543,55 @@ class TeamsbotService:
})
return
- # Auto mode: send voice + chat response
+ # Determine response channel (voice, chat, or both)
+ channel = self.config.responseChannel
responseType = TeamsbotResponseType.BOTH
+
+ if channel == TeamsbotResponseChannel.VOICE:
+ responseType = TeamsbotResponseType.AUDIO
+ elif channel == TeamsbotResponseChannel.CHAT:
+ responseType = TeamsbotResponseType.CHAT
+ else:
+ responseType = TeamsbotResponseType.BOTH
- # 4a: TTS -> Audio to bridge
- try:
- ttsResult = await voiceInterface.textToSpeech(
- text=speechResult.responseText,
- languageCode=self.config.language,
- voiceName=self.config.voiceId
- )
-
- if ttsResult and isinstance(ttsResult, dict):
- audioContent = ttsResult.get("audioContent")
- if audioContent and websocket:
- # Send TTS audio to bot via WebSocket
- # Bot expects: {type: "playAudio", sessionId, audio: {data, format}}
+ # 4a: Voice response (TTS -> Audio to bot)
+ if channel in (TeamsbotResponseChannel.VOICE, TeamsbotResponseChannel.BOTH):
+ try:
+ ttsResult = await voiceInterface.textToSpeech(
+ text=speechResult.responseText,
+ languageCode=self.config.language,
+ voiceName=self.config.voiceId
+ )
+
+ if ttsResult and isinstance(ttsResult, dict):
+ audioContent = ttsResult.get("audioContent")
+ if audioContent and websocket:
+ await websocket.send_text(json.dumps({
+ "type": "playAudio",
+ "sessionId": sessionId,
+ "audio": {
+ "data": base64.b64encode(audioContent if isinstance(audioContent, bytes) else audioContent.encode()).decode(),
+ "format": "mp3",
+ },
+ }))
+ elif audioContent and not websocket:
+ logger.info(f"TTS audio generated for session {sessionId} (HTTP mode - no WebSocket for playback)")
+ except Exception as ttsErr:
+ logger.warning(f"TTS failed for session {sessionId}: {ttsErr}")
+ if responseType == TeamsbotResponseType.AUDIO:
+ responseType = TeamsbotResponseType.CHAT # Fallback to chat only
+
+ # 4b: Chat response (send text message to meeting chat)
+ if channel in (TeamsbotResponseChannel.CHAT, TeamsbotResponseChannel.BOTH):
+ try:
+ if websocket:
await websocket.send_text(json.dumps({
- "type": "playAudio",
+ "type": "sendChatMessage",
"sessionId": sessionId,
- "audio": {
- "data": base64.b64encode(audioContent if isinstance(audioContent, bytes) else audioContent.encode()).decode(),
- "format": "mp3",
- },
+ "text": speechResult.responseText,
}))
- elif audioContent and not websocket:
- logger.info(f"TTS audio generated for session {sessionId} (HTTP mode - no WebSocket for playback)")
- except Exception as ttsErr:
- logger.warning(f"TTS failed for session {sessionId}: {ttsErr}")
- responseType = TeamsbotResponseType.CHAT # Fallback to chat only
+ except Exception as chatErr:
+ logger.warning(f"Chat message send failed for session {sessionId}: {chatErr}")
# 4b: Store bot response
botResponseData = TeamsbotBotResponse(