feat(teamsbot): chat message handling, response channel (voice/chat/both), join mode (systemBot/anonymous/userAccount)
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
cc86b144ac
commit
ef813a9304
3 changed files with 115 additions and 29 deletions
|
|
@ -46,6 +46,20 @@ class TeamsbotResponseMode(str, Enum):
|
||||||
TRANSCRIBE_ONLY = "transcribeOnly" # Only transcribe, no AI responses
|
TRANSCRIBE_ONLY = "transcribeOnly" # Only transcribe, no AI responses
|
||||||
|
|
||||||
|
|
||||||
|
class TeamsbotResponseChannel(str, Enum):
    """Channel for bot responses.

    Members are plain strings (``str`` mixin), so they compare equal to their
    wire values ("voice", "chat", "both") and can be built from them, e.g.
    ``TeamsbotResponseChannel("chat")``.
    """

    # Bot responds only via voice (TTS).
    VOICE = "voice"
    # Bot responds only via chat message.
    CHAT = "chat"
    # Bot responds via voice AND chat.
    BOTH = "both"
|
||||||
|
|
||||||
|
|
||||||
|
class TeamsbotJoinMode(str, Enum):
    """How the bot joins the meeting.

    Members are plain strings (``str`` mixin), so they compare equal to their
    wire values ("systemBot", "anonymous", "userAccount") and can be built
    from them, e.g. ``TeamsbotJoinMode("anonymous")``.
    """

    # Join with system bot account (backend-managed credentials).
    SYSTEM_BOT = "systemBot"
    # Join as anonymous guest (no credentials).
    ANONYMOUS = "anonymous"
    # Join with user's own Microsoft account (OAuth).
    USER_ACCOUNT = "userAccount"
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Database Models (stored in PostgreSQL)
|
# Database Models (stored in PostgreSQL)
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
@ -106,7 +120,7 @@ class TeamsbotBotResponse(BaseModel):
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
class TeamsbotConfig(BaseModel):
|
class TeamsbotConfig(BaseModel):
|
||||||
"""Configuration for a Teams Bot feature instance."""
|
"""Configuration for a Teams Bot feature instance (serves as default template for new users)."""
|
||||||
botName: str = Field(default="AI Assistant", description="Default bot display name")
|
botName: str = Field(default="AI Assistant", description="Default bot display name")
|
||||||
backgroundImageUrl: Optional[str] = Field(default=None, description="Default background image URL")
|
backgroundImageUrl: Optional[str] = Field(default=None, description="Default background image URL")
|
||||||
aiSystemPrompt: str = Field(
|
aiSystemPrompt: str = Field(
|
||||||
|
|
@ -114,6 +128,7 @@ class TeamsbotConfig(BaseModel):
|
||||||
description="Custom system prompt for the AI analysis"
|
description="Custom system prompt for the AI analysis"
|
||||||
)
|
)
|
||||||
responseMode: TeamsbotResponseMode = Field(default=TeamsbotResponseMode.AUTO, description="How the bot responds")
|
responseMode: TeamsbotResponseMode = Field(default=TeamsbotResponseMode.AUTO, description="How the bot responds")
|
||||||
|
responseChannel: TeamsbotResponseChannel = Field(default=TeamsbotResponseChannel.VOICE, description="Channel for bot responses: voice, chat, or both")
|
||||||
language: str = Field(default="de-DE", description="Primary language for STT/TTS")
|
language: str = Field(default="de-DE", description="Primary language for STT/TTS")
|
||||||
voiceId: Optional[str] = Field(default=None, description="Google TTS voice ID (e.g., de-DE-Standard-A)")
|
voiceId: Optional[str] = Field(default=None, description="Google TTS voice ID (e.g., de-DE-Standard-A)")
|
||||||
browserBotUrl: Optional[str] = Field(default=None, description="URL of the Browser Bot service. Falls back to TEAMSBOT_BROWSER_BOT_URL env variable if not set per-instance.")
|
browserBotUrl: Optional[str] = Field(default=None, description="URL of the Browser Bot service. Falls back to TEAMSBOT_BROWSER_BOT_URL env variable if not set per-instance.")
|
||||||
|
|
@ -141,6 +156,7 @@ class TeamsbotStartSessionRequest(BaseModel):
|
||||||
botName: Optional[str] = Field(default=None, description="Override bot name for this session")
|
botName: Optional[str] = Field(default=None, description="Override bot name for this session")
|
||||||
backgroundImageUrl: Optional[str] = Field(default=None, description="Override background image for this session")
|
backgroundImageUrl: Optional[str] = Field(default=None, description="Override background image for this session")
|
||||||
connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access")
|
connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access")
|
||||||
|
joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.")
|
||||||
|
|
||||||
|
|
||||||
class TeamsbotSessionResponse(BaseModel):
|
class TeamsbotSessionResponse(BaseModel):
|
||||||
|
|
@ -156,6 +172,7 @@ class TeamsbotConfigUpdateRequest(BaseModel):
|
||||||
backgroundImageUrl: Optional[str] = None
|
backgroundImageUrl: Optional[str] = None
|
||||||
aiSystemPrompt: Optional[str] = None
|
aiSystemPrompt: Optional[str] = None
|
||||||
responseMode: Optional[TeamsbotResponseMode] = None
|
responseMode: Optional[TeamsbotResponseMode] = None
|
||||||
|
responseChannel: Optional[TeamsbotResponseChannel] = None
|
||||||
language: Optional[str] = None
|
language: Optional[str] = None
|
||||||
voiceId: Optional[str] = None
|
voiceId: Optional[str] = None
|
||||||
browserBotUrl: Optional[str] = None
|
browserBotUrl: Optional[str] = None
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ from .datamodelTeamsbot import (
|
||||||
TeamsbotSessionResponse,
|
TeamsbotSessionResponse,
|
||||||
TeamsbotConfigUpdateRequest,
|
TeamsbotConfigUpdateRequest,
|
||||||
TeamsbotConfig,
|
TeamsbotConfig,
|
||||||
|
TeamsbotJoinMode,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Import service
|
# Import service
|
||||||
|
|
@ -186,8 +187,35 @@ async def startSession(
|
||||||
appApiUrl = APP_CONFIG.get("APP_API_URL", "")
|
appApiUrl = APP_CONFIG.get("APP_API_URL", "")
|
||||||
gatewayBaseUrl = appApiUrl.rstrip("/") if appApiUrl else str(request.base_url).rstrip("/")
|
gatewayBaseUrl = appApiUrl.rstrip("/") if appApiUrl else str(request.base_url).rstrip("/")
|
||||||
|
|
||||||
|
# Determine effective join mode
|
||||||
|
joinMode = body.joinMode
|
||||||
|
if not joinMode:
|
||||||
|
# Default: use system bot if credentials are configured, otherwise anonymous
|
||||||
|
if config.botAccountEmail and config.botAccountPassword:
|
||||||
|
joinMode = TeamsbotJoinMode.SYSTEM_BOT
|
||||||
|
else:
|
||||||
|
joinMode = TeamsbotJoinMode.ANONYMOUS
|
||||||
|
|
||||||
|
# Resolve credentials based on join mode
|
||||||
|
effectiveEmail = None
|
||||||
|
effectivePassword = None
|
||||||
|
if joinMode == TeamsbotJoinMode.SYSTEM_BOT:
|
||||||
|
effectiveEmail = config.botAccountEmail
|
||||||
|
effectivePassword = config.botAccountPassword
|
||||||
|
elif joinMode == TeamsbotJoinMode.USER_ACCOUNT:
|
||||||
|
# TODO: Resolve OAuth token from user's Microsoft connection
|
||||||
|
logger.info(f"User account join mode requested but not yet implemented - falling back to anonymous")
|
||||||
|
joinMode = TeamsbotJoinMode.ANONYMOUS
|
||||||
|
# ANONYMOUS mode: no credentials
|
||||||
|
|
||||||
|
# Temporarily override config credentials for this session's join mode
|
||||||
|
sessionConfig = config.model_copy(update={
|
||||||
|
"botAccountEmail": effectiveEmail,
|
||||||
|
"botAccountPassword": effectivePassword,
|
||||||
|
})
|
||||||
|
|
||||||
# Start the bot in background (join meeting via bridge)
|
# Start the bot in background (join meeting via bridge)
|
||||||
service = TeamsbotService(context.user, mandateId, instanceId, config)
|
service = TeamsbotService(context.user, mandateId, instanceId, sessionConfig)
|
||||||
asyncio.create_task(
|
asyncio.create_task(
|
||||||
service.joinMeeting(sessionId, cleanMeetingUrl, body.connectionId, gatewayBaseUrl)
|
service.joinMeeting(sessionId, cleanMeetingUrl, body.connectionId, gatewayBaseUrl)
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ from .datamodelTeamsbot import (
|
||||||
TeamsbotResponseType,
|
TeamsbotResponseType,
|
||||||
TeamsbotConfig,
|
TeamsbotConfig,
|
||||||
TeamsbotResponseMode,
|
TeamsbotResponseMode,
|
||||||
|
TeamsbotResponseChannel,
|
||||||
SpeechTeamsResponse,
|
SpeechTeamsResponse,
|
||||||
)
|
)
|
||||||
from .browserBotConnector import BrowserBotConnector
|
from .browserBotConnector import BrowserBotConnector
|
||||||
|
|
@ -238,6 +239,20 @@ class TeamsbotService:
|
||||||
websocket=websocket,
|
websocket=websocket,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
elif msgType == "chatMessage":
|
||||||
|
chat = message.get("chat", {})
|
||||||
|
logger.info(f"[WS-DEBUG] Chat message received: speaker={chat.get('speaker')}, text={chat.get('text', '')[:60]}...")
|
||||||
|
await self._processTranscript(
|
||||||
|
sessionId=sessionId,
|
||||||
|
speaker=chat.get("speaker", "Unknown"),
|
||||||
|
text=chat.get("text", ""),
|
||||||
|
isFinal=True,
|
||||||
|
interface=interface,
|
||||||
|
voiceInterface=voiceInterface,
|
||||||
|
websocket=websocket,
|
||||||
|
source="chat",
|
||||||
|
)
|
||||||
|
|
||||||
elif msgType == "status":
|
elif msgType == "status":
|
||||||
status = message.get("status")
|
status = message.get("status")
|
||||||
errorMessage = message.get("message")
|
errorMessage = message.get("message")
|
||||||
|
|
@ -300,8 +315,9 @@ class TeamsbotService:
|
||||||
interface,
|
interface,
|
||||||
voiceInterface,
|
voiceInterface,
|
||||||
websocket: WebSocket,
|
websocket: WebSocket,
|
||||||
|
source: str = "caption",
|
||||||
):
|
):
|
||||||
"""Process a transcript segment from the browser bot's caption scraping."""
|
"""Process a transcript segment from captions or chat messages."""
|
||||||
|
|
||||||
text = text.strip()
|
text = text.strip()
|
||||||
if not text:
|
if not text:
|
||||||
|
|
@ -325,11 +341,12 @@ class TeamsbotService:
|
||||||
|
|
||||||
createdTranscript = interface.createTranscript(transcriptData)
|
createdTranscript = interface.createTranscript(transcriptData)
|
||||||
|
|
||||||
# Update context buffer
|
# Update context buffer (mark source for chat messages)
|
||||||
self._contextBuffer.append({
|
self._contextBuffer.append({
|
||||||
"speaker": speaker or "Unknown",
|
"speaker": speaker or "Unknown",
|
||||||
"text": text,
|
"text": text,
|
||||||
"timestamp": getUtcTimestamp(),
|
"timestamp": getUtcTimestamp(),
|
||||||
|
"source": source,
|
||||||
})
|
})
|
||||||
# Keep only last N segments
|
# Keep only last N segments
|
||||||
maxSegments = self.config.contextWindowSegments
|
maxSegments = self.config.contextWindowSegments
|
||||||
|
|
@ -442,13 +459,17 @@ class TeamsbotService:
|
||||||
self._lastAiCallTime = time.time()
|
self._lastAiCallTime = time.time()
|
||||||
|
|
||||||
# Build transcript context from buffer.
|
# Build transcript context from buffer.
|
||||||
# Mark bot's own utterances so the AI knows what it already said.
|
# Mark bot's own utterances and chat messages for the AI.
|
||||||
contextLines = []
|
contextLines = []
|
||||||
for segment in self._contextBuffer:
|
for segment in self._contextBuffer:
|
||||||
speaker = segment.get("speaker", "Unknown")
|
speaker = segment.get("speaker", "Unknown")
|
||||||
text = segment.get("text", "")
|
text = segment.get("text", "")
|
||||||
|
segSource = segment.get("source", "caption")
|
||||||
|
prefix = "Chat" if segSource == "chat" else ""
|
||||||
if self._isBotSpeaker(speaker):
|
if self._isBotSpeaker(speaker):
|
||||||
contextLines.append(f"[YOU ({self.config.botName})]: {text}")
|
contextLines.append(f"[YOU ({self.config.botName})]: {text}")
|
||||||
|
elif prefix:
|
||||||
|
contextLines.append(f"[{prefix}: {speaker}]: {text}")
|
||||||
else:
|
else:
|
||||||
contextLines.append(f"[{speaker}]: {text}")
|
contextLines.append(f"[{speaker}]: {text}")
|
||||||
|
|
||||||
|
|
@ -522,10 +543,19 @@ class TeamsbotService:
|
||||||
})
|
})
|
||||||
return
|
return
|
||||||
|
|
||||||
# Auto mode: send voice + chat response
|
# Determine response channel (voice, chat, or both)
|
||||||
|
channel = self.config.responseChannel
|
||||||
responseType = TeamsbotResponseType.BOTH
|
responseType = TeamsbotResponseType.BOTH
|
||||||
|
|
||||||
# 4a: TTS -> Audio to bridge
|
if channel == TeamsbotResponseChannel.VOICE:
|
||||||
|
responseType = TeamsbotResponseType.AUDIO
|
||||||
|
elif channel == TeamsbotResponseChannel.CHAT:
|
||||||
|
responseType = TeamsbotResponseType.CHAT
|
||||||
|
else:
|
||||||
|
responseType = TeamsbotResponseType.BOTH
|
||||||
|
|
||||||
|
# 4a: Voice response (TTS -> Audio to bot)
|
||||||
|
if channel in (TeamsbotResponseChannel.VOICE, TeamsbotResponseChannel.BOTH):
|
||||||
try:
|
try:
|
||||||
ttsResult = await voiceInterface.textToSpeech(
|
ttsResult = await voiceInterface.textToSpeech(
|
||||||
text=speechResult.responseText,
|
text=speechResult.responseText,
|
||||||
|
|
@ -536,8 +566,6 @@ class TeamsbotService:
|
||||||
if ttsResult and isinstance(ttsResult, dict):
|
if ttsResult and isinstance(ttsResult, dict):
|
||||||
audioContent = ttsResult.get("audioContent")
|
audioContent = ttsResult.get("audioContent")
|
||||||
if audioContent and websocket:
|
if audioContent and websocket:
|
||||||
# Send TTS audio to bot via WebSocket
|
|
||||||
# Bot expects: {type: "playAudio", sessionId, audio: {data, format}}
|
|
||||||
await websocket.send_text(json.dumps({
|
await websocket.send_text(json.dumps({
|
||||||
"type": "playAudio",
|
"type": "playAudio",
|
||||||
"sessionId": sessionId,
|
"sessionId": sessionId,
|
||||||
|
|
@ -550,8 +578,21 @@ class TeamsbotService:
|
||||||
logger.info(f"TTS audio generated for session {sessionId} (HTTP mode - no WebSocket for playback)")
|
logger.info(f"TTS audio generated for session {sessionId} (HTTP mode - no WebSocket for playback)")
|
||||||
except Exception as ttsErr:
|
except Exception as ttsErr:
|
||||||
logger.warning(f"TTS failed for session {sessionId}: {ttsErr}")
|
logger.warning(f"TTS failed for session {sessionId}: {ttsErr}")
|
||||||
|
if responseType == TeamsbotResponseType.AUDIO:
|
||||||
responseType = TeamsbotResponseType.CHAT # Fallback to chat only
|
responseType = TeamsbotResponseType.CHAT # Fallback to chat only
|
||||||
|
|
||||||
|
# 4b: Chat response (send text message to meeting chat)
|
||||||
|
if channel in (TeamsbotResponseChannel.CHAT, TeamsbotResponseChannel.BOTH):
|
||||||
|
try:
|
||||||
|
if websocket:
|
||||||
|
await websocket.send_text(json.dumps({
|
||||||
|
"type": "sendChatMessage",
|
||||||
|
"sessionId": sessionId,
|
||||||
|
"text": speechResult.responseText,
|
||||||
|
}))
|
||||||
|
except Exception as chatErr:
|
||||||
|
logger.warning(f"Chat message send failed for session {sessionId}: {chatErr}")
|
||||||
|
|
||||||
# 4b: Store bot response
|
# 4c: Store bot response
|
||||||
botResponseData = TeamsbotBotResponse(
|
botResponseData = TeamsbotBotResponse(
|
||||||
sessionId=sessionId,
|
sessionId=sessionId,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue