feat(teamsbot): stop command detection, session context for AI, context summarization for long sessions
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
c7d1642f61
commit
4186ec6188
3 changed files with 94 additions and 1 deletions
|
|
@ -78,6 +78,7 @@ class TeamsbotSession(BaseModel):
|
||||||
startedByUserId: str = Field(description="User ID who started the session")
|
startedByUserId: str = Field(description="User ID who started the session")
|
||||||
bridgeSessionId: Optional[str] = Field(default=None, description="Session ID on the .NET Media Bridge")
|
bridgeSessionId: Optional[str] = Field(default=None, description="Session ID on the .NET Media Bridge")
|
||||||
meetingChatId: Optional[str] = Field(default=None, description="Teams meeting chat ID for Graph API messages")
|
meetingChatId: Optional[str] = Field(default=None, description="Teams meeting chat ID for Graph API messages")
|
||||||
|
sessionContext: Optional[str] = Field(default=None, description="Custom context/knowledge provided by the user for this session")
|
||||||
summary: Optional[str] = Field(default=None, description="AI-generated meeting summary (after session ends)")
|
summary: Optional[str] = Field(default=None, description="AI-generated meeting summary (after session ends)")
|
||||||
errorMessage: Optional[str] = Field(default=None, description="Error message if status is ERROR")
|
errorMessage: Optional[str] = Field(default=None, description="Error message if status is ERROR")
|
||||||
transcriptSegmentCount: int = Field(default=0, description="Number of transcript segments in this session")
|
transcriptSegmentCount: int = Field(default=0, description="Number of transcript segments in this session")
|
||||||
|
|
@ -200,6 +201,7 @@ class TeamsbotStartSessionRequest(BaseModel):
|
||||||
backgroundImageUrl: Optional[str] = Field(default=None, description="Override background image for this session")
|
backgroundImageUrl: Optional[str] = Field(default=None, description="Override background image for this session")
|
||||||
connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access")
|
connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access")
|
||||||
joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.")
|
joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.")
|
||||||
|
sessionContext: Optional[str] = Field(default=None, description="Custom context/knowledge to provide to the bot for this session (e.g. meeting agenda, documents, background info)")
|
||||||
|
|
||||||
|
|
||||||
class TeamsbotSessionResponse(BaseModel):
|
class TeamsbotSessionResponse(BaseModel):
|
||||||
|
|
|
||||||
|
|
@ -178,6 +178,7 @@ async def startSession(
|
||||||
meetingLink=cleanMeetingUrl,
|
meetingLink=cleanMeetingUrl,
|
||||||
botName=body.botName or config.botName,
|
botName=body.botName or config.botName,
|
||||||
backgroundImageUrl=body.backgroundImageUrl or config.backgroundImageUrl,
|
backgroundImageUrl=body.backgroundImageUrl or config.backgroundImageUrl,
|
||||||
|
sessionContext=body.sessionContext,
|
||||||
status=TeamsbotSessionStatus.PENDING,
|
status=TeamsbotSessionStatus.PENDING,
|
||||||
startedByUserId=str(context.user.id),
|
startedByUserId=str(context.user.id),
|
||||||
).model_dump()
|
).model_dump()
|
||||||
|
|
|
||||||
|
|
@ -77,6 +77,8 @@ class TeamsbotService:
|
||||||
# State
|
# State
|
||||||
self._lastAiCallTime: float = 0.0
|
self._lastAiCallTime: float = 0.0
|
||||||
self._contextBuffer: List[Dict[str, Any]] = []
|
self._contextBuffer: List[Dict[str, Any]] = []
|
||||||
|
self._sessionContext: Optional[str] = None # User-provided background context
|
||||||
|
self._contextSummary: Optional[str] = None # AI-generated summary of long context
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Session Lifecycle
|
# Session Lifecycle
|
||||||
|
|
@ -210,6 +212,13 @@ class TeamsbotService:
|
||||||
interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
|
interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
|
||||||
voiceInterface = getVoiceInterface(self.currentUser, self.mandateId)
|
voiceInterface = getVoiceInterface(self.currentUser, self.mandateId)
|
||||||
|
|
||||||
|
# Load session context (user-provided background knowledge)
|
||||||
|
session = interface.getSession(sessionId)
|
||||||
|
if session:
|
||||||
|
self._sessionContext = session.get("sessionContext")
|
||||||
|
if self._sessionContext:
|
||||||
|
logger.info(f"Session {sessionId}: Loaded session context ({len(self._sessionContext)} chars)")
|
||||||
|
|
||||||
logger.info(f"[WS-DEBUG] WebSocket handler started for session {sessionId}")
|
logger.info(f"[WS-DEBUG] WebSocket handler started for session {sessionId}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -323,6 +332,23 @@ class TeamsbotService:
|
||||||
if not text:
|
if not text:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Check for STOP command: "<botname> stop" or "<botname> STOP"
|
||||||
|
# This immediately stops the bot from speaking and clears the audio queue.
|
||||||
|
botNameLower = self.config.botName.lower()
|
||||||
|
textLower = text.lower()
|
||||||
|
if botNameLower in textLower and "stop" in textLower:
|
||||||
|
logger.info(f"Session {sessionId}: STOP command detected: [{speaker}] {text[:60]}")
|
||||||
|
if websocket:
|
||||||
|
try:
|
||||||
|
await websocket.send_text(json.dumps({
|
||||||
|
"type": "stopAudio",
|
||||||
|
"sessionId": sessionId,
|
||||||
|
}))
|
||||||
|
except Exception as stopErr:
|
||||||
|
logger.warning(f"Failed to send stop command: {stopErr}")
|
||||||
|
# Don't trigger AI analysis for stop commands
|
||||||
|
return
|
||||||
|
|
||||||
# Filter out the bot's own speech from AI triggering.
|
# Filter out the bot's own speech from AI triggering.
|
||||||
# The bot hears itself via captions — these should be stored in the
|
# The bot hears itself via captions — these should be stored in the
|
||||||
# transcript for the record, but must NOT trigger AI analysis (feedback loop).
|
# transcript for the record, but must NOT trigger AI analysis (feedback loop).
|
||||||
|
|
@ -351,6 +377,10 @@ class TeamsbotService:
|
||||||
# Keep only last N segments
|
# Keep only last N segments
|
||||||
maxSegments = self.config.contextWindowSegments
|
maxSegments = self.config.contextWindowSegments
|
||||||
if len(self._contextBuffer) > maxSegments:
|
if len(self._contextBuffer) > maxSegments:
|
||||||
|
# When buffer overflows, summarize the older half to preserve context
|
||||||
|
# without losing information. The summary replaces the old segments.
|
||||||
|
if not self._contextSummary and len(self._contextBuffer) > maxSegments * 1.5:
|
||||||
|
asyncio.create_task(self._summarizeContextBuffer(sessionId))
|
||||||
self._contextBuffer = self._contextBuffer[-maxSegments:]
|
self._contextBuffer = self._contextBuffer[-maxSegments:]
|
||||||
|
|
||||||
# Emit SSE event for live transcript
|
# Emit SSE event for live transcript
|
||||||
|
|
@ -473,7 +503,17 @@ class TeamsbotService:
|
||||||
else:
|
else:
|
||||||
contextLines.append(f"[{speaker}]: {text}")
|
contextLines.append(f"[{speaker}]: {text}")
|
||||||
|
|
||||||
transcriptContext = f"BOT_NAME:{self.config.botName}\n" + "\n".join(contextLines)
|
# Include session context if provided by the user at session start
|
||||||
|
sessionContextStr = ""
|
||||||
|
if self._sessionContext:
|
||||||
|
sessionContextStr = f"\nSESSION_CONTEXT (background knowledge provided by the user):\n{self._sessionContext}\n"
|
||||||
|
|
||||||
|
# Include summary of earlier conversation if available
|
||||||
|
summaryStr = ""
|
||||||
|
if self._contextSummary:
|
||||||
|
summaryStr = f"\nEARLIER_CONVERSATION_SUMMARY:\n{self._contextSummary}\n"
|
||||||
|
|
||||||
|
transcriptContext = f"BOT_NAME:{self.config.botName}{sessionContextStr}{summaryStr}\nRECENT_TRANSCRIPT:\n" + "\n".join(contextLines)
|
||||||
|
|
||||||
# Call SPEECH_TEAMS
|
# Call SPEECH_TEAMS
|
||||||
try:
|
try:
|
||||||
|
|
@ -633,6 +673,56 @@ class TeamsbotService:
|
||||||
logger.error(f"SPEECH_TEAMS analysis failed for session {sessionId}: {type(e).__name__}: {e}", exc_info=True)
|
logger.error(f"SPEECH_TEAMS analysis failed for session {sessionId}: {type(e).__name__}: {e}", exc_info=True)
|
||||||
await _emitSessionEvent(sessionId, "error", {"message": f"AI analysis failed: {type(e).__name__}: {str(e)}"})
|
await _emitSessionEvent(sessionId, "error", {"message": f"AI analysis failed: {type(e).__name__}: {str(e)}"})
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Context Summarization (for long sessions)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
async def _summarizeContextBuffer(self, sessionId: str):
    """Summarize the older half of the context buffer into ``self._contextSummary``.

    Intended to run as a background task (the original author notes it
    "runs in the background"). The method is best-effort: every failure is
    logged as a warning and never propagated to the caller.

    Behavior:
      * Returns immediately if a summary already exists (it is not refreshed).
      * Returns without calling the AI when the older half of the buffer
        holds fewer than 10 segments.
      * Otherwise asks the AI service for a short summary and stores the
        stripped result in ``self._contextSummary``.

    Args:
        sessionId: Session identifier, used only for log messages here.

    Returns:
        None. The result is published via ``self._contextSummary``.
    """
    try:
        if self._contextSummary:
            return  # Already summarized; an existing summary is kept as-is

        # Slice = snapshot. The live buffer may be appended to or trimmed
        # while the AI call below is awaited.
        halfPoint = len(self._contextBuffer) // 2
        oldSegments = self._contextBuffer[:halfPoint]

        if len(oldSegments) < 10:
            return  # Not enough to summarize

        # Render the segments as "[speaker]: text" lines for the model.
        textToSummarize = "\n".join(
            f"[{seg.get('speaker', 'Unknown')}]: {seg.get('text', '')}"
            for seg in oldSegments
        )

        # Function-scope imports kept exactly as in the original block.
        from modules.services.serviceAi.mainServiceAi import AiService
        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum

        serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
        aiService = AiService(serviceCenter=serviceContext)
        await aiService.ensureAiObjectsInitialized()

        request = AiCallRequest(
            prompt="Fasse das folgende Meeting-Transkript in 3-5 Saetzen zusammen. Nenne die wichtigsten Themen, Entscheidungen und offene Fragen. Antworte NUR mit der Zusammenfassung, keine Erklaerungen.",
            context=textToSummarize,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.SPEED,
            ),
        )

        response = await aiService.callAi(request)
        if not response or response.errorCount != 0:
            # Previously this case was dropped without any trace in the logs.
            logger.warning(f"Context summarization returned no usable response for session {sessionId}")
            return

        # NOTE(review): assumes response.content is a str or None - confirm
        # against the AI response model. Guarding avoids an AttributeError.
        summary = (response.content or "").strip()
        if not summary:
            logger.warning(f"Context summarization returned empty content for session {sessionId}")
            return

        # Re-check after the await: another invocation of this coroutine may
        # have stored a summary in the meantime; keep the first one.
        if self._contextSummary:
            return

        self._contextSummary = summary
        logger.info(f"Session {sessionId}: Context summarized ({len(oldSegments)} segments -> {len(self._contextSummary)} chars)")

    except Exception as e:
        # Deliberately broad: summarization must never break the session.
        logger.warning(f"Context summarization failed for session {sessionId}: {e}")
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Meeting Summary
|
# Meeting Summary
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue