feat(teamsbot): stop command detection, session context for AI, context summarization for long sessions

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-16 09:29:03 +01:00 · 2026-02-16 09:29:03 +01:00 · 4186ec6188
commit 4186ec6188
parent c7d1642f61
3 changed files with 94 additions and 1 deletions
--- a/modules/features/teamsbot/datamodelTeamsbot.py
+++ b/modules/features/teamsbot/datamodelTeamsbot.py
@ -78,6 +78,7 @@ class TeamsbotSession(BaseModel):
    startedByUserId: str = Field(description="User ID who started the session")
    bridgeSessionId: Optional[str] = Field(default=None, description="Session ID on the .NET Media Bridge")
    meetingChatId: Optional[str] = Field(default=None, description="Teams meeting chat ID for Graph API messages")
+    sessionContext: Optional[str] = Field(default=None, description="Custom context/knowledge provided by the user for this session")
    summary: Optional[str] = Field(default=None, description="AI-generated meeting summary (after session ends)")
    errorMessage: Optional[str] = Field(default=None, description="Error message if status is ERROR")
    transcriptSegmentCount: int = Field(default=0, description="Number of transcript segments in this session")
@ -200,6 +201,7 @@ class TeamsbotStartSessionRequest(BaseModel):
    backgroundImageUrl: Optional[str] = Field(default=None, description="Override background image for this session")
    connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access")
    joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.")
+    sessionContext: Optional[str] = Field(default=None, description="Custom context/knowledge to provide to the bot for this session (e.g. meeting agenda, documents, background info)")


 class TeamsbotSessionResponse(BaseModel):
--- a/modules/features/teamsbot/routeFeatureTeamsbot.py
+++ b/modules/features/teamsbot/routeFeatureTeamsbot.py
@ -178,6 +178,7 @@ async def startSession(
        meetingLink=cleanMeetingUrl,
        botName=body.botName or config.botName,
        backgroundImageUrl=body.backgroundImageUrl or config.backgroundImageUrl,
+        sessionContext=body.sessionContext,
        status=TeamsbotSessionStatus.PENDING,
        startedByUserId=str(context.user.id),
    ).model_dump()
--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@ -77,6 +77,8 @@ class TeamsbotService:
        # State
        self._lastAiCallTime: float = 0.0
        self._contextBuffer: List[Dict[str, Any]] = []
+        self._sessionContext: Optional[str] = None  # User-provided background context
+        self._contextSummary: Optional[str] = None  # AI-generated summary of long context

    # =========================================================================
    # Session Lifecycle
@ -210,6 +212,13 @@ class TeamsbotService:
        interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
        voiceInterface = getVoiceInterface(self.currentUser, self.mandateId)

+        # Load session context (user-provided background knowledge)
+        session = interface.getSession(sessionId)
+        if session:
+            self._sessionContext = session.get("sessionContext")
+            if self._sessionContext:
+                logger.info(f"Session {sessionId}: Loaded session context ({len(self._sessionContext)} chars)")
+
        logger.info(f"[WS-DEBUG] WebSocket handler started for session {sessionId}")

        try:
@ -323,6 +332,23 @@ class TeamsbotService:
        if not text:
            return

+        # Check for STOP command: "<botname> stop" or "<botname> STOP"
+        # This immediately stops the bot from speaking and clears the audio queue.
+        botNameLower = self.config.botName.lower()
+        textLower = text.lower()
+        if botNameLower in textLower and "stop" in textLower:
+            logger.info(f"Session {sessionId}: STOP command detected: [{speaker}] {text[:60]}")
+            if websocket:
+                try:
+                    await websocket.send_text(json.dumps({
+                        "type": "stopAudio",
+                        "sessionId": sessionId,
+                    }))
+                except Exception as stopErr:
+                    logger.warning(f"Failed to send stop command: {stopErr}")
+            # Don't trigger AI analysis for stop commands
+            return
+
        # Filter out the bot's own speech from AI triggering.
        # The bot hears itself via captions — these should be stored in the
        # transcript for the record, but must NOT trigger AI analysis (feedback loop).
@ -351,6 +377,10 @@ class TeamsbotService:
        # Keep only last N segments
        maxSegments = self.config.contextWindowSegments
        if len(self._contextBuffer) > maxSegments:
+            # When buffer overflows, summarize the older half to preserve context
+            # without losing information. The summary replaces the old segments.
+            if not self._contextSummary and len(self._contextBuffer) > maxSegments * 1.5:
+                asyncio.create_task(self._summarizeContextBuffer(sessionId))
            self._contextBuffer = self._contextBuffer[-maxSegments:]

        # Emit SSE event for live transcript
@ -473,7 +503,17 @@ class TeamsbotService:
            else:
                contextLines.append(f"[{speaker}]: {text}")
        
-        transcriptContext = f"BOT_NAME:{self.config.botName}\n" + "\n".join(contextLines)
+        # Include session context if provided by the user at session start
+        sessionContextStr = ""
+        if self._sessionContext:
+            sessionContextStr = f"\nSESSION_CONTEXT (background knowledge provided by the user):\n{self._sessionContext}\n"
+
+        # Include summary of earlier conversation if available
+        summaryStr = ""
+        if self._contextSummary:
+            summaryStr = f"\nEARLIER_CONVERSATION_SUMMARY:\n{self._contextSummary}\n"
+
+        transcriptContext = f"BOT_NAME:{self.config.botName}{sessionContextStr}{summaryStr}\nRECENT_TRANSCRIPT:\n" + "\n".join(contextLines)

        # Call SPEECH_TEAMS
        try:
@ -633,6 +673,56 @@ class TeamsbotService:
            logger.error(f"SPEECH_TEAMS analysis failed for session {sessionId}: {type(e).__name__}: {e}", exc_info=True)
            await _emitSessionEvent(sessionId, "error", {"message": f"AI analysis failed: {type(e).__name__}: {str(e)}"})

+    # =========================================================================
+    # Context Summarization (for long sessions)
+    # =========================================================================
+
+    async def _summarizeContextBuffer(self, sessionId: str):
+        """Summarize the older half of the context buffer into self._contextSummary.
+
+        Meant to run as a background task. No-op when a summary already exists, a run
+        is in flight, or fewer than 10 older segments exist. Failures are only logged.
+        """
+        # Checked and set before the first await, so overlapping tasks cannot each call the AI.
+        if self._contextSummary or getattr(self, "_summarizationInFlight", False):
+            return
+        self._summarizationInFlight = True
+        try:
+            # The slice is a copy of the older half; later buffer trims do not affect it.
+            oldSegments = self._contextBuffer[:len(self._contextBuffer) // 2]
+            if len(oldSegments) < 10:
+                return  # Not enough to summarize
+
+            textToSummarize = "\n".join(
+                f"[{seg.get('speaker', 'Unknown')}]: {seg.get('text', '')}" for seg in oldSegments
+            )
+
+            from modules.services.serviceAi.mainServiceAi import AiService
+            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
+
+            serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
+            aiService = AiService(serviceCenter=serviceContext)
+            await aiService.ensureAiObjectsInitialized()
+
+            request = AiCallRequest(
+                prompt="Fasse das folgende Meeting-Transkript in 3-5 Saetzen zusammen. Nenne die wichtigsten Themen, Entscheidungen und offene Fragen. Antworte NUR mit der Zusammenfassung, keine Erklaerungen.",
+                context=textToSummarize,
+                options=AiCallOptions(
+                    operationType=OperationTypeEnum.DATA_ANALYSE,
+                    priority=PriorityEnum.SPEED,
+                )
+            )
+
+            response = await aiService.callAi(request)
+            summary = (response.content or "").strip() if response and response.errorCount == 0 else ""
+            if summary:
+                self._contextSummary = summary
+                logger.info(f"Session {sessionId}: Context summarized ({len(oldSegments)} segments -> {len(self._contextSummary)} chars)")
+        except Exception as e:
+            logger.warning(f"Context summarization failed for session {sessionId}: {e}")
+        finally:
+            self._summarizationInFlight = False
+
    # =========================================================================
    # Meeting Summary
    # =========================================================================