feat(teamsbot): stop via AI intent (multilingual), initial session context summarization

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-16 10:03:21 +01:00 · 2026-02-16 10:03:21 +01:00 · a67c7f8b81
commit a67c7f8b81
parent 4186ec6188
3 changed files with 68 additions and 22 deletions
--- a/modules/features/teamsbot/datamodelTeamsbot.py
+++ b/modules/features/teamsbot/datamodelTeamsbot.py
@ -36,6 +36,7 @@ class TeamsbotDetectedIntent(str, Enum):
    ADDRESSED = "addressed"       # Bot was directly addressed
    QUESTION = "question"         # A general question was asked
    PROACTIVE = "proactive"       # Bot has a valuable proactive contribution
    STOP = "stop"                 # User asked the bot to stop/be quiet
    NONE = "none"                 # No action needed
--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@ -213,11 +213,17 @@ class TeamsbotService:
        voiceInterface = getVoiceInterface(self.currentUser, self.mandateId)
        # Load session context (user-provided background knowledge)
        # If the context is long (>500 chars), summarize it to reduce token usage
        session = interface.getSession(sessionId)
        if session:
-            self._sessionContext = session.get("sessionContext")
+            rawContext = session.get("sessionContext")
            if rawContext and len(rawContext) > 500:
                logger.info(f"Session {sessionId}: Summarizing long session context ({len(rawContext)} chars)...")
                self._sessionContext = await self._summarizeSessionContext(sessionId, rawContext)
            elif rawContext:
                self._sessionContext = rawContext
            if self._sessionContext:
-                logger.info(f"Session {sessionId}: Loaded session context ({len(self._sessionContext)} chars)")
+                logger.info(f"Session {sessionId}: Session context ready ({len(self._sessionContext)} chars)")
        logger.info(f"[WS-DEBUG] WebSocket handler started for session {sessionId}")
@ -332,23 +338,6 @@ class TeamsbotService:
        if not text:
            return
        # Check for STOP command: "<botname> stop" or "<botname> STOP"
        # This immediately stops the bot from speaking and clears the audio queue.
        botNameLower = self.config.botName.lower()
        textLower = text.lower()
        if botNameLower in textLower and "stop" in textLower:
            logger.info(f"Session {sessionId}: STOP command detected: [{speaker}] {text[:60]}")
            if websocket:
                try:
                    await websocket.send_text(json.dumps({
                        "type": "stopAudio",
                        "sessionId": sessionId,
                    }))
                except Exception as stopErr:
                    logger.warning(f"Failed to send stop command: {stopErr}")
            # Don't trigger AI analysis for stop commands
            return
        # Filter out the bot's own speech from AI triggering.
        # The bot hears itself via captions — these should be stored in the
        # transcript for the record, but must NOT trigger AI analysis (feedback loop).
@ -571,7 +560,20 @@ class TeamsbotService:
                "priceCHF": response.priceCHF,
            })
-            # Step 4: Respond if AI decided to
+            # Step 4a: Handle STOP intent -- stop audio immediately
            if speechResult.detectedIntent == "stop":
                logger.info(f"Session {sessionId}: AI detected STOP intent: {speechResult.reasoning}")
                if websocket:
                    try:
                        await websocket.send_text(json.dumps({
                            "type": "stopAudio",
                            "sessionId": sessionId,
                        }))
                    except Exception as stopErr:
                        logger.warning(f"Failed to send stop command: {stopErr}")
                return
            # Step 4b: Respond if AI decided to
            if speechResult.shouldRespond and speechResult.responseText:
                if self.config.responseMode == TeamsbotResponseMode.MANUAL:
@ -677,6 +679,42 @@ class TeamsbotService:
    # Context Summarization (for long sessions)
    # =========================================================================
    async def _summarizeSessionContext(self, sessionId: str, rawContext: str) -> str:
        """Condense a long user-provided session context to its essential points.

        The result is used in place of the raw context so that subsequent AI
        calls carry fewer tokens. Summarization is best-effort: if the AI call
        raises, reports errors, or yields only blank text, the raw context
        (capped in length) is returned instead, so the context is never lost.

        Args:
            sessionId: Session identifier; used only in log messages.
            rawContext: The unabridged context text supplied by the user.

        Returns:
            The stripped AI summary when the call succeeds with non-blank
            content; otherwise the first 2000 characters of ``rawContext``.
        """
        try:
            # Function-local imports: an import failure is handled by the
            # ``except`` below and simply triggers the fallback.
            from modules.services.serviceAi.mainServiceAi import AiService
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
            serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
            aiService = AiService(serviceCenter=serviceContext)
            await aiService.ensureAiObjectsInitialized()
            request = AiCallRequest(
                prompt=(
                    "Fasse den folgenden Kontext auf die wesentlichen Punkte zusammen. "
                    "Behalte alle wichtigen Fakten, Namen, Zahlen, Entscheidungen und Aktionspunkte. "
                    "Entferne Fuelltext und Wiederholungen. "
                    "Antworte NUR mit der Zusammenfassung, keine Erklaerungen oder Einleitungen."
                ),
                context=rawContext,
                options=AiCallOptions(
                    operationType=OperationTypeEnum.DATA_ANALYSE,
                    priority=PriorityEnum.SPEED,
                )
            )
            response = await aiService.callAi(request)
            summary = ""
            if response and response.errorCount == 0 and response.content:
                # Strip BEFORE deciding success: whitespace-only content is
                # truthy but would otherwise produce an empty "summary".
                summary = response.content.strip()
            if summary:
                logger.info(f"Session {sessionId}: Context summarized from {len(rawContext)} to {len(summary)} chars")
                return summary
            # Make the silent fall-through visible in the logs.
            logger.warning(f"Session {sessionId}: Context summarization returned no usable content, using original context")
        except Exception as e:
            # Deliberately broad: summarization must never break session start-up.
            logger.warning(f"Session context summarization failed for {sessionId}: {e}")
        # Fallback: return original (truncated if very long). Slicing past the
        # end of a string is a no-op, so no explicit length check is needed.
        return rawContext[:2000]
    async def _summarizeContextBuffer(self, sessionId: str):
        """Summarize the older part of the context buffer to preserve information
        without exceeding the context window. This runs in the background."""
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@ -357,7 +357,13 @@ ANTWORT-STIL (wenn du antwortest):
 - NICHT "Ich bin {botName} und ich bin hier um zu helfen" wiederholen
 - NICHT frueheres wiederholen das du schon gesagt hast
 - Max 1-2 Saetze, praezise auf den Punkt
- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT"""
+- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT
 STOP-ERKENNUNG:
 Wenn jemand dich bittet aufzuhoeren, still zu sein, zu stoppen, oder nicht mehr zu reden
 (in JEDER Sprache, z.B. "Shelly stop", "Shelly sei still", "Shelly halt", "Shelly be quiet",
 "Shelly shut up", "Shelly arrete", etc.), dann setze detectedIntent auf "stop" und
 shouldRespond auf false. Du musst NICHT antworten wenn jemand dich stoppt."""
        # Append user-configured instructions if provided
        if userSystemPrompt and userSystemPrompt.strip():
@ -370,13 +376,14 @@ WICHTIG: Antworte IMMER als valides JSON in exakt diesem Format:
    "shouldRespond": true/false,
    "responseText": "Deine Antwort hier" oder null,
    "reasoning": "Kurze Begruendung deiner Entscheidung",
-    "detectedIntent": "addressed" | "question" | "proactive" | "none"
+    "detectedIntent": "addressed" | "question" | "proactive" | "stop" | "none"
 }}
 detectedIntent-Werte:
 - "addressed": {botName} wurde direkt angesprochen
 - "question": Eine allgemeine Frage wurde gestellt
 - "proactive": Du hast einen wertvollen proaktiven Beitrag
 - "stop": Der User bittet {botName} aufzuhoeren/still zu sein (in jeder Sprache)
 - "none": Kein Handlungsbedarf"""
        return basePrompt