feat(teamsbot): stop via AI intent (multilingual), initial session context summarization

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-16 10:03:21 +01:00 · 2026-02-16 10:03:21 +01:00 · a67c7f8b81
commit a67c7f8b81
parent 4186ec6188
3 changed files with 68 additions and 22 deletions
--- a/modules/features/teamsbot/datamodelTeamsbot.py
+++ b/modules/features/teamsbot/datamodelTeamsbot.py
@ -36,6 +36,7 @@ class TeamsbotDetectedIntent(str, Enum):
    ADDRESSED = "addressed"       # Bot was directly addressed
    QUESTION = "question"         # A general question was asked
    PROACTIVE = "proactive"       # Bot has a valuable proactive contribution
+    STOP = "stop"                 # User asked the bot to stop/be quiet
    NONE = "none"                 # No action needed


--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@ -213,11 +213,17 @@ class TeamsbotService:
        voiceInterface = getVoiceInterface(self.currentUser, self.mandateId)

        # Load session context (user-provided background knowledge)
+        # If the context is long (>500 chars), summarize it to reduce token usage
        session = interface.getSession(sessionId)
        if session:
-            self._sessionContext = session.get("sessionContext")
+            rawContext = session.get("sessionContext")
+            if rawContext and len(rawContext) > 500:
+                logger.info(f"Session {sessionId}: Summarizing long session context ({len(rawContext)} chars)...")
+                self._sessionContext = await self._summarizeSessionContext(sessionId, rawContext)
+            elif rawContext:
+                self._sessionContext = rawContext
            if self._sessionContext:
-                logger.info(f"Session {sessionId}: Loaded session context ({len(self._sessionContext)} chars)")
+                logger.info(f"Session {sessionId}: Session context ready ({len(self._sessionContext)} chars)")

        logger.info(f"[WS-DEBUG] WebSocket handler started for session {sessionId}")

@ -332,23 +338,6 @@ class TeamsbotService:
        if not text:
            return

-        # Check for STOP command: "<botname> stop" or "<botname> STOP"
-        # This immediately stops the bot from speaking and clears the audio queue.
-        botNameLower = self.config.botName.lower()
-        textLower = text.lower()
-        if botNameLower in textLower and "stop" in textLower:
-            logger.info(f"Session {sessionId}: STOP command detected: [{speaker}] {text[:60]}")
-            if websocket:
-                try:
-                    await websocket.send_text(json.dumps({
-                        "type": "stopAudio",
-                        "sessionId": sessionId,
-                    }))
-                except Exception as stopErr:
-                    logger.warning(f"Failed to send stop command: {stopErr}")
-            # Don't trigger AI analysis for stop commands
-            return
-
        # Filter out the bot's own speech from AI triggering.
        # The bot hears itself via captions — these should be stored in the
        # transcript for the record, but must NOT trigger AI analysis (feedback loop).
@ -571,7 +560,20 @@ class TeamsbotService:
                "priceCHF": response.priceCHF,
            })

-            # Step 4: Respond if AI decided to
+            # Step 4a: Handle STOP intent -- stop audio immediately
+            if speechResult.detectedIntent == "stop":
+                logger.info(f"Session {sessionId}: AI detected STOP intent: {speechResult.reasoning}")
+                if websocket:
+                    try:
+                        await websocket.send_text(json.dumps({
+                            "type": "stopAudio",
+                            "sessionId": sessionId,
+                        }))
+                    except Exception as stopErr:
+                        logger.warning(f"Failed to send stop command: {stopErr}")
+                return
+
+            # Step 4b: Respond if AI decided to
            if speechResult.shouldRespond and speechResult.responseText:
                
                if self.config.responseMode == TeamsbotResponseMode.MANUAL:
@ -677,6 +679,42 @@ class TeamsbotService:
    # Context Summarization (for long sessions)
    # =========================================================================

+    async def _summarizeSessionContext(self, sessionId: str, rawContext: str) -> str:
+        """Condense the user-provided session context (rawContext) with a single AI call, to cut token usage later.
+        Returns the stripped AI summary; if the call raises, reports errors, or yields no content, returns rawContext limited to 2000 chars."""
+        try:
+            from modules.services.serviceAi.mainServiceAi import AiService
+            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
+
+            serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
+            aiService = AiService(serviceCenter=serviceContext)
+            await aiService.ensureAiObjectsInitialized()
+
+            request = AiCallRequest(
+                prompt=(
+                    "Fasse den folgenden Kontext auf die wesentlichen Punkte zusammen. "
+                    "Behalte alle wichtigen Fakten, Namen, Zahlen, Entscheidungen und Aktionspunkte. "
+                    "Entferne Fuelltext und Wiederholungen. "
+                    "Antworte NUR mit der Zusammenfassung, keine Erklaerungen oder Einleitungen."
+                ),
+                context=rawContext,
+                options=AiCallOptions(
+                    operationType=OperationTypeEnum.DATA_ANALYSE,
+                    priority=PriorityEnum.SPEED,
+                )
+            )
+
+            response = await aiService.callAi(request)
+            if response and response.errorCount == 0 and response.content:  # accept only an error-free, non-empty reply
+                summary = response.content.strip()
+                logger.info(f"Session {sessionId}: Context summarized from {len(rawContext)} to {len(summary)} chars")
+                return summary
+        except Exception as e:  # best-effort: log the failure and fall through to the fallback below
+            logger.warning(f"Session context summarization failed for {sessionId}: {e}")
+        
+        # Fallback (exception, errorCount != 0, or empty content): original text, cut to its first 2000 chars if longer
+        return rawContext[:2000] if len(rawContext) > 2000 else rawContext
+
    async def _summarizeContextBuffer(self, sessionId: str):
        """Summarize the older part of the context buffer to preserve information
        without exceeding the context window. This runs in the background."""
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@ -357,7 +357,13 @@ ANTWORT-STIL (wenn du antwortest):
 - NICHT "Ich bin {botName} und ich bin hier um zu helfen" wiederholen
 - NICHT frueheres wiederholen das du schon gesagt hast
 - Max 1-2 Saetze, praezise auf den Punkt
- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT"""
+- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT
+
+STOP-ERKENNUNG:
+Wenn jemand dich bittet aufzuhoeren, still zu sein, zu stoppen, oder nicht mehr zu reden
+(in JEDER Sprache, z.B. "Shelly stop", "Shelly sei still", "Shelly halt", "Shelly be quiet",
+"Shelly shut up", "Shelly arrete", etc.), dann setze detectedIntent auf "stop" und
+shouldRespond auf false. Du musst NICHT antworten wenn jemand dich stoppt."""

        # Append user-configured instructions if provided
        if userSystemPrompt and userSystemPrompt.strip():
@ -370,13 +376,14 @@ WICHTIG: Antworte IMMER als valides JSON in exakt diesem Format:
    "shouldRespond": true/false,
    "responseText": "Deine Antwort hier" oder null,
    "reasoning": "Kurze Begruendung deiner Entscheidung",
-    "detectedIntent": "addressed" | "question" | "proactive" | "none"
+    "detectedIntent": "addressed" | "question" | "proactive" | "stop" | "none"
 }}

 detectedIntent-Werte:
 - "addressed": {botName} wurde direkt angesprochen
 - "question": Eine allgemeine Frage wurde gestellt
 - "proactive": Du hast einen wertvollen proaktiven Beitrag
+- "stop": Der User bittet {botName} aufzuhoeren/still zu sein (in jeder Sprache)
 - "none": Kein Handlungsbedarf"""

        return basePrompt