From a67c7f8b813f21b946af6b06a3ce05dc61e0be5f Mon Sep 17 00:00:00 2001
From: patrick-motsch <p.motsch@valueon.ch>
Date: Mon, 16 Feb 2026 10:03:21 +0100
Subject: [PATCH] feat(teamsbot): stop via AI intent (multilingual), initial
 session context summarization

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../features/teamsbot/datamodelTeamsbot.py    |  1 +
 modules/features/teamsbot/service.py          | 78 ++++++++++++++-----
 modules/services/serviceAi/mainServiceAi.py   | 11 ++-
 3 files changed, 68 insertions(+), 22 deletions(-)
diff --git a/modules/features/teamsbot/datamodelTeamsbot.py b/modules/features/teamsbot/datamodelTeamsbot.py
index e4803097..81625b90 100644
--- a/modules/features/teamsbot/datamodelTeamsbot.py
+++ b/modules/features/teamsbot/datamodelTeamsbot.py
@@ -36,6 +36,7 @@ class TeamsbotDetectedIntent(str, Enum):
     ADDRESSED = "addressed"       # Bot was directly addressed
     QUESTION = "question"         # A general question was asked
     PROACTIVE = "proactive"       # Bot has a valuable proactive contribution
+    STOP = "stop"                 # User asked the bot to stop/be quiet
     NONE = "none"                 # No action needed
 
 
diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py
index 8b246f1f..9c3c537c 100644
--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@@ -213,11 +213,17 @@ class TeamsbotService:
         voiceInterface = getVoiceInterface(self.currentUser, self.mandateId)
 
         # Load session context (user-provided background knowledge)
+        # If the context is long (>500 chars), summarize it to reduce token usage
         session = interface.getSession(sessionId)
         if session:
-            self._sessionContext = session.get("sessionContext")
+            rawContext = session.get("sessionContext")
+            if rawContext and len(rawContext) > 500:
+                logger.info(f"Session {sessionId}: Summarizing long session context ({len(rawContext)} chars)...")
+                self._sessionContext = await self._summarizeSessionContext(sessionId, rawContext)
+            elif rawContext:
+                self._sessionContext = rawContext
             if self._sessionContext:
-                logger.info(f"Session {sessionId}: Loaded session context ({len(self._sessionContext)} chars)")
+                logger.info(f"Session {sessionId}: Session context ready ({len(self._sessionContext)} chars)")
 
         logger.info(f"[WS-DEBUG] WebSocket handler started for session {sessionId}")
 
@@ -332,23 +338,6 @@ class TeamsbotService:
         if not text:
             return
 
-        # Check for STOP command: "<botname> stop" or "<botname> STOP"
-        # This immediately stops the bot from speaking and clears the audio queue.
-        botNameLower = self.config.botName.lower()
-        textLower = text.lower()
-        if botNameLower in textLower and "stop" in textLower:
-            logger.info(f"Session {sessionId}: STOP command detected: [{speaker}] {text[:60]}")
-            if websocket:
-                try:
-                    await websocket.send_text(json.dumps({
-                        "type": "stopAudio",
-                        "sessionId": sessionId,
-                    }))
-                except Exception as stopErr:
-                    logger.warning(f"Failed to send stop command: {stopErr}")
-            # Don't trigger AI analysis for stop commands
-            return
-
         # Filter out the bot's own speech from AI triggering.
         # The bot hears itself via captions — these should be stored in the
         # transcript for the record, but must NOT trigger AI analysis (feedback loop).
@@ -571,7 +560,20 @@ class TeamsbotService:
                 "priceCHF": response.priceCHF,
             })
 
-            # Step 4: Respond if AI decided to
+            # Step 4a: Handle STOP intent -- stop audio immediately
+            if speechResult.detectedIntent == "stop":
+                logger.info(f"Session {sessionId}: AI detected STOP intent: {speechResult.reasoning}")
+                if websocket:
+                    try:
+                        await websocket.send_text(json.dumps({
+                            "type": "stopAudio",
+                            "sessionId": sessionId,
+                        }))
+                    except Exception as stopErr:
+                        logger.warning(f"Failed to send stop command: {stopErr}")
+                return
+
+            # Step 4b: Respond if AI decided to
             if speechResult.shouldRespond and speechResult.responseText:
                 
                 if self.config.responseMode == TeamsbotResponseMode.MANUAL:
@@ -677,6 +679,42 @@ class TeamsbotService:
     # Context Summarization (for long sessions)
     # =========================================================================
 
+    async def _summarizeSessionContext(self, sessionId: str, rawContext: str) -> str:
+        """Condense a long user-provided session context through one AI call.
+        Returns the stripped summary, or rawContext capped at 2000 chars when the call
+        raises, reports errors, or yields an empty/whitespace-only answer."""
+        try:
+            from modules.services.serviceAi.mainServiceAi import AiService
+            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
+
+            serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
+            aiService = AiService(serviceCenter=serviceContext)
+            await aiService.ensureAiObjectsInitialized()
+            request = AiCallRequest(
+                prompt=(
+                    "Fasse den folgenden Kontext auf die wesentlichen Punkte zusammen. "
+                    "Behalte alle wichtigen Fakten, Namen, Zahlen, Entscheidungen und Aktionspunkte. "
+                    "Entferne Fuelltext und Wiederholungen. "
+                    "Antworte NUR mit der Zusammenfassung, keine Erklaerungen oder Einleitungen."
+                ),
+                context=rawContext,
+                options=AiCallOptions(
+                    operationType=OperationTypeEnum.DATA_ANALYSE,
+                    priority=PriorityEnum.SPEED,
+                )
+            )
+            response = await aiService.callAi(request)
+            if response and response.errorCount == 0 and response.content:
+                summary = response.content.strip()
+                if summary:  # a whitespace-only answer must not replace the real context
+                    logger.info(f"Session {sessionId}: Context summarized from {len(rawContext)} to {len(summary)} chars")
+                    return summary
+        except Exception as e:
+            logger.warning(f"Session context summarization failed for {sessionId}: {e}")
+
+        # Fallback: original context; the slice is a no-op when it is already <= 2000 chars
+        return rawContext[:2000]
+
     async def _summarizeContextBuffer(self, sessionId: str):
         """Summarize the older part of the context buffer to preserve information
         without exceeding the context window. This runs in the background."""
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index dd4f000e..0781086d 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -357,7 +357,13 @@ ANTWORT-STIL (wenn du antwortest):
 - NICHT "Ich bin {botName} und ich bin hier um zu helfen" wiederholen
 - NICHT frueheres wiederholen das du schon gesagt hast
 - Max 1-2 Saetze, praezise auf den Punkt
-- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT"""
+- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT
+
+STOP-ERKENNUNG:
+Wenn jemand dich bittet aufzuhoeren, still zu sein, zu stoppen, oder nicht mehr zu reden
+(in JEDER Sprache, z.B. "Shelly stop", "Shelly sei still", "Shelly halt", "Shelly be quiet",
+"Shelly shut up", "Shelly arrete", etc.), dann setze detectedIntent auf "stop" und
+shouldRespond auf false. Du musst NICHT antworten wenn jemand dich stoppt."""
 
         # Append user-configured instructions if provided
         if userSystemPrompt and userSystemPrompt.strip():
@@ -370,13 +376,14 @@ WICHTIG: Antworte IMMER als valides JSON in exakt diesem Format:
     "shouldRespond": true/false,
     "responseText": "Deine Antwort hier" oder null,
     "reasoning": "Kurze Begruendung deiner Entscheidung",
-    "detectedIntent": "addressed" | "question" | "proactive" | "none"
+    "detectedIntent": "addressed" | "question" | "proactive" | "stop" | "none"
 }}
 
 detectedIntent-Werte:
 - "addressed": {botName} wurde direkt angesprochen
 - "question": Eine allgemeine Frage wurde gestellt
 - "proactive": Du hast einen wertvollen proaktiven Beitrag
+- "stop": Der User bittet {botName} aufzuhoeren/still zu sein (in jeder Sprache)
 - "none": Kein Handlungsbedarf"""
 
         return basePrompt