From a67c7f8b813f21b946af6b06a3ce05dc61e0be5f Mon Sep 17 00:00:00 2001
From: patrick-motsch
Date: Mon, 16 Feb 2026 10:03:21 +0100
Subject: [PATCH] feat(teamsbot): stop via AI intent (multilingual), initial
session context summarization
Co-authored-by: Cursor
---
.../features/teamsbot/datamodelTeamsbot.py | 1 +
modules/features/teamsbot/service.py | 78 ++++++++++++++-----
modules/services/serviceAi/mainServiceAi.py | 11 ++-
3 files changed, 68 insertions(+), 22 deletions(-)
diff --git a/modules/features/teamsbot/datamodelTeamsbot.py b/modules/features/teamsbot/datamodelTeamsbot.py
index e4803097..81625b90 100644
--- a/modules/features/teamsbot/datamodelTeamsbot.py
+++ b/modules/features/teamsbot/datamodelTeamsbot.py
@@ -36,6 +36,7 @@ class TeamsbotDetectedIntent(str, Enum):
ADDRESSED = "addressed" # Bot was directly addressed
QUESTION = "question" # A general question was asked
PROACTIVE = "proactive" # Bot has a valuable proactive contribution
+ STOP = "stop" # User asked the bot to stop/be quiet
NONE = "none" # No action needed
diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py
index 8b246f1f..9c3c537c 100644
--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@@ -213,11 +213,17 @@ class TeamsbotService:
voiceInterface = getVoiceInterface(self.currentUser, self.mandateId)
# Load session context (user-provided background knowledge)
+ # If the context is long (>500 chars), summarize it to reduce token usage
session = interface.getSession(sessionId)
if session:
- self._sessionContext = session.get("sessionContext")
+ rawContext = session.get("sessionContext")
+ if rawContext and len(rawContext) > 500:
+ logger.info(f"Session {sessionId}: Summarizing long session context ({len(rawContext)} chars)...")
+ self._sessionContext = await self._summarizeSessionContext(sessionId, rawContext)
+ elif rawContext:
+ self._sessionContext = rawContext
if self._sessionContext:
- logger.info(f"Session {sessionId}: Loaded session context ({len(self._sessionContext)} chars)")
+ logger.info(f"Session {sessionId}: Session context ready ({len(self._sessionContext)} chars)")
logger.info(f"[WS-DEBUG] WebSocket handler started for session {sessionId}")
@@ -332,23 +338,6 @@ class TeamsbotService:
if not text:
return
- # Check for STOP command: " stop" or " STOP"
- # This immediately stops the bot from speaking and clears the audio queue.
- botNameLower = self.config.botName.lower()
- textLower = text.lower()
- if botNameLower in textLower and "stop" in textLower:
- logger.info(f"Session {sessionId}: STOP command detected: [{speaker}] {text[:60]}")
- if websocket:
- try:
- await websocket.send_text(json.dumps({
- "type": "stopAudio",
- "sessionId": sessionId,
- }))
- except Exception as stopErr:
- logger.warning(f"Failed to send stop command: {stopErr}")
- # Don't trigger AI analysis for stop commands
- return
-
# Filter out the bot's own speech from AI triggering.
# The bot hears itself via captions — these should be stored in the
# transcript for the record, but must NOT trigger AI analysis (feedback loop).
@@ -571,7 +560,20 @@ class TeamsbotService:
"priceCHF": response.priceCHF,
})
- # Step 4: Respond if AI decided to
+ # Step 4a: Handle STOP intent -- stop audio immediately
+ if speechResult.detectedIntent == "stop":
+ logger.info(f"Session {sessionId}: AI detected STOP intent: {speechResult.reasoning}")
+ if websocket:
+ try:
+ await websocket.send_text(json.dumps({
+ "type": "stopAudio",
+ "sessionId": sessionId,
+ }))
+ except Exception as stopErr:
+ logger.warning(f"Failed to send stop command: {stopErr}")
+ return
+
+ # Step 4b: Respond if AI decided to
if speechResult.shouldRespond and speechResult.responseText:
if self.config.responseMode == TeamsbotResponseMode.MANUAL:
@@ -677,6 +679,42 @@ class TeamsbotService:
# Context Summarization (for long sessions)
# =========================================================================
+    async def _summarizeSessionContext(self, sessionId: str, rawContext: str) -> str:
+        """Condense a long user-provided session context with a single AI call.
+
+        Returns the stripped summary; falls back to rawContext capped at 2000 chars if
+        the call raises, reports errors, or yields empty/whitespace-only content."""
+        try:
+            from modules.services.serviceAi.mainServiceAi import AiService
+            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
+            serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
+            aiService = AiService(serviceCenter=serviceContext)
+            await aiService.ensureAiObjectsInitialized()
+            request = AiCallRequest(
+                prompt=(
+                    "Fasse den folgenden Kontext auf die wesentlichen Punkte zusammen. "
+                    "Behalte alle wichtigen Fakten, Namen, Zahlen, Entscheidungen und Aktionspunkte. "
+                    "Entferne Fuelltext und Wiederholungen. "
+                    "Antworte NUR mit der Zusammenfassung, keine Erklaerungen oder Einleitungen."
+                ),
+                context=rawContext,
+                options=AiCallOptions(
+                    operationType=OperationTypeEnum.DATA_ANALYSE,
+                    priority=PriorityEnum.SPEED,
+                ),
+            )
+            response = await aiService.callAi(request)
+            # Strip before testing: a whitespace-only reply must not replace the real context with "".
+            summary = (response.content or "").strip() if response and response.errorCount == 0 else ""
+            if summary:
+                logger.info(f"Session {sessionId}: Context summarized from {len(rawContext)} to {len(summary)} chars")
+                return summary
+            logger.warning(f"Session {sessionId}: Context summarization returned no usable content")
+        except Exception as e:
+            logger.warning(f"Session context summarization failed for {sessionId}: {e}")
+        # Fallback: original context capped at 2000 chars (the slice is a no-op for shorter text)
+        return rawContext[:2000]
+
async def _summarizeContextBuffer(self, sessionId: str):
"""Summarize the older part of the context buffer to preserve information
without exceeding the context window. This runs in the background."""
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index dd4f000e..0781086d 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -357,7 +357,13 @@ ANTWORT-STIL (wenn du antwortest):
- NICHT "Ich bin {botName} und ich bin hier um zu helfen" wiederholen
- NICHT frueheres wiederholen das du schon gesagt hast
- Max 1-2 Saetze, praezise auf den Punkt
-- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT"""
+- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT
+
+STOP-ERKENNUNG:
+Wenn jemand dich bittet aufzuhoeren, still zu sein, zu stoppen, oder nicht mehr zu reden
+(in JEDER Sprache, z.B. "Shelly stop", "Shelly sei still", "Shelly halt", "Shelly be quiet",
+"Shelly shut up", "Shelly arrete", etc.), dann setze detectedIntent auf "stop" und
+shouldRespond auf false. Du musst NICHT antworten wenn jemand dich stoppt."""
# Append user-configured instructions if provided
if userSystemPrompt and userSystemPrompt.strip():
@@ -370,13 +376,14 @@ WICHTIG: Antworte IMMER als valides JSON in exakt diesem Format:
"shouldRespond": true/false,
"responseText": "Deine Antwort hier" oder null,
"reasoning": "Kurze Begruendung deiner Entscheidung",
- "detectedIntent": "addressed" | "question" | "proactive" | "none"
+ "detectedIntent": "addressed" | "question" | "proactive" | "stop" | "none"
}}
detectedIntent-Werte:
- "addressed": {botName} wurde direkt angesprochen
- "question": Eine allgemeine Frage wurde gestellt
- "proactive": Du hast einen wertvollen proaktiven Beitrag
+- "stop": Der User bittet {botName} aufzuhoeren/still zu sein (in jeder Sprache)
- "none": Kein Handlungsbedarf"""
return basePrompt