From a67c7f8b813f21b946af6b06a3ce05dc61e0be5f Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Mon, 16 Feb 2026 10:03:21 +0100 Subject: [PATCH] feat(teamsbot): stop via AI intent (multilingual), initial session context summarization Co-authored-by: Cursor --- .../features/teamsbot/datamodelTeamsbot.py | 1 + modules/features/teamsbot/service.py | 78 ++++++++++++++----- modules/services/serviceAi/mainServiceAi.py | 11 ++- 3 files changed, 68 insertions(+), 22 deletions(-) diff --git a/modules/features/teamsbot/datamodelTeamsbot.py b/modules/features/teamsbot/datamodelTeamsbot.py index e4803097..81625b90 100644 --- a/modules/features/teamsbot/datamodelTeamsbot.py +++ b/modules/features/teamsbot/datamodelTeamsbot.py @@ -36,6 +36,7 @@ class TeamsbotDetectedIntent(str, Enum): ADDRESSED = "addressed" # Bot was directly addressed QUESTION = "question" # A general question was asked PROACTIVE = "proactive" # Bot has a valuable proactive contribution + STOP = "stop" # User asked the bot to stop/be quiet NONE = "none" # No action needed diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index 8b246f1f..9c3c537c 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -213,11 +213,17 @@ class TeamsbotService: voiceInterface = getVoiceInterface(self.currentUser, self.mandateId) # Load session context (user-provided background knowledge) + # If the context is long (>500 chars), summarize it to reduce token usage session = interface.getSession(sessionId) if session: - self._sessionContext = session.get("sessionContext") + rawContext = session.get("sessionContext") + if rawContext and len(rawContext) > 500: + logger.info(f"Session {sessionId}: Summarizing long session context ({len(rawContext)} chars)...") + self._sessionContext = await self._summarizeSessionContext(sessionId, rawContext) + elif rawContext: + self._sessionContext = rawContext if self._sessionContext: - logger.info(f"Session {sessionId}: Loaded session context ({len(self._sessionContext)} chars)") + logger.info(f"Session {sessionId}: Session context ready ({len(self._sessionContext)} chars)") logger.info(f"[WS-DEBUG] WebSocket handler started for session {sessionId}") @@ -332,23 +338,6 @@ class TeamsbotService: if not text: return - # Check for STOP command: " stop" or " STOP" - # This immediately stops the bot from speaking and clears the audio queue. - botNameLower = self.config.botName.lower() - textLower = text.lower() - if botNameLower in textLower and "stop" in textLower: - logger.info(f"Session {sessionId}: STOP command detected: [{speaker}] {text[:60]}") - if websocket: - try: - await websocket.send_text(json.dumps({ - "type": "stopAudio", - "sessionId": sessionId, - })) - except Exception as stopErr: - logger.warning(f"Failed to send stop command: {stopErr}") - # Don't trigger AI analysis for stop commands - return - # Filter out the bot's own speech from AI triggering. # The bot hears itself via captions — these should be stored in the # transcript for the record, but must NOT trigger AI analysis (feedback loop). @@ -571,7 +560,20 @@ class TeamsbotService: "priceCHF": response.priceCHF, }) - # Step 4: Respond if AI decided to + # Step 4a: Handle STOP intent -- stop audio immediately + if speechResult.detectedIntent == "stop": + logger.info(f"Session {sessionId}: AI detected STOP intent: {speechResult.reasoning}") + if websocket: + try: + await websocket.send_text(json.dumps({ + "type": "stopAudio", + "sessionId": sessionId, + })) + except Exception as stopErr: + logger.warning(f"Failed to send stop command: {stopErr}") + return + + # Step 4b: Respond if AI decided to if speechResult.shouldRespond and speechResult.responseText: if self.config.responseMode == TeamsbotResponseMode.MANUAL: @@ -677,6 +679,42 @@ class TeamsbotService: # Context Summarization (for long sessions) # ========================================================================= + async def _summarizeSessionContext(self, sessionId: str, rawContext: str) -> str: + """Summarize a long user-provided session context to its essential points. + This reduces token usage in every subsequent AI call.""" + try: + from modules.services.serviceAi.mainServiceAi import AiService + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum + + serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId) + aiService = AiService(serviceCenter=serviceContext) + await aiService.ensureAiObjectsInitialized() + + request = AiCallRequest( + prompt=( + "Fasse den folgenden Kontext auf die wesentlichen Punkte zusammen. " + "Behalte alle wichtigen Fakten, Namen, Zahlen, Entscheidungen und Aktionspunkte. " + "Entferne Fuelltext und Wiederholungen. " + "Antworte NUR mit der Zusammenfassung, keine Erklaerungen oder Einleitungen." + ), + context=rawContext, + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.SPEED, + ) + ) + + response = await aiService.callAi(request) + if response and response.errorCount == 0 and response.content: + summary = response.content.strip() + logger.info(f"Session {sessionId}: Context summarized from {len(rawContext)} to {len(summary)} chars") + return summary + except Exception as e: + logger.warning(f"Session context summarization failed for {sessionId}: {e}") + + # Fallback: return original (truncated if very long) + return rawContext[:2000] if len(rawContext) > 2000 else rawContext + async def _summarizeContextBuffer(self, sessionId: str): """Summarize the older part of the context buffer to preserve information without exceeding the context window. This runs in the background.""" diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index dd4f000e..0781086d 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -357,7 +357,13 @@ ANTWORT-STIL (wenn du antwortest): - NICHT "Ich bin {botName} und ich bin hier um zu helfen" wiederholen - NICHT frueheres wiederholen das du schon gesagt hast - Max 1-2 Saetze, praezise auf den Punkt -- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT""" +- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT + +STOP-ERKENNUNG: +Wenn jemand dich bittet aufzuhoeren, still zu sein, zu stoppen, oder nicht mehr zu reden +(in JEDER Sprache, z.B. "Shelly stop", "Shelly sei still", "Shelly halt", "Shelly be quiet", +"Shelly shut up", "Shelly arrete", etc.), dann setze detectedIntent auf "stop" und +shouldRespond auf false. Du musst NICHT antworten wenn jemand dich stoppt.""" # Append user-configured instructions if provided if userSystemPrompt and userSystemPrompt.strip(): @@ -370,13 +376,14 @@ WICHTIG: Antworte IMMER als valides JSON in exakt diesem Format: "shouldRespond": true/false, "responseText": "Deine Antwort hier" oder null, "reasoning": "Kurze Begruendung deiner Entscheidung", - "detectedIntent": "addressed" | "question" | "proactive" | "none" + "detectedIntent": "addressed" | "question" | "proactive" | "stop" | "none" }} detectedIntent-Werte: - "addressed": {botName} wurde direkt angesprochen - "question": Eine allgemeine Frage wurde gestellt - "proactive": Du hast einen wertvollen proaktiven Beitrag +- "stop": Der User bittet {botName} aufzuhoeren/still zu sein (in jeder Sprache) - "none": Kein Handlungsbedarf""" return basePrompt