feat(teamsbot): stop via AI intent (multilingual), initial session context summarization

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
patrick-motsch 2026-02-16 10:03:21 +01:00
parent 4186ec6188
commit a67c7f8b81
3 changed files with 68 additions and 22 deletions

View file

@ -36,6 +36,7 @@ class TeamsbotDetectedIntent(str, Enum):
ADDRESSED = "addressed" # Bot was directly addressed
QUESTION = "question" # A general question was asked
PROACTIVE = "proactive" # Bot has a valuable proactive contribution
STOP = "stop" # User asked the bot to stop/be quiet
NONE = "none" # No action needed

View file

@ -213,11 +213,17 @@ class TeamsbotService:
voiceInterface = getVoiceInterface(self.currentUser, self.mandateId)
# Load session context (user-provided background knowledge)
# If the context is long (>500 chars), summarize it to reduce token usage
session = interface.getSession(sessionId)
if session:
self._sessionContext = session.get("sessionContext")
rawContext = session.get("sessionContext")
if rawContext and len(rawContext) > 500:
logger.info(f"Session {sessionId}: Summarizing long session context ({len(rawContext)} chars)...")
self._sessionContext = await self._summarizeSessionContext(sessionId, rawContext)
elif rawContext:
self._sessionContext = rawContext
if self._sessionContext:
logger.info(f"Session {sessionId}: Loaded session context ({len(self._sessionContext)} chars)")
logger.info(f"Session {sessionId}: Session context ready ({len(self._sessionContext)} chars)")
logger.info(f"[WS-DEBUG] WebSocket handler started for session {sessionId}")
@ -332,23 +338,6 @@ class TeamsbotService:
if not text:
return
# Check for STOP command: "<botname> stop" or "<botname> STOP"
# This immediately stops the bot from speaking and clears the audio queue.
botNameLower = self.config.botName.lower()
textLower = text.lower()
if botNameLower in textLower and "stop" in textLower:
logger.info(f"Session {sessionId}: STOP command detected: [{speaker}] {text[:60]}")
if websocket:
try:
await websocket.send_text(json.dumps({
"type": "stopAudio",
"sessionId": sessionId,
}))
except Exception as stopErr:
logger.warning(f"Failed to send stop command: {stopErr}")
# Don't trigger AI analysis for stop commands
return
# Filter out the bot's own speech from AI triggering.
# The bot hears itself via captions — these should be stored in the
# transcript for the record, but must NOT trigger AI analysis (feedback loop).
@ -571,7 +560,20 @@ class TeamsbotService:
"priceCHF": response.priceCHF,
})
# Step 4: Respond if AI decided to
# Step 4a: Handle STOP intent -- stop audio immediately
if speechResult.detectedIntent == "stop":
logger.info(f"Session {sessionId}: AI detected STOP intent: {speechResult.reasoning}")
if websocket:
try:
await websocket.send_text(json.dumps({
"type": "stopAudio",
"sessionId": sessionId,
}))
except Exception as stopErr:
logger.warning(f"Failed to send stop command: {stopErr}")
return
# Step 4b: Respond if AI decided to
if speechResult.shouldRespond and speechResult.responseText:
if self.config.responseMode == TeamsbotResponseMode.MANUAL:
@ -677,6 +679,42 @@ class TeamsbotService:
# Context Summarization (for long sessions)
# =========================================================================
async def _summarizeSessionContext(self, sessionId: str, rawContext: str) -> str:
    """Summarize a long user-provided session context to its essential points.

    The returned text is meant to replace the raw context so that every
    subsequent AI call carries fewer tokens.

    Args:
        sessionId: Session identifier; used only in log messages.
        rawContext: The user-provided context text to condense.

    Returns:
        The stripped AI summary when the call reports no errors and yields
        non-blank text. Otherwise the original context, cut to its first
        2000 characters.
    """
    try:
        # NOTE(review): imported locally rather than at module level --
        # presumably to avoid an import cycle; confirm before moving.
        from modules.services.serviceAi.mainServiceAi import AiService
        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum

        serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
        aiService = AiService(serviceCenter=serviceContext)
        await aiService.ensureAiObjectsInitialized()

        # The instruction prompt is a runtime string and stays in German.
        request = AiCallRequest(
            prompt=(
                "Fasse den folgenden Kontext auf die wesentlichen Punkte zusammen. "
                "Behalte alle wichtigen Fakten, Namen, Zahlen, Entscheidungen und Aktionspunkte. "
                "Entferne Fuelltext und Wiederholungen. "
                "Antworte NUR mit der Zusammenfassung, keine Erklaerungen oder Einleitungen."
            ),
            context=rawContext,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.SPEED,
            )
        )
        response = await aiService.callAi(request)

        summary = ""
        if response and response.errorCount == 0 and response.content:
            summary = response.content.strip()

        # A whitespace-only answer strips down to "" -- returning that would
        # silently discard the user's context, so only accept non-blank text.
        if summary:
            logger.info(f"Session {sessionId}: Context summarized from {len(rawContext)} to {len(summary)} chars")
            return summary

        # The call finished without raising but produced nothing usable;
        # make the fallback visible in the logs.
        logger.warning(f"Session {sessionId}: Context summarization returned no usable summary, using original context")
    except Exception as e:
        # Best effort: summarization must never break session start-up.
        logger.warning(f"Session context summarization failed for {sessionId}: {e}")

    # Fallback: return original (truncated if very long). Slicing clamps to
    # the string length, so shorter contexts are returned unchanged.
    return rawContext[:2000]
async def _summarizeContextBuffer(self, sessionId: str):
"""Summarize the older part of the context buffer to preserve information
without exceeding the context window. This runs in the background."""

View file

@ -357,7 +357,13 @@ ANTWORT-STIL (wenn du antwortest):
- NICHT "Ich bin {botName} und ich bin hier um zu helfen" wiederholen
- NICHT frueheres wiederholen das du schon gesagt hast
- Max 1-2 Saetze, praezise auf den Punkt
- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT"""
- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT
STOP-ERKENNUNG:
Wenn jemand dich bittet aufzuhoeren, still zu sein, zu stoppen, oder nicht mehr zu reden
(in JEDER Sprache, z.B. "Shelly stop", "Shelly sei still", "Shelly halt", "Shelly be quiet",
"Shelly shut up", "Shelly arrete", etc.), dann setze detectedIntent auf "stop" und
shouldRespond auf false. Du musst NICHT antworten wenn jemand dich stoppt."""
# Append user-configured instructions if provided
if userSystemPrompt and userSystemPrompt.strip():
@ -370,13 +376,14 @@ WICHTIG: Antworte IMMER als valides JSON in exakt diesem Format:
"shouldRespond": true/false,
"responseText": "Deine Antwort hier" oder null,
"reasoning": "Kurze Begruendung deiner Entscheidung",
"detectedIntent": "addressed" | "question" | "proactive" | "none"
"detectedIntent": "addressed" | "question" | "proactive" | "stop" | "none"
}}
detectedIntent-Werte:
- "addressed": {botName} wurde direkt angesprochen
- "question": Eine allgemeine Frage wurde gestellt
- "proactive": Du hast einen wertvollen proaktiven Beitrag
- "stop": Der User bittet {botName} aufzuhoeren/still zu sein (in jeder Sprache)
- "none": Kein Handlungsbedarf"""
return basePrompt