feat(teamsbot): stop via AI intent (multilingual), initial session context summarization
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
4186ec6188
commit
a67c7f8b81
3 changed files with 68 additions and 22 deletions
|
|
@ -36,6 +36,7 @@ class TeamsbotDetectedIntent(str, Enum):
|
|||
ADDRESSED = "addressed" # Bot was directly addressed
|
||||
QUESTION = "question" # A general question was asked
|
||||
PROACTIVE = "proactive" # Bot has a valuable proactive contribution
|
||||
STOP = "stop" # User asked the bot to stop/be quiet
|
||||
NONE = "none" # No action needed
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -213,11 +213,17 @@ class TeamsbotService:
|
|||
voiceInterface = getVoiceInterface(self.currentUser, self.mandateId)
|
||||
|
||||
# Load session context (user-provided background knowledge)
|
||||
# If the context is long (>500 chars), summarize it to reduce token usage
|
||||
session = interface.getSession(sessionId)
|
||||
if session:
|
||||
self._sessionContext = session.get("sessionContext")
|
||||
rawContext = session.get("sessionContext")
|
||||
if rawContext and len(rawContext) > 500:
|
||||
logger.info(f"Session {sessionId}: Summarizing long session context ({len(rawContext)} chars)...")
|
||||
self._sessionContext = await self._summarizeSessionContext(sessionId, rawContext)
|
||||
elif rawContext:
|
||||
self._sessionContext = rawContext
|
||||
if self._sessionContext:
|
||||
logger.info(f"Session {sessionId}: Loaded session context ({len(self._sessionContext)} chars)")
|
||||
logger.info(f"Session {sessionId}: Session context ready ({len(self._sessionContext)} chars)")
|
||||
|
||||
logger.info(f"[WS-DEBUG] WebSocket handler started for session {sessionId}")
|
||||
|
||||
|
|
@ -332,23 +338,6 @@ class TeamsbotService:
|
|||
if not text:
|
||||
return
|
||||
|
||||
# Check for STOP command: "<botname> stop" or "<botname> STOP"
|
||||
# This immediately stops the bot from speaking and clears the audio queue.
|
||||
botNameLower = self.config.botName.lower()
|
||||
textLower = text.lower()
|
||||
if botNameLower in textLower and "stop" in textLower:
|
||||
logger.info(f"Session {sessionId}: STOP command detected: [{speaker}] {text[:60]}")
|
||||
if websocket:
|
||||
try:
|
||||
await websocket.send_text(json.dumps({
|
||||
"type": "stopAudio",
|
||||
"sessionId": sessionId,
|
||||
}))
|
||||
except Exception as stopErr:
|
||||
logger.warning(f"Failed to send stop command: {stopErr}")
|
||||
# Don't trigger AI analysis for stop commands
|
||||
return
|
||||
|
||||
# Filter out the bot's own speech from AI triggering.
|
||||
# The bot hears itself via captions — these should be stored in the
|
||||
# transcript for the record, but must NOT trigger AI analysis (feedback loop).
|
||||
|
|
@ -571,7 +560,20 @@ class TeamsbotService:
|
|||
"priceCHF": response.priceCHF,
|
||||
})
|
||||
|
||||
# Step 4: Respond if AI decided to
|
||||
# Step 4a: Handle STOP intent -- stop audio immediately
|
||||
if speechResult.detectedIntent == "stop":
|
||||
logger.info(f"Session {sessionId}: AI detected STOP intent: {speechResult.reasoning}")
|
||||
if websocket:
|
||||
try:
|
||||
await websocket.send_text(json.dumps({
|
||||
"type": "stopAudio",
|
||||
"sessionId": sessionId,
|
||||
}))
|
||||
except Exception as stopErr:
|
||||
logger.warning(f"Failed to send stop command: {stopErr}")
|
||||
return
|
||||
|
||||
# Step 4b: Respond if AI decided to
|
||||
if speechResult.shouldRespond and speechResult.responseText:
|
||||
|
||||
if self.config.responseMode == TeamsbotResponseMode.MANUAL:
|
||||
|
|
@ -677,6 +679,42 @@ class TeamsbotService:
|
|||
# Context Summarization (for long sessions)
|
||||
# =========================================================================
|
||||
|
||||
async def _summarizeSessionContext(self, sessionId: str, rawContext: str) -> str:
    """Summarize a long user-provided session context to its essential points.

    The summary is meant to replace the raw context so that subsequent AI
    calls carry fewer tokens.

    Args:
        sessionId: Session identifier; used only in log messages.
        rawContext: The user-provided context text to condense.

    Returns:
        The stripped AI summary when the call reports no errors and yields
        non-blank text; otherwise the first 2000 characters of ``rawContext``
        (the whole text if it is shorter).

    Exceptions raised while importing, initializing or calling the AI
    service are logged as warnings and lead to the fallback instead of
    propagating.
    """
    try:
        # The imports live inside the try block, so an import failure is
        # handled like any other summarization error (fallback below).
        from modules.services.serviceAi.mainServiceAi import AiService
        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum

        serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
        aiService = AiService(serviceCenter=serviceContext)
        await aiService.ensureAiObjectsInitialized()

        request = AiCallRequest(
            prompt=(
                "Fasse den folgenden Kontext auf die wesentlichen Punkte zusammen. "
                "Behalte alle wichtigen Fakten, Namen, Zahlen, Entscheidungen und Aktionspunkte. "
                "Entferne Fuelltext und Wiederholungen. "
                "Antworte NUR mit der Zusammenfassung, keine Erklaerungen oder Einleitungen."
            ),
            context=rawContext,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.SPEED,
            )
        )

        response = await aiService.callAi(request)
        if response and response.errorCount == 0 and response.content:
            summary = response.content.strip()
            # A whitespace-only answer would otherwise replace the user's
            # context with an empty string; treat it as a failed attempt.
            if summary:
                logger.info(f"Session {sessionId}: Context summarized from {len(rawContext)} to {len(summary)} chars")
                return summary
            logger.warning(f"Session {sessionId}: Context summarization returned blank text, using fallback")
        else:
            # Make the silent fall-through visible in the logs.
            logger.warning(f"Session {sessionId}: Context summarization returned no usable content, using fallback")
    except Exception as e:
        logger.warning(f"Session context summarization failed for {sessionId}: {e}")

    # Fallback: return original (truncated if very long).
    # Slicing is a no-op for strings of 2000 characters or fewer.
    return rawContext[:2000]
|
||||
|
||||
async def _summarizeContextBuffer(self, sessionId: str):
|
||||
"""Summarize the older part of the context buffer to preserve information
|
||||
without exceeding the context window. This runs in the background."""
|
||||
|
|
|
|||
|
|
@ -357,7 +357,13 @@ ANTWORT-STIL (wenn du antwortest):
|
|||
- NICHT "Ich bin {botName} und ich bin hier um zu helfen" wiederholen
|
||||
- NICHT frueheres wiederholen das du schon gesagt hast
|
||||
- Max 1-2 Saetze, praezise auf den Punkt
|
||||
- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT"""
|
||||
- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT
|
||||
|
||||
STOP-ERKENNUNG:
|
||||
Wenn jemand dich bittet aufzuhoeren, still zu sein, zu stoppen, oder nicht mehr zu reden
|
||||
(in JEDER Sprache, z.B. "Shelly stop", "Shelly sei still", "Shelly halt", "Shelly be quiet",
|
||||
"Shelly shut up", "Shelly arrete", etc.), dann setze detectedIntent auf "stop" und
|
||||
shouldRespond auf false. Du musst NICHT antworten wenn jemand dich stoppt."""
|
||||
|
||||
# Append user-configured instructions if provided
|
||||
if userSystemPrompt and userSystemPrompt.strip():
|
||||
|
|
@ -370,13 +376,14 @@ WICHTIG: Antworte IMMER als valides JSON in exakt diesem Format:
|
|||
"shouldRespond": true/false,
|
||||
"responseText": "Deine Antwort hier" oder null,
|
||||
"reasoning": "Kurze Begruendung deiner Entscheidung",
|
||||
"detectedIntent": "addressed" | "question" | "proactive" | "none"
|
||||
"detectedIntent": "addressed" | "question" | "proactive" | "stop" | "none"
|
||||
}}
|
||||
|
||||
detectedIntent-Werte:
|
||||
- "addressed": {botName} wurde direkt angesprochen
|
||||
- "question": Eine allgemeine Frage wurde gestellt
|
||||
- "proactive": Du hast einen wertvollen proaktiven Beitrag
|
||||
- "stop": Der User bittet {botName} aufzuhoeren/still zu sein (in jeder Sprache)
|
||||
- "none": Kein Handlungsbedarf"""
|
||||
|
||||
return basePrompt
|
||||
|
|
|
|||
Loading…
Reference in a new issue