feat(teamsbot): stop via AI intent (multilingual), initial session context summarization
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
4186ec6188
commit
a67c7f8b81
3 changed files with 68 additions and 22 deletions
|
|
@ -36,6 +36,7 @@ class TeamsbotDetectedIntent(str, Enum):
|
||||||
ADDRESSED = "addressed" # Bot was directly addressed
|
ADDRESSED = "addressed" # Bot was directly addressed
|
||||||
QUESTION = "question" # A general question was asked
|
QUESTION = "question" # A general question was asked
|
||||||
PROACTIVE = "proactive" # Bot has a valuable proactive contribution
|
PROACTIVE = "proactive" # Bot has a valuable proactive contribution
|
||||||
|
STOP = "stop" # User asked the bot to stop/be quiet
|
||||||
NONE = "none" # No action needed
|
NONE = "none" # No action needed
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -213,11 +213,17 @@ class TeamsbotService:
|
||||||
voiceInterface = getVoiceInterface(self.currentUser, self.mandateId)
|
voiceInterface = getVoiceInterface(self.currentUser, self.mandateId)
|
||||||
|
|
||||||
# Load session context (user-provided background knowledge)
|
# Load session context (user-provided background knowledge)
|
||||||
|
# If the context is long (>500 chars), summarize it to reduce token usage
|
||||||
session = interface.getSession(sessionId)
|
session = interface.getSession(sessionId)
|
||||||
if session:
|
if session:
|
||||||
self._sessionContext = session.get("sessionContext")
|
rawContext = session.get("sessionContext")
|
||||||
|
if rawContext and len(rawContext) > 500:
|
||||||
|
logger.info(f"Session {sessionId}: Summarizing long session context ({len(rawContext)} chars)...")
|
||||||
|
self._sessionContext = await self._summarizeSessionContext(sessionId, rawContext)
|
||||||
|
elif rawContext:
|
||||||
|
self._sessionContext = rawContext
|
||||||
if self._sessionContext:
|
if self._sessionContext:
|
||||||
logger.info(f"Session {sessionId}: Loaded session context ({len(self._sessionContext)} chars)")
|
logger.info(f"Session {sessionId}: Session context ready ({len(self._sessionContext)} chars)")
|
||||||
|
|
||||||
logger.info(f"[WS-DEBUG] WebSocket handler started for session {sessionId}")
|
logger.info(f"[WS-DEBUG] WebSocket handler started for session {sessionId}")
|
||||||
|
|
||||||
|
|
@ -332,23 +338,6 @@ class TeamsbotService:
|
||||||
if not text:
|
if not text:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Check for STOP command: "<botname> stop" or "<botname> STOP"
|
|
||||||
# This immediately stops the bot from speaking and clears the audio queue.
|
|
||||||
botNameLower = self.config.botName.lower()
|
|
||||||
textLower = text.lower()
|
|
||||||
if botNameLower in textLower and "stop" in textLower:
|
|
||||||
logger.info(f"Session {sessionId}: STOP command detected: [{speaker}] {text[:60]}")
|
|
||||||
if websocket:
|
|
||||||
try:
|
|
||||||
await websocket.send_text(json.dumps({
|
|
||||||
"type": "stopAudio",
|
|
||||||
"sessionId": sessionId,
|
|
||||||
}))
|
|
||||||
except Exception as stopErr:
|
|
||||||
logger.warning(f"Failed to send stop command: {stopErr}")
|
|
||||||
# Don't trigger AI analysis for stop commands
|
|
||||||
return
|
|
||||||
|
|
||||||
# Filter out the bot's own speech from AI triggering.
|
# Filter out the bot's own speech from AI triggering.
|
||||||
# The bot hears itself via captions — these should be stored in the
|
# The bot hears itself via captions — these should be stored in the
|
||||||
# transcript for the record, but must NOT trigger AI analysis (feedback loop).
|
# transcript for the record, but must NOT trigger AI analysis (feedback loop).
|
||||||
|
|
@ -571,7 +560,20 @@ class TeamsbotService:
|
||||||
"priceCHF": response.priceCHF,
|
"priceCHF": response.priceCHF,
|
||||||
})
|
})
|
||||||
|
|
||||||
# Step 4: Respond if AI decided to
|
# Step 4a: Handle STOP intent -- stop audio immediately
|
||||||
|
if speechResult.detectedIntent == "stop":
|
||||||
|
logger.info(f"Session {sessionId}: AI detected STOP intent: {speechResult.reasoning}")
|
||||||
|
if websocket:
|
||||||
|
try:
|
||||||
|
await websocket.send_text(json.dumps({
|
||||||
|
"type": "stopAudio",
|
||||||
|
"sessionId": sessionId,
|
||||||
|
}))
|
||||||
|
except Exception as stopErr:
|
||||||
|
logger.warning(f"Failed to send stop command: {stopErr}")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Step 4b: Respond if AI decided to
|
||||||
if speechResult.shouldRespond and speechResult.responseText:
|
if speechResult.shouldRespond and speechResult.responseText:
|
||||||
|
|
||||||
if self.config.responseMode == TeamsbotResponseMode.MANUAL:
|
if self.config.responseMode == TeamsbotResponseMode.MANUAL:
|
||||||
|
|
@ -677,6 +679,42 @@ class TeamsbotService:
|
||||||
# Context Summarization (for long sessions)
|
# Context Summarization (for long sessions)
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
|
async def _summarizeSessionContext(self, sessionId: str, rawContext: str) -> str:
    """Summarize a long user-provided session context to its essential points.

    The summary is intended to reduce token usage in subsequent AI calls.
    This method never raises: any failure of the AI call is logged and the
    original context (capped in length) is returned instead.

    Args:
        sessionId: Session identifier, used only for log messages.
        rawContext: The user-provided context text to condense.

    Returns:
        The stripped AI summary when the call succeeds with no errors and
        non-blank content; otherwise ``rawContext`` truncated to at most
        2000 characters.
    """
    # Upper bound for the fallback so a failed summarization cannot push an
    # arbitrarily long context into later prompts.
    maxFallbackChars = 2000

    try:
        # Imported at call time, as in the original code
        # (NOTE(review): presumably to avoid an import cycle -- confirm).
        from modules.services.serviceAi.mainServiceAi import AiService
        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum

        serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
        aiService = AiService(serviceCenter=serviceContext)
        await aiService.ensureAiObjectsInitialized()

        request = AiCallRequest(
            prompt=(
                "Fasse den folgenden Kontext auf die wesentlichen Punkte zusammen. "
                "Behalte alle wichtigen Fakten, Namen, Zahlen, Entscheidungen und Aktionspunkte. "
                "Entferne Fuelltext und Wiederholungen. "
                "Antworte NUR mit der Zusammenfassung, keine Erklaerungen oder Einleitungen."
            ),
            context=rawContext,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.SPEED,
            ),
        )

        response = await aiService.callAi(request)
        if response and response.errorCount == 0 and response.content:
            summary = response.content.strip()
            # A whitespace-only answer is truthy before stripping; returning
            # it would silently discard the user's context, so require a
            # non-empty summary before accepting it.
            if summary:
                logger.info(f"Session {sessionId}: Context summarized from {len(rawContext)} to {len(summary)} chars")
                return summary

        # The call completed without raising but produced nothing usable;
        # make the fallback visible in the logs instead of failing silently.
        logger.warning(f"Session {sessionId}: Context summarization returned no usable content, using original context")
    except Exception as e:
        # Best-effort feature: summarization problems must not break the session.
        logger.warning(f"Session context summarization failed for {sessionId}: {e}")

    # Fallback: return original (truncated if very long). Slicing is a no-op
    # for strings that are already within the limit.
    return rawContext[:maxFallbackChars]
|
||||||
|
|
||||||
async def _summarizeContextBuffer(self, sessionId: str):
|
async def _summarizeContextBuffer(self, sessionId: str):
|
||||||
"""Summarize the older part of the context buffer to preserve information
|
"""Summarize the older part of the context buffer to preserve information
|
||||||
without exceeding the context window. This runs in the background."""
|
without exceeding the context window. This runs in the background."""
|
||||||
|
|
|
||||||
|
|
@ -357,7 +357,13 @@ ANTWORT-STIL (wenn du antwortest):
|
||||||
- NICHT "Ich bin {botName} und ich bin hier um zu helfen" wiederholen
|
- NICHT "Ich bin {botName} und ich bin hier um zu helfen" wiederholen
|
||||||
- NICHT frueheres wiederholen das du schon gesagt hast
|
- NICHT frueheres wiederholen das du schon gesagt hast
|
||||||
- Max 1-2 Saetze, praezise auf den Punkt
|
- Max 1-2 Saetze, praezise auf den Punkt
|
||||||
- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT"""
|
- Sieh dir an was du (markiert als [YOU]) bereits gesagt hast und wiederhole es NICHT
|
||||||
|
|
||||||
|
STOP-ERKENNUNG:
|
||||||
|
Wenn jemand dich bittet aufzuhoeren, still zu sein, zu stoppen, oder nicht mehr zu reden
|
||||||
|
(in JEDER Sprache, z.B. "Shelly stop", "Shelly sei still", "Shelly halt", "Shelly be quiet",
|
||||||
|
"Shelly shut up", "Shelly arrete", etc.), dann setze detectedIntent auf "stop" und
|
||||||
|
shouldRespond auf false. Du musst NICHT antworten wenn jemand dich stoppt."""
|
||||||
|
|
||||||
# Append user-configured instructions if provided
|
# Append user-configured instructions if provided
|
||||||
if userSystemPrompt and userSystemPrompt.strip():
|
if userSystemPrompt and userSystemPrompt.strip():
|
||||||
|
|
@ -370,13 +376,14 @@ WICHTIG: Antworte IMMER als valides JSON in exakt diesem Format:
|
||||||
"shouldRespond": true/false,
|
"shouldRespond": true/false,
|
||||||
"responseText": "Deine Antwort hier" oder null,
|
"responseText": "Deine Antwort hier" oder null,
|
||||||
"reasoning": "Kurze Begruendung deiner Entscheidung",
|
"reasoning": "Kurze Begruendung deiner Entscheidung",
|
||||||
"detectedIntent": "addressed" | "question" | "proactive" | "none"
|
"detectedIntent": "addressed" | "question" | "proactive" | "stop" | "none"
|
||||||
}}
|
}}
|
||||||
|
|
||||||
detectedIntent-Werte:
|
detectedIntent-Werte:
|
||||||
- "addressed": {botName} wurde direkt angesprochen
|
- "addressed": {botName} wurde direkt angesprochen
|
||||||
- "question": Eine allgemeine Frage wurde gestellt
|
- "question": Eine allgemeine Frage wurde gestellt
|
||||||
- "proactive": Du hast einen wertvollen proaktiven Beitrag
|
- "proactive": Du hast einen wertvollen proaktiven Beitrag
|
||||||
|
- "stop": Der User bittet {botName} aufzuhoeren/still zu sein (in jeder Sprache)
|
||||||
- "none": Kein Handlungsbedarf"""
|
- "none": Kein Handlungsbedarf"""
|
||||||
|
|
||||||
return basePrompt
|
return basePrompt
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue