Teamsbot: prevent double AI triggers, remove caption text from AI context

Made-with: Cursor
This commit is contained in:
patrick-motsch 2026-02-27 16:40:08 +01:00
parent 275f7bbc66
commit 681c96be8e

View file

@ -78,6 +78,7 @@ class TeamsbotService:
# State
self._lastAiCallTime: float = 0.0
self._aiAnalysisInProgress: bool = False
self._contextBuffer: List[Dict[str, Any]] = []
self._sessionContext: Optional[str] = None # User-provided background context
self._contextSummary: Optional[str] = None # AI-generated summary of long context
@ -553,32 +554,19 @@ class TeamsbotService:
if not text:
return
# Speaker hints are lightweight caption-derived signals used for
# speaker attribution only. Caption text is NOT used as transcript
# (transcript comes from STT/audioCapture or chat).
# For address detection we still allow transient analysis from
# speaker hints (without DB write), otherwise direct calls like
# "Nyla, hörst du mich?" can be missed when audio capture is silent.
# Captions are used ONLY for speaker name resolution (never as transcript).
# Transcript text comes exclusively from audio STT or chat.
# Address detection (bot name in caption) still triggers AI analysis
# using existing audio-based context — but caption text itself is NOT
# added to the context buffer.
if source in ("caption", "speakerHint"):
self._registerSpeakerHint(speaker, text)
# Do NOT emit caption text as transcript to UI; caption is for name resolution only.
if (
source == "speakerHint"
and isFinal
and self.config.responseMode != TeamsbotResponseMode.TRANSCRIBE_ONLY
):
# Keep hint text only in volatile context (not persisted).
self._contextBuffer.append({
"speaker": speaker or "Unknown",
"text": text,
"timestamp": getUtcTimestamp(),
"source": "speakerHint",
})
maxSegments = self.config.contextWindowSegments
if len(self._contextBuffer) > maxSegments:
self._contextBuffer = self._contextBuffer[-maxSegments:]
shouldTriggerFromHint = self._shouldTriggerAnalysis(text, allowPeriodic=False)
logger.debug(
f"Session {sessionId}: speakerHint shouldTriggerAnalysis={shouldTriggerFromHint}, "
@ -586,8 +574,8 @@ class TeamsbotService:
)
if shouldTriggerFromHint:
logger.info(
f"Session {sessionId}: Triggering AI analysis from speakerHint "
f"(buffer: {len(self._contextBuffer)} segments)"
f"Session {sessionId}: Triggering AI analysis from speakerHint address detection "
f"(buffer: {len(self._contextBuffer)} segments, caption text NOT in buffer)"
)
await self._analyzeAndRespond(
sessionId,
@ -832,6 +820,10 @@ class TeamsbotService:
triggerTranscript: Dict[str, Any],
):
"""Run SPEECH_TEAMS AI analysis and respond if needed."""
if self._aiAnalysisInProgress:
logger.info(f"Session {sessionId}: AI analysis already in progress, skipping duplicate trigger")
return
self._aiAnalysisInProgress = True
self._lastAiCallTime = time.time()
# Build transcript context from buffer.
@ -1101,6 +1093,8 @@ class TeamsbotService:
except Exception as e:
logger.error(f"SPEECH_TEAMS analysis failed for session {sessionId}: {type(e).__name__}: {e}", exc_info=True)
await _emitSessionEvent(sessionId, "error", {"message": f"AI analysis failed: {type(e).__name__}: {str(e)}"})
finally:
self._aiAnalysisInProgress = False
# =========================================================================
# AI Command Execution