Teamsbot: prevent double AI triggers, remove caption text from AI context
Made-with: Cursor
This commit is contained in:
parent
275f7bbc66
commit
681c96be8e
1 changed file with 14 additions and 20 deletions
|
|
@@ -78,6 +78,7 @@ class TeamsbotService:
|
|||
|
||||
# State
|
||||
self._lastAiCallTime: float = 0.0
|
||||
self._aiAnalysisInProgress: bool = False
|
||||
self._contextBuffer: List[Dict[str, Any]] = []
|
||||
self._sessionContext: Optional[str] = None # User-provided background context
|
||||
self._contextSummary: Optional[str] = None # AI-generated summary of long context
|
||||
|
|
@@ -553,32 +554,19 @@ class TeamsbotService:
|
|||
if not text:
|
||||
return
|
||||
|
||||
# Speaker hints are lightweight caption-derived signals used for
|
||||
# speaker attribution only. Caption text is NOT used as transcript
|
||||
# (transcript comes from STT/audioCapture or chat).
|
||||
# For address detection we still allow transient analysis from
|
||||
# speaker hints (without DB write), otherwise direct calls like
|
||||
# "Nyla, hörst du mich?" can be missed when audio capture is silent.
|
||||
# Captions are used ONLY for speaker name resolution (never as transcript).
|
||||
# Transcript text comes exclusively from audio STT or chat.
|
||||
# Address detection (bot name in caption) still triggers AI analysis
|
||||
# using existing audio-based context — but caption text itself is NOT
|
||||
# added to the context buffer.
|
||||
if source in ("caption", "speakerHint"):
|
||||
self._registerSpeakerHint(speaker, text)
|
||||
# Do NOT emit caption text as transcript to UI; caption is for name resolution only.
|
||||
|
||||
if (
|
||||
source == "speakerHint"
|
||||
and isFinal
|
||||
and self.config.responseMode != TeamsbotResponseMode.TRANSCRIBE_ONLY
|
||||
):
|
||||
# Keep hint text only in volatile context (not persisted).
|
||||
self._contextBuffer.append({
|
||||
"speaker": speaker or "Unknown",
|
||||
"text": text,
|
||||
"timestamp": getUtcTimestamp(),
|
||||
"source": "speakerHint",
|
||||
})
|
||||
maxSegments = self.config.contextWindowSegments
|
||||
if len(self._contextBuffer) > maxSegments:
|
||||
self._contextBuffer = self._contextBuffer[-maxSegments:]
|
||||
|
||||
shouldTriggerFromHint = self._shouldTriggerAnalysis(text, allowPeriodic=False)
|
||||
logger.debug(
|
||||
f"Session {sessionId}: speakerHint shouldTriggerAnalysis={shouldTriggerFromHint}, "
|
||||
|
|
@@ -586,8 +574,8 @@ class TeamsbotService:
|
|||
)
|
||||
if shouldTriggerFromHint:
|
||||
logger.info(
|
||||
f"Session {sessionId}: Triggering AI analysis from speakerHint "
|
||||
f"(buffer: {len(self._contextBuffer)} segments)"
|
||||
f"Session {sessionId}: Triggering AI analysis from speakerHint address detection "
|
||||
f"(buffer: {len(self._contextBuffer)} segments, caption text NOT in buffer)"
|
||||
)
|
||||
await self._analyzeAndRespond(
|
||||
sessionId,
|
||||
|
|
@@ -832,6 +820,10 @@ class TeamsbotService:
|
|||
triggerTranscript: Dict[str, Any],
|
||||
):
|
||||
"""Run SPEECH_TEAMS AI analysis and respond if needed."""
|
||||
if self._aiAnalysisInProgress:
|
||||
logger.info(f"Session {sessionId}: AI analysis already in progress, skipping duplicate trigger")
|
||||
return
|
||||
self._aiAnalysisInProgress = True
|
||||
self._lastAiCallTime = time.time()
|
||||
|
||||
# Build transcript context from buffer.
|
||||
|
|
@@ -1101,6 +1093,8 @@ class TeamsbotService:
|
|||
except Exception as e:
|
||||
logger.error(f"SPEECH_TEAMS analysis failed for session {sessionId}: {type(e).__name__}: {e}", exc_info=True)
|
||||
await _emitSessionEvent(sessionId, "error", {"message": f"AI analysis failed: {type(e).__name__}: {str(e)}"})
|
||||
finally:
|
||||
self._aiAnalysisInProgress = False
|
||||
|
||||
# =========================================================================
|
||||
# AI Command Execution
|
||||
|
|
|
|||
Loading…
Reference in a new issue