Allow speaker-hint captions to trigger AI without transcript persistence.

Keep the aggressive hybrid persistence rules, but use final speaker hints as transient (non-persisted) context so the bot can still react when it is addressed directly, even when audio capture is silent.

Made-with: Cursor
This commit is contained in:
patrick-motsch 2026-02-26 21:41:56 +01:00
parent fe1a97564b
commit 25854edb4d

View file

@@ -524,11 +524,47 @@ class TeamsbotService:
if not text: if not text:
return return
# Speaker hints are lightweight caption-derived signals used only to # Speaker hints are lightweight caption-derived signals used for
# attribute audio-stream STT to likely speakers. They are not persisted. # speaker attribution. We do NOT persist caption text as transcript.
# However, for address detection we still allow transient analysis from
# speaker hints (without DB write), otherwise direct calls like
# "Nyla, hörst du mich?" can be missed when audio capture is silent.
if source in ("caption", "speakerHint"): if source in ("caption", "speakerHint"):
self._registerSpeakerHint(speaker, text) self._registerSpeakerHint(speaker, text)
if source in ("caption", "speakerHint"):
if (
source == "speakerHint"
and isFinal
and self.config.responseMode != TeamsbotResponseMode.TRANSCRIBE_ONLY
):
# Keep hint text only in volatile context (not persisted).
self._contextBuffer.append({
"speaker": speaker or "Unknown",
"text": text,
"timestamp": getUtcTimestamp(),
"source": "speakerHint",
})
maxSegments = self.config.contextWindowSegments
if len(self._contextBuffer) > maxSegments:
self._contextBuffer = self._contextBuffer[-maxSegments:]
shouldTriggerFromHint = self._shouldTriggerAnalysis(text, allowPeriodic=False)
logger.debug(
f"Session {sessionId}: speakerHint shouldTriggerAnalysis={shouldTriggerFromHint}, "
f"bufferSize={len(self._contextBuffer)}"
)
if shouldTriggerFromHint:
logger.info(
f"Session {sessionId}: Triggering AI analysis from speakerHint "
f"(buffer: {len(self._contextBuffer)} segments)"
)
await self._analyzeAndRespond(
sessionId,
interface,
voiceInterface,
websocket,
{"id": None, "speaker": speaker, "text": text, "source": source},
)
return return
# Filter out the bot's own speech entirely — captions of the bot's # Filter out the bot's own speech entirely — captions of the bot's
@@ -667,7 +703,7 @@ class TeamsbotService:
return False return False
def _shouldTriggerAnalysis(self, transcriptText: str) -> bool: def _shouldTriggerAnalysis(self, transcriptText: str, allowPeriodic: bool = True) -> bool:
""" """
Decide whether to trigger AI analysis based on the latest transcript. Decide whether to trigger AI analysis based on the latest transcript.
Triggers: Triggers:
@@ -711,7 +747,7 @@ class TeamsbotService:
return False return False
# Periodic trigger # Periodic trigger
if timeSinceLastCall >= self.config.triggerIntervalSeconds: if allowPeriodic and timeSinceLastCall >= self.config.triggerIntervalSeconds:
logger.info(f"Trigger: Periodic interval ({self.config.triggerIntervalSeconds}s) elapsed ({timeSinceLastCall:.1f}s since last call)") logger.info(f"Trigger: Periodic interval ({self.config.triggerIntervalSeconds}s) elapsed ({timeSinceLastCall:.1f}s since last call)")
return True return True