From 275f7bbc66d452a7acaebeeaf4c1c22645c6cb8f Mon Sep 17 00:00:00 2001
From: patrick-motsch
Date: Fri, 27 Feb 2026 13:56:46 +0100
Subject: [PATCH] Teamsbot: handle chatHistory source - store context but skip
AI trigger
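
Chat messages flagged with isHistory (sent before the bot joined the
meeting) are routed through _processTranscript with source="chatHistory":
they are persisted and added to the context buffer, but never trigger AI
responses. Caption/speakerHint text is no longer emitted to the UI as
transcript; captions are used for speaker-name resolution only.

Minimal sketch of a parsed message handled by the new branch (the "type"
envelope key and the example values are assumptions; only the "chat"
fields reflect what the handler actually reads):

    message = {
        "type": "chatMessage",  # envelope key assumed, not shown in this diff
        "chat": {
            "speaker": "Jane Doe",
            "text": "Agenda link is in the chat",
            "isHistory": True,  # history entry: stored + context only, no AI trigger
        },
    }
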
Made-with: Cursor
---
modules/features/teamsbot/service.py | 76 +++++++++++++++++++++-------
1 file changed, 58 insertions(+), 18 deletions(-)
diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py
index bce4edb0..2408f291 100644
--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@@ -271,7 +271,10 @@ class TeamsbotService:
if msgType == "transcript":
transcript = message.get("transcript", {})
source = transcript.get("source", "caption")
- logger.info(f"[WS] Transcript: speaker={transcript.get('speaker')}, text={transcript.get('text', '')[:60]}...")
+ speaker = transcript.get("speaker", "Unknown")
+ textPreview = (transcript.get("text", "") or "")[:60]
+ # Caption/speakerHint: name resolution only; transcript comes from STT
+ logger.info(f"[WS] Transcript (source={source}, speaker={speaker}): {textPreview}...")
await self._processTranscript(
sessionId=sessionId,
speaker=transcript.get("speaker", "Unknown"),
@@ -285,7 +288,12 @@ class TeamsbotService:
elif msgType == "chatMessage":
chat = message.get("chat", {})
- logger.info(f"[WS] Chat: speaker={chat.get('speaker')}, text={chat.get('text', '')[:60]}...")
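+ # History messages get their own source so _processTranscript can skip the AI trigger.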
+ isHistory = chat.get("isHistory", False)
+ source = "chatHistory" if isHistory else "chat"
+ logger.info(
+ f"[WS] Chat{'[HISTORY]' if isHistory else ''}: "
+ f"speaker={chat.get('speaker')}, text={chat.get('text', '')[:60]}..."
+ )
await self._processTranscript(
sessionId=sessionId,
speaker=chat.get("speaker", "Unknown"),
@@ -294,7 +302,7 @@ class TeamsbotService:
interface=interface,
voiceInterface=voiceInterface,
websocket=websocket,
- source="chat",
+ source=source,
)
elif msgType == "status":
@@ -466,7 +474,11 @@ class TeamsbotService:
text = sttResult["text"].strip()
if text:
resolvedSpeaker = self._resolveSpeakerForAudioCapture()
- logger.info(f"[AudioChunk] STT result: {text[:80]}...")
+ fromCaption = resolvedSpeaker.get("speakerResolvedFromHint", False)
+ logger.info(
+ f"[AudioChunk] STT result: speaker={resolvedSpeaker.get('speaker', 'Meeting Audio')} "
+ f"(fromCaption={fromCaption}), text={text[:80]}..."
+ )
await self._processTranscript(
sessionId=sessionId,
speaker=resolvedSpeaker["speaker"],
@@ -542,24 +554,14 @@ class TeamsbotService:
return
# Speaker hints are lightweight caption-derived signals used for
- # speaker attribution. We do NOT persist caption text as transcript.
- # However, for address detection we still allow transient analysis from
+ # speaker attribution only. Caption text is NOT used as transcript
+ # (transcript comes from STT/audioCapture or chat).
+ # For address detection we still allow transient analysis from
# speaker hints (without DB write), otherwise direct calls like
# "Nyla, hörst du mich?" can be missed when audio capture is silent.
if source in ("caption", "speakerHint"):
self._registerSpeakerHint(speaker, text)
-
- # Emit caption/speakerHint to UI so user sees who spoke (audioCapture alone shows "Meeting Audio")
- await _emitSessionEvent(sessionId, "transcript", {
- "id": None,
- "speaker": speaker or "Unknown",
- "text": text,
- "confidence": 1.0,
- "timestamp": getIsoTimestamp(),
- "isContinuation": False,
- "source": source,
- "speakerResolvedFromHint": False,
- })
+ # Do NOT emit caption text as transcript to UI; caption is for name resolution only.
if (
source == "speakerHint"
@@ -596,6 +598,44 @@ class TeamsbotService:
)
return
+ # Chat history: messages sent before the bot joined the meeting.
+ # Stored in the DB and in the context buffer (available if someone later
+ # refers to the chat history), but never used to trigger AI responses.
+ if source == "chatHistory":
+ transcriptData = TeamsbotTranscript(
+ sessionId=sessionId,
+ speaker=speaker,
+ text=text,
+ timestamp=getIsoTimestamp(),
+ confidence=1.0,
+ language=self.config.language,
+ isFinal=True,
+ ).model_dump()
+ createdTranscript = interface.createTranscript(transcriptData)
+
+ self._contextBuffer.append({
+ "speaker": speaker or "Unknown",
+ "text": text,
+ "timestamp": getUtcTimestamp(),
+ "source": "chatHistory",
+ })
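+ # Trim the context buffer to the configured rolling window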
+ maxSegments = self.config.contextWindowSegments
+ if len(self._contextBuffer) > maxSegments:
+ self._contextBuffer = self._contextBuffer[-maxSegments:]
+
+ await _emitSessionEvent(sessionId, "transcript", {
+ "id": createdTranscript.get("id"),
+ "speaker": speaker or "Unknown",
+ "text": text,
+ "confidence": 1.0,
+ "timestamp": getIsoTimestamp(),
+ "isContinuation": False,
+ "source": "chatHistory",
+ "isHistory": True,
+ })
+ logger.debug(f"Session {sessionId}: Chat history stored (no AI trigger): [{speaker}] {text[:60]}")
+ return
+
# Filter out the bot's own speech entirely — captions of the bot's
# own voice come back as garbled text (e.g. German TTS → English caption)
# which pollutes the context buffer and confuses AI analysis.