From 275f7bbc66d452a7acaebeeaf4c1c22645c6cb8f Mon Sep 17 00:00:00 2001
From: patrick-motsch <p.motsch@valueon.ch>
Date: Fri, 27 Feb 2026 13:56:46 +0100
Subject: [PATCH] Teamsbot: handle chatHistory source - store context but skip
 AI trigger

Made-with: Cursor
---
 modules/features/teamsbot/service.py | 76 +++++++++++++++++++++-------
 1 file changed, 58 insertions(+), 18 deletions(-)

diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py
index bce4edb0..2408f291 100644
--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@@ -271,7 +271,10 @@ class TeamsbotService:
                 if msgType == "transcript":
                     transcript = message.get("transcript", {})
                     source = transcript.get("source", "caption")
-                    logger.info(f"[WS] Transcript: speaker={transcript.get('speaker')}, text={transcript.get('text', '')[:60]}...")
+                    speaker = transcript.get("speaker", "Unknown")
+                    textPreview = (transcript.get("text", "") or "")[:60]
+                    # Caption/speakerHint sources are used only to resolve speaker names; the transcript text itself comes from STT.
+                    logger.info(f"[WS] Transcript (source={source}, speaker={speaker}): {textPreview}...")
                     await self._processTranscript(
                         sessionId=sessionId,
                         speaker=transcript.get("speaker", "Unknown"),
@@ -285,7 +288,12 @@ class TeamsbotService:
 
                 elif msgType == "chatMessage":
                     chat = message.get("chat", {})
-                    logger.info(f"[WS] Chat: speaker={chat.get('speaker')}, text={chat.get('text', '')[:60]}...")
+                    isHistory = chat.get("isHistory", False)
+                    source = "chatHistory" if isHistory else "chat"
+                    logger.info(
+                        f"[WS] Chat{'[HISTORY]' if isHistory else ''}: "
+                        f"speaker={chat.get('speaker')}, text={chat.get('text', '')[:60]}..."
+                    )
                     await self._processTranscript(
                         sessionId=sessionId,
                         speaker=chat.get("speaker", "Unknown"),
@@ -294,7 +302,7 @@ class TeamsbotService:
                         interface=interface,
                         voiceInterface=voiceInterface,
                         websocket=websocket,
-                        source="chat",
+                        source=source,
                     )
 
                 elif msgType == "status":
@@ -466,7 +474,11 @@ class TeamsbotService:
                 text = sttResult["text"].strip()
                 if text:
                     resolvedSpeaker = self._resolveSpeakerForAudioCapture()
-                    logger.info(f"[AudioChunk] STT result: {text[:80]}...")
+                    fromCaption = resolvedSpeaker.get("speakerResolvedFromHint", False)
+                    logger.info(
+                        f"[AudioChunk] STT result: speaker={resolvedSpeaker.get('speaker', 'Meeting Audio')} "
+                        f"(fromCaption={fromCaption}), text={text[:80]}..."
+                    )
                     await self._processTranscript(
                         sessionId=sessionId,
                         speaker=resolvedSpeaker["speaker"],
@@ -542,24 +554,14 @@ class TeamsbotService:
             return
 
         # Speaker hints are lightweight caption-derived signals used for
-        # speaker attribution. We do NOT persist caption text as transcript.
-        # However, for address detection we still allow transient analysis from
+        # speaker attribution only. Caption text is NOT used as transcript
+        # (transcript comes from STT/audioCapture or chat).
+        # For address detection we still allow transient analysis from
         # speaker hints (without DB write), otherwise direct calls like
         # "Nyla, hörst du mich?" can be missed when audio capture is silent.
         if source in ("caption", "speakerHint"):
             self._registerSpeakerHint(speaker, text)
-
-            # Emit caption/speakerHint to UI so user sees who spoke (audioCapture alone shows "Meeting Audio")
-            await _emitSessionEvent(sessionId, "transcript", {
-                "id": None,
-                "speaker": speaker or "Unknown",
-                "text": text,
-                "confidence": 1.0,
-                "timestamp": getIsoTimestamp(),
-                "isContinuation": False,
-                "source": source,
-                "speakerResolvedFromHint": False,
-            })
+            # Do NOT emit caption text as transcript to UI; caption is for name resolution only.
 
             if (
                 source == "speakerHint"
@@ -596,6 +598,44 @@ class TeamsbotService:
                     )
             return
 
+        # Chat history: messages sent before the bot joined the meeting.
+        # Stored in DB and context (available if someone refers to chat history)
+        # but never used to trigger AI responses.
+        if source == "chatHistory":
+            transcriptData = TeamsbotTranscript(
+                sessionId=sessionId,
+                speaker=speaker,
+                text=text,
+                timestamp=getIsoTimestamp(),
+                confidence=1.0,
+                language=self.config.language,
+                isFinal=True,
+            ).model_dump()
+            createdTranscript = interface.createTranscript(transcriptData)
+
+            self._contextBuffer.append({
+                "speaker": speaker or "Unknown",
+                "text": text,
+                "timestamp": getUtcTimestamp(),
+                "source": "chatHistory",
+            })
+            maxSegments = self.config.contextWindowSegments
+            if len(self._contextBuffer) > maxSegments:
+                self._contextBuffer = self._contextBuffer[-maxSegments:]
+
+            await _emitSessionEvent(sessionId, "transcript", {
+                "id": createdTranscript.get("id"),
+                "speaker": speaker,
+                "text": text,
+                "confidence": 1.0,
+                "timestamp": getIsoTimestamp(),
+                "isContinuation": False,
+                "source": "chatHistory",
+                "isHistory": True,
+            })
+            logger.debug(f"Session {sessionId}: Chat history stored (no AI trigger): [{speaker}] {text[:60]}")
+            return
+
         # Filter out the bot's own speech entirely — captions of the bot's
         # own voice come back as garbled text (e.g. German TTS → English caption)
         # which pollutes the context buffer and confuses AI analysis.