Teamsbot: handle chatHistory source - store context but skip AI trigger
Made-with: Cursor
This commit is contained in:
parent
21f77d1924
commit
275f7bbc66
1 changed file with 58 additions and 18 deletions
|
|
@ -271,7 +271,10 @@ class TeamsbotService:
|
|||
if msgType == "transcript":
|
||||
transcript = message.get("transcript", {})
|
||||
source = transcript.get("source", "caption")
|
||||
logger.info(f"[WS] Transcript: speaker={transcript.get('speaker')}, text={transcript.get('text', '')[:60]}...")
|
||||
speaker = transcript.get("speaker", "Unknown")
|
||||
textPreview = (transcript.get("text", "") or "")[:60]
|
||||
# Caption/speakerHint: name resolution only; transcript comes from STT
|
||||
logger.info(f"[WS] Transcript (source={source}, speaker={speaker}): {textPreview}...")
|
||||
await self._processTranscript(
|
||||
sessionId=sessionId,
|
||||
speaker=transcript.get("speaker", "Unknown"),
|
||||
|
|
@ -285,7 +288,12 @@ class TeamsbotService:
|
|||
|
||||
elif msgType == "chatMessage":
|
||||
chat = message.get("chat", {})
|
||||
logger.info(f"[WS] Chat: speaker={chat.get('speaker')}, text={chat.get('text', '')[:60]}...")
|
||||
isHistory = chat.get("isHistory", False)
|
||||
source = "chatHistory" if isHistory else "chat"
|
||||
logger.info(
|
||||
f"[WS] Chat{'[HISTORY]' if isHistory else ''}: "
|
||||
f"speaker={chat.get('speaker')}, text={chat.get('text', '')[:60]}..."
|
||||
)
|
||||
await self._processTranscript(
|
||||
sessionId=sessionId,
|
||||
speaker=chat.get("speaker", "Unknown"),
|
||||
|
|
@ -294,7 +302,7 @@ class TeamsbotService:
|
|||
interface=interface,
|
||||
voiceInterface=voiceInterface,
|
||||
websocket=websocket,
|
||||
source="chat",
|
||||
source=source,
|
||||
)
|
||||
|
||||
elif msgType == "status":
|
||||
|
|
@ -466,7 +474,11 @@ class TeamsbotService:
|
|||
text = sttResult["text"].strip()
|
||||
if text:
|
||||
resolvedSpeaker = self._resolveSpeakerForAudioCapture()
|
||||
logger.info(f"[AudioChunk] STT result: {text[:80]}...")
|
||||
fromCaption = resolvedSpeaker.get("speakerResolvedFromHint", False)
|
||||
logger.info(
|
||||
f"[AudioChunk] STT result: speaker={resolvedSpeaker.get('speaker', 'Meeting Audio')} "
|
||||
f"(fromCaption={fromCaption}), text={text[:80]}..."
|
||||
)
|
||||
await self._processTranscript(
|
||||
sessionId=sessionId,
|
||||
speaker=resolvedSpeaker["speaker"],
|
||||
|
|
@ -542,24 +554,14 @@ class TeamsbotService:
|
|||
return
|
||||
|
||||
# Speaker hints are lightweight caption-derived signals used for
|
||||
# speaker attribution. We do NOT persist caption text as transcript.
|
||||
# However, for address detection we still allow transient analysis from
|
||||
# speaker attribution only. Caption text is NOT used as transcript
|
||||
# (transcript comes from STT/audioCapture or chat).
|
||||
# For address detection we still allow transient analysis from
|
||||
# speaker hints (without DB write), otherwise direct calls like
|
||||
# "Nyla, hörst du mich?" can be missed when audio capture is silent.
|
||||
if source in ("caption", "speakerHint"):
|
||||
self._registerSpeakerHint(speaker, text)
|
||||
|
||||
# Emit caption/speakerHint to UI so user sees who spoke (audioCapture alone shows "Meeting Audio")
|
||||
await _emitSessionEvent(sessionId, "transcript", {
|
||||
"id": None,
|
||||
"speaker": speaker or "Unknown",
|
||||
"text": text,
|
||||
"confidence": 1.0,
|
||||
"timestamp": getIsoTimestamp(),
|
||||
"isContinuation": False,
|
||||
"source": source,
|
||||
"speakerResolvedFromHint": False,
|
||||
})
|
||||
# Do NOT emit caption text as transcript to UI; caption is for name resolution only.
|
||||
|
||||
if (
|
||||
source == "speakerHint"
|
||||
|
|
@ -596,6 +598,44 @@ class TeamsbotService:
|
|||
)
|
||||
return
|
||||
|
||||
# Chat history: messages sent before the bot joined the meeting.
|
||||
# Stored in DB and context (available if someone refers to chat history)
|
||||
# but never used to trigger AI responses.
|
||||
if source == "chatHistory":
|
||||
transcriptData = TeamsbotTranscript(
|
||||
sessionId=sessionId,
|
||||
speaker=speaker,
|
||||
text=text,
|
||||
timestamp=getIsoTimestamp(),
|
||||
confidence=1.0,
|
||||
language=self.config.language,
|
||||
isFinal=True,
|
||||
).model_dump()
|
||||
createdTranscript = interface.createTranscript(transcriptData)
|
||||
|
||||
self._contextBuffer.append({
|
||||
"speaker": speaker or "Unknown",
|
||||
"text": text,
|
||||
"timestamp": getUtcTimestamp(),
|
||||
"source": "chatHistory",
|
||||
})
|
||||
maxSegments = self.config.contextWindowSegments
|
||||
if len(self._contextBuffer) > maxSegments:
|
||||
self._contextBuffer = self._contextBuffer[-maxSegments:]
|
||||
|
||||
await _emitSessionEvent(sessionId, "transcript", {
|
||||
"id": createdTranscript.get("id"),
|
||||
"speaker": speaker,
|
||||
"text": text,
|
||||
"confidence": 1.0,
|
||||
"timestamp": getIsoTimestamp(),
|
||||
"isContinuation": False,
|
||||
"source": "chatHistory",
|
||||
"isHistory": True,
|
||||
})
|
||||
logger.debug(f"Session {sessionId}: Chat history stored (no AI trigger): [{speaker}] {text[:60]}")
|
||||
return
|
||||
|
||||
# Filter out the bot's own speech entirely — captions of the bot's
|
||||
# own voice come back as garbled text (e.g. German TTS → English caption)
|
||||
# which pollutes the context buffer and confuses AI analysis.
|
||||
|
|
|
|||
Loading…
Reference in a new issue