fix(teamsbot): filter bot's own captions from AI trigger, mark bot responses in context to prevent repetition
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
33b6da0d18
commit
cc86b144ac
1 changed file with 42 additions and 2 deletions
|
|
@ -307,6 +307,11 @@ class TeamsbotService:
|
||||||
if not text:
|
if not text:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Filter out the bot's own speech from AI triggering.
|
||||||
|
# The bot hears itself via captions — these should be stored in the
|
||||||
|
# transcript for the record, but must NOT trigger AI analysis (feedback loop).
|
||||||
|
isBotSpeaker = self._isBotSpeaker(speaker)
|
||||||
|
|
||||||
# Store transcript segment
|
# Store transcript segment
|
||||||
transcriptData = TeamsbotTranscript(
|
transcriptData = TeamsbotTranscript(
|
||||||
sessionId=sessionId,
|
sessionId=sessionId,
|
||||||
|
|
@ -346,6 +351,11 @@ class TeamsbotService:
|
||||||
count = session.get("transcriptSegmentCount", 0) + 1
|
count = session.get("transcriptSegmentCount", 0) + 1
|
||||||
interface.updateSession(sessionId, {"transcriptSegmentCount": count})
|
interface.updateSession(sessionId, {"transcriptSegmentCount": count})
|
||||||
|
|
||||||
|
# Skip AI analysis for bot's own speech (prevents feedback loop)
|
||||||
|
if isBotSpeaker:
|
||||||
|
logger.debug(f"Session {sessionId}: Skipping AI trigger for bot's own speech: [{speaker}] {text[:60]}...")
|
||||||
|
return
|
||||||
|
|
||||||
# Check if AI analysis should be triggered (only for final transcripts)
|
# Check if AI analysis should be triggered (only for final transcripts)
|
||||||
if not isFinal:
|
if not isFinal:
|
||||||
return
|
return
|
||||||
|
|
@ -364,6 +374,32 @@ class TeamsbotService:
|
||||||
logger.info(f"Session {sessionId}: Triggering AI analysis (buffer: {len(self._contextBuffer)} segments)")
|
logger.info(f"Session {sessionId}: Triggering AI analysis (buffer: {len(self._contextBuffer)} segments)")
|
||||||
await self._analyzeAndRespond(sessionId, interface, voiceInterface, websocket, createdTranscript)
|
await self._analyzeAndRespond(sessionId, interface, voiceInterface, websocket, createdTranscript)
|
||||||
|
|
||||||
|
def _isBotSpeaker(self, speaker: str) -> bool:
|
||||||
|
"""Check if a transcript speaker is the bot itself.
|
||||||
|
|
||||||
|
Teams captions show the bot as e.g. "Shelly Miller (Unverified)" or
|
||||||
|
"Nyla Larsson" depending on auth/anonymous join. We match against:
|
||||||
|
- The configured bot name (e.g. "Shelly Miller")
|
||||||
|
- The bot account display name if authenticated
|
||||||
|
"""
|
||||||
|
if not speaker:
|
||||||
|
return False
|
||||||
|
|
||||||
|
speakerLower = speaker.lower().strip()
|
||||||
|
|
||||||
|
# Match against configured bot name
|
||||||
|
botName = self.config.botName.lower().strip()
|
||||||
|
if botName and botName in speakerLower:
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Match against bot account email prefix (e.g. "nyla.larsson" from "nyla.larsson@poweron.swiss")
|
||||||
|
if self.config.botAccountEmail:
|
||||||
|
emailPrefix = self.config.botAccountEmail.split("@")[0].lower().replace(".", " ")
|
||||||
|
if emailPrefix in speakerLower:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
def _shouldTriggerAnalysis(self, transcriptText: str) -> bool:
|
def _shouldTriggerAnalysis(self, transcriptText: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Decide whether to trigger AI analysis based on the latest transcript.
|
Decide whether to trigger AI analysis based on the latest transcript.
|
||||||
|
|
@ -405,12 +441,16 @@ class TeamsbotService:
|
||||||
"""Run SPEECH_TEAMS AI analysis and respond if needed."""
|
"""Run SPEECH_TEAMS AI analysis and respond if needed."""
|
||||||
self._lastAiCallTime = time.time()
|
self._lastAiCallTime = time.time()
|
||||||
|
|
||||||
# Build transcript context from buffer
|
# Build transcript context from buffer.
|
||||||
|
# Mark bot's own utterances so the AI knows what it already said.
|
||||||
contextLines = []
|
contextLines = []
|
||||||
for segment in self._contextBuffer:
|
for segment in self._contextBuffer:
|
||||||
speaker = segment.get("speaker", "Unknown")
|
speaker = segment.get("speaker", "Unknown")
|
||||||
text = segment.get("text", "")
|
text = segment.get("text", "")
|
||||||
contextLines.append(f"[{speaker}]: {text}")
|
if self._isBotSpeaker(speaker):
|
||||||
|
contextLines.append(f"[YOU ({self.config.botName})]: {text}")
|
||||||
|
else:
|
||||||
|
contextLines.append(f"[{speaker}]: {text}")
|
||||||
|
|
||||||
transcriptContext = f"BOT_NAME:{self.config.botName}\n" + "\n".join(contextLines)
|
transcriptContext = f"BOT_NAME:{self.config.botName}\n" + "\n".join(contextLines)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue