diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index c92756fc..503cf347 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -87,6 +87,7 @@ class TeamsbotService: self._lastTranscriptSpeaker: Optional[str] = None self._lastTranscriptText: Optional[str] = None self._lastTranscriptId: Optional[str] = None + self._lastSttTime: float = 0.0 self._lastBotResponseText: Optional[str] = None self._lastBotResponseTs: float = 0.0 @@ -623,12 +624,14 @@ class TeamsbotService: # Differential transcript writing: # audioCapture from same speaker → append text (merge STT chunks into one block) - # other sources → always create a new record + # Start a new block after a pause (>5s gap between STT results) + sttPauseThreshold = 5.0 isMerge = ( source == "audioCapture" and self._lastTranscriptSpeaker == speaker and self._lastTranscriptText is not None and self._lastTranscriptId is not None + and (time.time() - self._lastSttTime) < sttPauseThreshold ) if isMerge: @@ -680,6 +683,9 @@ class TeamsbotService: count = session.get("transcriptSegmentCount", 0) + 1 interface.updateSession(sessionId, {"transcriptSegmentCount": count}) + if source == "audioCapture": + self._lastSttTime = time.time() + displayText = self._lastTranscriptText if isMerge else text await _emitSessionEvent(sessionId, "transcript", { "id": createdTranscript.get("id"),