feat: add speaker-hint debug flag and improve TTS diagnostics
Made-with: Cursor
This commit is contained in:
parent
0cd8e9ebfa
commit
02002f3576
1 changed file with 44 additions and 19 deletions
|
|
@@ -444,13 +444,14 @@ class TeamsbotService:
|
|||
logger.info(f"[AudioChunk] STT result: {text[:80]}...")
|
||||
await self._processTranscript(
|
||||
sessionId=sessionId,
|
||||
speaker=resolvedSpeaker,
|
||||
speaker=resolvedSpeaker["speaker"],
|
||||
text=text,
|
||||
isFinal=True,
|
||||
interface=interface,
|
||||
voiceInterface=voiceInterface,
|
||||
websocket=websocket,
|
||||
source="audioCapture",
|
||||
speakerResolvedFromHint=resolvedSpeaker["speakerResolvedFromHint"],
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"[AudioChunk] STT error for session {sessionId}: {type(e).__name__}: {e}")
|
||||
|
|
@@ -473,19 +474,22 @@ class TeamsbotService:
|
|||
if len(self._recentSpeakerHints) > 20:
|
||||
self._recentSpeakerHints = self._recentSpeakerHints[-20:]
|
||||
|
||||
def _resolveSpeakerForAudioCapture(self) -> str:
|
||||
def _resolveSpeakerForAudioCapture(self) -> Dict[str, Any]:
|
||||
"""Best-effort speaker name for audio chunks using recent caption hints."""
|
||||
if not self._recentSpeakerHints:
|
||||
return "Meeting Audio"
|
||||
return {"speaker": "Meeting Audio", "speakerResolvedFromHint": False}
|
||||
|
||||
nowTs = time.time()
|
||||
# Prefer very recent hints to reduce wrong attribution
|
||||
for hint in reversed(self._recentSpeakerHints):
|
||||
hintAge = nowTs - hint.get("timestamp", 0)
|
||||
if hintAge <= 15:
|
||||
return hint.get("speaker", "Meeting Audio")
|
||||
return {
|
||||
"speaker": hint.get("speaker", "Meeting Audio"),
|
||||
"speakerResolvedFromHint": True,
|
||||
}
|
||||
|
||||
return "Meeting Audio"
|
||||
return {"speaker": "Meeting Audio", "speakerResolvedFromHint": False}
|
||||
|
||||
async def _processTranscript(
|
||||
self,
|
||||
|
|
@@ -497,6 +501,7 @@ class TeamsbotService:
|
|||
voiceInterface,
|
||||
websocket: WebSocket,
|
||||
source: str = "caption",
|
||||
speakerResolvedFromHint: Optional[bool] = None,
|
||||
):
|
||||
"""Process a transcript segment from captions or chat messages.
|
||||
|
||||
|
|
@@ -601,6 +606,12 @@ class TeamsbotService:
|
|||
"confidence": 1.0,
|
||||
"timestamp": getIsoTimestamp(),
|
||||
"isContinuation": isContinuation,
|
||||
"source": source,
|
||||
"speakerResolvedFromHint": (
|
||||
speakerResolvedFromHint
|
||||
if speakerResolvedFromHint is not None
|
||||
else False
|
||||
),
|
||||
})
|
||||
|
||||
# Check if AI analysis should be triggered (only for final transcripts)
|
||||
|
|
@@ -837,15 +848,26 @@ class TeamsbotService:
|
|||
# 4a: Voice response (TTS -> Audio to bot)
|
||||
if sendVoice:
|
||||
try:
|
||||
logger.info(
|
||||
f"Session {sessionId}: TTS requested (websocket_available={websocket is not None})"
|
||||
)
|
||||
ttsResult = await voiceInterface.textToSpeech(
|
||||
text=speechResult.responseText,
|
||||
languageCode=self.config.language,
|
||||
voiceName=self.config.voiceId
|
||||
)
|
||||
|
||||
if ttsResult and isinstance(ttsResult, dict):
|
||||
if not ttsResult or not isinstance(ttsResult, dict):
|
||||
raise RuntimeError("TTS returned invalid result payload")
|
||||
|
||||
if ttsResult.get("success") is False:
|
||||
raise RuntimeError(f"TTS backend error: {ttsResult.get('error', 'unknown')}")
|
||||
|
||||
audioContent = ttsResult.get("audioContent")
|
||||
if audioContent and websocket:
|
||||
if not audioContent:
|
||||
raise RuntimeError("TTS returned no audioContent")
|
||||
|
||||
if websocket:
|
||||
await websocket.send_text(json.dumps({
|
||||
"type": "playAudio",
|
||||
"sessionId": sessionId,
|
||||
|
|
@@ -854,8 +876,11 @@ class TeamsbotService:
|
|||
"format": "mp3",
|
||||
},
|
||||
}))
|
||||
elif audioContent and not websocket:
|
||||
logger.info(f"TTS audio generated for session {sessionId} (HTTP mode - no WebSocket for playback)")
|
||||
logger.info(f"Session {sessionId}: TTS audio dispatched to bot")
|
||||
else:
|
||||
logger.warning(
|
||||
f"Session {sessionId}: TTS audio generated but cannot be played (bot websocket unavailable, likely fallback mode)"
|
||||
)
|
||||
except Exception as ttsErr:
|
||||
logger.warning(f"TTS failed for session {sessionId}: {ttsErr}")
|
||||
if not sendChat:
|
||||
|
|
|
|||
Loading…
Reference in a new issue