Voice: play TTS in correct frame (iframe), add mic toggle fallbacks
Made-with: Cursor
This commit is contained in:
parent
35852f691b
commit
533e976039
2 changed files with 69 additions and 8 deletions
|
|
@ -92,6 +92,41 @@ export class AudioProcedure {
|
||||||
this._logger.info('Audio getUserMedia override injected');
|
this._logger.info('Audio getUserMedia override injected');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Locate the frame in which TTS audio has to be played.
 *
 * Every frame of the page is probed in order. A frame qualifies when the first
 * audio track of its `__ttsStreamDest` is attached to an audio sender of one of
 * the peer connections recorded in `__audioCapturePeerConnections`. The Teams
 * meeting often runs inside an iframe, so evaluating on the main page alone
 * could feed a stream destination that no peer connection is sending.
 *
 * @returns The first matching frame, or `null` when none matches (the caller
 *          then falls back to the main page).
 */
private async _getTtsFrame(): Promise<{ evaluate: typeof this._page.evaluate } | null> {
  for (const candidate of this._page.frames()) {
    try {
      const carriesTtsTrack = await candidate.evaluate(() => {
        const connections = ((window as any).__audioCapturePeerConnections || []) as RTCPeerConnection[];
        const destination = (window as any).__ttsStreamDest as MediaStreamAudioDestinationNode;
        if (!connections.length || !destination) return false;

        // Only the first audio track of the TTS destination is compared.
        const wantedTrackId = destination.stream.getAudioTracks()[0]?.id;
        if (!wantedTrackId) return false;

        for (const connection of connections) {
          const senderList = connection.getSenders?.() || [];
          const attached = senderList.some(
            (sender) => sender?.track?.kind === 'audio' && sender.track.id === wantedTrackId,
          );
          if (attached) return true;
        }
        return false;
      });

      if (!carriesTtsTrack) continue;

      this._logger.info(`[Voice] Using frame for TTS (track match): ${candidate.url().substring(0, 80)}`);
      return candidate;
    } catch {
      // The frame may have been detached while probing; move on to the next one.
    }
  }

  this._logger.warn('[Voice] No frame with matching TTS track; using main page (voice may not reach participants)');
  return null;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize the audio context in the browser for TTS playback.
|
* Initialize the audio context in the browser for TTS playback.
|
||||||
* Must be called after joining the meeting (user gesture context).
|
* Must be called after joining the meeting (user gesture context).
|
||||||
|
|
@ -198,6 +233,8 @@ export class AudioProcedure {
|
||||||
/**
|
/**
|
||||||
* Internal: Play audio in the browser (single clip, no queuing).
|
* Internal: Play audio in the browser (single clip, no queuing).
|
||||||
* Audio is piped into the MediaStreamDestination that Teams uses as mic input.
|
* Audio is piped into the MediaStreamDestination that Teams uses as mic input.
|
||||||
|
* Teams meeting may run in an iframe; we must play in the frame that has the
|
||||||
|
* RTCPeerConnection (otherwise trackMatch=false, voice does not reach participants).
|
||||||
*/
|
*/
|
||||||
private async _playAudioInternal(audioData: string, format: 'mp3' | 'wav' | 'pcm'): Promise<void> {
|
private async _playAudioInternal(audioData: string, format: 'mp3' | 'wav' | 'pcm'): Promise<void> {
|
||||||
if (!this._audioContext) {
|
if (!this._audioContext) {
|
||||||
|
|
@ -206,8 +243,11 @@ export class AudioProcedure {
|
||||||
|
|
||||||
this._logger.info(`Playing audio (format: ${format}, size: ${audioData.length} bytes base64)`);
|
this._logger.info(`Playing audio (format: ${format}, size: ${audioData.length} bytes base64)`);
|
||||||
|
|
||||||
|
const targetFrame = await this._getTtsFrame();
|
||||||
|
const evalTarget = targetFrame || this._page;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const playbackDiag = await this._page.evaluate(async ({ audioData, format }) => {
|
const playbackDiag = await evalTarget.evaluate(async ({ audioData, format }) => {
|
||||||
const ctx = (window as any).__ttsAudioContext as AudioContext;
|
const ctx = (window as any).__ttsAudioContext as AudioContext;
|
||||||
const streamDest = (window as any).__ttsStreamDest as MediaStreamAudioDestinationNode;
|
const streamDest = (window as any).__ttsStreamDest as MediaStreamAudioDestinationNode;
|
||||||
const pcs = ((window as any).__audioCapturePeerConnections || []) as RTCPeerConnection[];
|
const pcs = ((window as any).__audioCapturePeerConnections || []) as RTCPeerConnection[];
|
||||||
|
|
|
||||||
|
|
@ -493,6 +493,12 @@ export class BotOrchestrator {
|
||||||
const micToggle = await this._pollForElement([
|
const micToggle = await this._pollForElement([
|
||||||
'input[data-tid="toggle-audio"]',
|
'input[data-tid="toggle-audio"]',
|
||||||
'[data-tid="toggle-audio"]',
|
'[data-tid="toggle-audio"]',
|
||||||
|
'input[data-tid="toggle-mute"]',
|
||||||
|
'[data-tid="toggle-mute"]',
|
||||||
|
'button[id="microphone-button"]',
|
||||||
|
'button[data-inp="microphone-button"]',
|
||||||
|
'button[aria-label*="microphone" i]',
|
||||||
|
'button[aria-label*="Mikrofon" i]',
|
||||||
'input[role="switch"][title*="microphone" i]',
|
'input[role="switch"][title*="microphone" i]',
|
||||||
'input[role="switch"][title*="Mikrofon" i]',
|
'input[role="switch"][title*="Mikrofon" i]',
|
||||||
'input[role="switch"][title*="mic" i]',
|
'input[role="switch"][title*="mic" i]',
|
||||||
|
|
@ -501,14 +507,29 @@ export class BotOrchestrator {
|
||||||
|
|
||||||
if (!micToggle) return;
|
if (!micToggle) return;
|
||||||
|
|
||||||
const state = await micToggle.evaluate((el: HTMLInputElement) => ({
|
const state = await micToggle.evaluate((el: HTMLElement) => {
|
||||||
checked: el.checked,
|
const input = el as HTMLInputElement;
|
||||||
dataCid: el.getAttribute('data-cid') || '',
|
const label = (el.getAttribute('aria-label') || el.getAttribute('title') || '').toLowerCase();
|
||||||
title: el.getAttribute('title') || '',
|
const isInput = el.tagName === 'INPUT';
|
||||||
}));
|
const checked = isInput ? input.checked : undefined;
|
||||||
this._logger.info(`Mic state: checked=${state.checked}, data-cid="${state.dataCid}", title="${state.title}"`);
|
const looksMuted = !isInput && (
|
||||||
|
label.includes('unmute') || label.includes('einschalten') ||
|
||||||
|
label.includes('turn on') || label.includes('turn microphone on')
|
||||||
|
);
|
||||||
|
return {
|
||||||
|
checked,
|
||||||
|
dataCid: el.getAttribute('data-cid') || '',
|
||||||
|
title: el.getAttribute('title') || '',
|
||||||
|
tagName: el.tagName,
|
||||||
|
looksMuted,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
this._logger.info(`Mic state: checked=${state.checked}, data-cid="${state.dataCid}", tag=${state.tagName}, looksMuted=${state.looksMuted}`);
|
||||||
|
|
||||||
if (!state.checked) {
|
const needsClick = state.tagName === 'INPUT'
|
||||||
|
? !state.checked
|
||||||
|
: (state.looksMuted === true);
|
||||||
|
if (needsClick) {
|
||||||
await micToggle.click();
|
await micToggle.click();
|
||||||
this._logger.info('Mic toggled ON');
|
this._logger.info('Mic toggled ON');
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue