diff --git a/src/bot/audioProcedure.ts b/src/bot/audioProcedure.ts index 37a900c..0b9ce6c 100644 --- a/src/bot/audioProcedure.ts +++ b/src/bot/audioProcedure.ts @@ -92,6 +92,41 @@ export class AudioProcedure { this._logger.info('Audio getUserMedia override injected'); } + /** + * Find the frame whose MediaStreamDestination track is used by the RTCPeerConnection. + * Teams meeting often runs in an iframe; page.evaluate runs in main frame, so we'd + * play into the wrong streamDest. Returns the frame to use, or null for main page. + */ + private async _getTtsFrame(): Promise<{ evaluate: typeof this._page.evaluate } | null> { + const frames = this._page.frames(); + for (const frame of frames) { + try { + const match = await frame.evaluate(() => { + const pcs = ((window as any).__audioCapturePeerConnections || []) as RTCPeerConnection[]; + const streamDest = (window as any).__ttsStreamDest as MediaStreamAudioDestinationNode; + if (!pcs.length || !streamDest) return false; + const ttsTrackId = streamDest.stream.getAudioTracks()[0]?.id; + if (!ttsTrackId) return false; + for (const pc of pcs) { + const senders = pc.getSenders?.() || []; + for (const s of senders) { + if (s?.track?.kind === 'audio' && s.track.id === ttsTrackId) return true; + } + } + return false; + }); + if (match) { + this._logger.info(`[Voice] Using frame for TTS (track match): ${frame.url().substring(0, 80)}`); + return frame; + } + } catch { + // Frame may be detached + } + } + this._logger.warn('[Voice] No frame with matching TTS track; using main page (voice may not reach participants)'); + return null; + } + /** * Initialize the audio context in the browser for TTS playback. * Must be called after joining the meeting (user gesture context). @@ -198,6 +233,8 @@ export class AudioProcedure { /** * Internal: Play audio in the browser (single clip, no queuing). * Audio is piped into the MediaStreamDestination that Teams uses as mic input. + * Teams meeting may run in an iframe; we must play in the frame that has the + * RTCPeerConnection (otherwise trackMatch=false, voice does not reach participants). */ private async _playAudioInternal(audioData: string, format: 'mp3' | 'wav' | 'pcm'): Promise { if (!this._audioContext) { @@ -206,8 +243,11 @@ export class AudioProcedure { this._logger.info(`Playing audio (format: ${format}, size: ${audioData.length} bytes base64)`); + const targetFrame = await this._getTtsFrame(); + const evalTarget = targetFrame || this._page; + try { - const playbackDiag = await this._page.evaluate(async ({ audioData, format }) => { + const playbackDiag = await evalTarget.evaluate(async ({ audioData, format }) => { const ctx = (window as any).__ttsAudioContext as AudioContext; const streamDest = (window as any).__ttsStreamDest as MediaStreamAudioDestinationNode; const pcs = ((window as any).__audioCapturePeerConnections || []) as RTCPeerConnection[]; diff --git a/src/bot/orchestrator.ts b/src/bot/orchestrator.ts index 3b44f20..39bca99 100644 --- a/src/bot/orchestrator.ts +++ b/src/bot/orchestrator.ts @@ -493,6 +493,12 @@ export class BotOrchestrator { const micToggle = await this._pollForElement([ 'input[data-tid="toggle-audio"]', '[data-tid="toggle-audio"]', + 'input[data-tid="toggle-mute"]', + '[data-tid="toggle-mute"]', + 'button[id="microphone-button"]', + 'button[data-inp="microphone-button"]', + 'button[aria-label*="microphone" i]', + 'button[aria-label*="Mikrofon" i]', 'input[role="switch"][title*="microphone" i]', 'input[role="switch"][title*="Mikrofon" i]', 'input[role="switch"][title*="mic" i]', @@ -501,14 +507,29 @@ export class BotOrchestrator { if (!micToggle) return; - const state = await micToggle.evaluate((el: HTMLInputElement) => ({ - checked: el.checked, - dataCid: el.getAttribute('data-cid') || '', - title: el.getAttribute('title') || '', - })); - this._logger.info(`Mic state: checked=${state.checked}, data-cid="${state.dataCid}", title="${state.title}"`); + const state = await micToggle.evaluate((el: HTMLElement) => { + const input = el as HTMLInputElement; + const label = (el.getAttribute('aria-label') || el.getAttribute('title') || '').toLowerCase(); + const isInput = el.tagName === 'INPUT'; + const checked = isInput ? input.checked : undefined; + const looksMuted = !isInput && ( + label.includes('unmute') || label.includes('einschalten') || + label.includes('turn on') || label.includes('turn microphone on') + ); + return { + checked, + dataCid: el.getAttribute('data-cid') || '', + title: el.getAttribute('title') || '', + tagName: el.tagName, + looksMuted, + }; + }); + this._logger.info(`Mic state: checked=${state.checked}, data-cid="${state.dataCid}", tag=${state.tagName}, looksMuted=${state.looksMuted}`); - if (!state.checked) { + const needsClick = state.tagName === 'INPUT' + ? !state.checked + : (state.looksMuted === true); + if (needsClick) { await micToggle.click(); this._logger.info('Mic toggled ON'); } else {