diff --git a/src/bot/audioProcedure.ts b/src/bot/audioProcedure.ts
index 95d4565..901f23c 100644
--- a/src/bot/audioProcedure.ts
+++ b/src/bot/audioProcedure.ts
@@ -51,6 +51,8 @@ export class AudioProcedure {
       (window as any).__ttsAudioContext = ctx;
       (window as any).__ttsStreamDest = streamDest;
       (window as any).__ttsAudioStream = streamDest.stream;
+      // Peer connections created by the page, recorded so the TTS track can be forced onto their senders.
+      (window as any).__ttsPeerConnections = [] as RTCPeerConnection[];
 
       // Wrap getUserMedia to replace audio tracks with our TTS-injectable stream
       const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
@@ -85,6 +87,64 @@ export class AudioProcedure {
         // No audio requested - return the real stream as-is
         return realStream;
       };
+
+      // Track peer connections for a robust fallback injection path. A Proxy (rather
+      // than a plain wrapper function) preserves new.target, static members and the
+      // prototype chain, so instanceof checks and subclasses in page code keep working.
+      const OriginalRTCPeerConnection = window.RTCPeerConnection;
+      if (typeof OriginalRTCPeerConnection === 'function') {
+        window.RTCPeerConnection = new Proxy(OriginalRTCPeerConnection, {
+          construct(target, args, newTarget) {
+            const pc = Reflect.construct(target, args, newTarget) as RTCPeerConnection;
+            try {
+              ((window as any).__ttsPeerConnections as RTCPeerConnection[]).push(pc);
+            } catch {
+              // Tracking is best-effort; never break the page's own connection setup.
+            }
+            return pc;
+          },
+        });
+      }
+
+      // Helper to force outgoing audio sender track to the TTS stream track.
+      // Resolves to { replaced, failed, pcs, reason }: senders switched by this call,
+      // senders that could not be switched, open peer connections inspected, and status.
+      (window as any).__forceTtsTrackToSenders = async () => {
+        const pcs = ((window as any).__ttsPeerConnections ?? []) as RTCPeerConnection[];
+
+        // Prune closed connections in place so the shared list does not keep growing.
+        for (let i = pcs.length - 1; i >= 0; i--) {
+          if (pcs[i]?.signalingState === 'closed') pcs.splice(i, 1);
+        }
+
+        const ttsTrack = streamDest.stream.getAudioTracks()[0];
+        if (!ttsTrack) return { replaced: 0, failed: 0, pcs: pcs.length, reason: 'no-tts-track' };
+
+        let replaced = 0;
+        let failed = 0;
+        for (const pc of pcs) {
+          let senders: RTCRtpSender[];
+          try {
+            senders = pc.getSenders();
+          } catch {
+            // Skip a connection whose senders cannot be listed; keep going with the rest.
+            continue;
+          }
+          for (const sender of senders) {
+            const current = sender.track;
+            // Only audio senders are touched; ones already carrying the TTS track are skipped.
+            if (current?.kind !== 'audio' || current === ttsTrack) continue;
+            try {
+              await sender.replaceTrack(ttsTrack);
+              replaced++;
+            } catch {
+              // A rejected sender must not prevent the remaining senders from being switched.
+              failed++;
+            }
+          }
+        }
+        return { replaced, failed, pcs: pcs.length, reason: 'ok' };
+      };
     });
 
     this._initScriptInjected = true;
@@ -206,6 +266,17 @@ export class AudioProcedure {
     this._logger.info(`Playing audio (format: ${format}, size: ${audioData.length} bytes base64)`);
 
     try {
+      const senderInjectInfo = await this._page.evaluate(async () => {
+        const forceFn = (window as any).__forceTtsTrackToSenders;
+        if (typeof forceFn === 'function') {
+          return await forceFn();
+        }
+        return { replaced: 0, pcs: 0, reason: 'force-function-missing' };
+      });
+      this._logger.info(
+        `TTS sender injection: replaced=${senderInjectInfo?.replaced ?? 0}, failed=${senderInjectInfo?.failed ?? 0}, pcs=${senderInjectInfo?.pcs ?? 0}, reason=${senderInjectInfo?.reason ?? 'n/a'}`
+      );
+
       await this._page.evaluate(async ({ audioData, format }) => {
         const ctx = (window as any).__ttsAudioContext as AudioContext;
         const streamDest = (window as any).__ttsStreamDest as MediaStreamAudioDestinationNode;