From e6d0cfdad2206fb35e5710d330c7ef78333c1928 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Fri, 27 Feb 2026 08:01:55 +0100
Subject: [PATCH] Add robust WebRTC sender fallback for TTS audio injection.
Track active RTCPeerConnections and force-replace outgoing audio sender tracks with the TTS track before playback to handle flows where getUserMedia override is bypassed.
Made-with: Cursor
---
src/bot/audioProcedure.ts | 51 +++++++++++++++++++++++++++++++++++++++
1 file changed, 51 insertions(+)
diff --git a/src/bot/audioProcedure.ts b/src/bot/audioProcedure.ts
index 95d4565..901f23c 100644
--- a/src/bot/audioProcedure.ts
+++ b/src/bot/audioProcedure.ts
@@ -51,6 +51,7 @@ export class AudioProcedure {
(window as any).__ttsAudioContext = ctx;
(window as any).__ttsStreamDest = streamDest;
(window as any).__ttsAudioStream = streamDest.stream;
+ (window as any).__ttsPeerConnections = [] as RTCPeerConnection[];
// Wrap getUserMedia to replace audio tracks with our TTS-injectable stream
const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
@@ -85,6 +86,45 @@ export class AudioProcedure {
// No audio requested - return the real stream as-is
return realStream;
};
+
+ // Track peer connections for a robust fallback injection path. A Proxy (rather than a wrapper
+ // function) keeps prototype, statics and `class X extends RTCPeerConnection` behaving natively.
+ const OriginalRTCPeerConnection = window.RTCPeerConnection;
+ // Guard: without WebRTC there is nothing to wrap, and the rest of this script must still run.
+ if (typeof OriginalRTCPeerConnection === 'function') {
+   window.RTCPeerConnection = new Proxy(OriginalRTCPeerConnection, {
+     construct(target, args, newTarget) {
+       const pc = Reflect.construct(target, args, newTarget) as RTCPeerConnection;
+       // Best-effort registration: a missing registry must never break the page's own call setup.
+       const pcs = (window as any).__ttsPeerConnections as RTCPeerConnection[] | undefined;
+       if (Array.isArray(pcs)) pcs.push(pc);
+       return pc;
+     },
+   });
+ }
+
+ // Force every live connection's outgoing audio senders onto the TTS track; resolves to { replaced, pcs, reason }.
+ (window as any).__forceTtsTrackToSenders = async () => {
+   const tracked = ((window as any).__ttsPeerConnections ?? []) as RTCPeerConnection[];
+   // Prune closed connections in place: replaceTrack() rejects on them and keeping them leaks memory.
+   const live = tracked.filter((pc) => pc.signalingState !== 'closed');
+   tracked.splice(0, tracked.length, ...live);
+   const ttsTrack = streamDest.stream.getAudioTracks()[0];
+   if (!ttsTrack) return { replaced: 0, pcs: live.length, reason: 'no-tts-track' };
+   let replaced = 0;
+   for (const pc of live) {
+     try {
+       const audioSenders = pc.getSenders().filter((sender) => sender.track?.kind === 'audio');
+       // allSettled: one rejected replaceTrack() must not stop the remaining senders of this connection.
+       const outcomes = await Promise.allSettled(audioSenders.map((sender) => sender.replaceTrack(ttsTrack)));
+       replaced += outcomes.filter((outcome) => outcome.status === 'fulfilled').length;
+     } catch {
+       // ignore per peer connection: this is a best-effort fallback and must not fail playback
+     }
+   }
+   return { replaced, pcs: live.length, reason: 'ok' };
+ };
});
this._initScriptInjected = true;
@@ -206,6 +246,17 @@ export class AudioProcedure {
this._logger.info(`Playing audio (format: ${format}, size: ${audioData.length} bytes base64)`);
try {
+ const senderInjectInfo = await this._page.evaluate(async () => {
+ const forceFn = (window as any).__forceTtsTrackToSenders;
+ if (typeof forceFn === 'function') {
+ return await forceFn();
+ }
+ return { replaced: 0, pcs: 0, reason: 'force-function-missing' };
+ });
+ this._logger.info(
+ `TTS sender injection: replaced=${senderInjectInfo?.replaced ?? 0}, pcs=${senderInjectInfo?.pcs ?? 0}, reason=${senderInjectInfo?.reason || 'n/a'}`
+ );
+
await this._page.evaluate(async ({ audioData, format }) => {
const ctx = (window as any).__ttsAudioContext as AudioContext;
const streamDest = (window as any).__ttsStreamDest as MediaStreamAudioDestinationNode;