From e6d0cfdad2206fb35e5710d330c7ef78333c1928 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Fri, 27 Feb 2026 08:01:55 +0100
Subject: [PATCH] Add robust WebRTC sender fallback for TTS audio injection.

Track active RTCPeerConnections and force-replace outgoing audio sender tracks with the TTS track before playback to handle flows where the getUserMedia override is bypassed.

Made-with: Cursor
---
 src/bot/audioProcedure.ts | 51 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/src/bot/audioProcedure.ts b/src/bot/audioProcedure.ts
index 95d4565..901f23c 100644
--- a/src/bot/audioProcedure.ts
+++ b/src/bot/audioProcedure.ts
@@ -51,6 +51,7 @@ export class AudioProcedure {
       (window as any).__ttsAudioContext = ctx;
       (window as any).__ttsStreamDest = streamDest;
       (window as any).__ttsAudioStream = streamDest.stream;
+      (window as any).__ttsPeerConnections = [] as RTCPeerConnection[];
 
       // Wrap getUserMedia to replace audio tracks with our TTS-injectable stream
       const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
@@ -85,6 +86,45 @@ export class AudioProcedure {
         // No audio requested - return the real stream as-is
         return realStream;
       };
+
+      // Wrap the RTCPeerConnection constructor so every connection the page creates is
+      // recorded in window.__ttsPeerConnections for the sender-replacement fallback.
+      const OriginalRTCPeerConnection = window.RTCPeerConnection;
+      // @ts-ignore constructor wrapper
+      window.RTCPeerConnection = function (...args: any[]) {
+        // Forward new.target so page code that subclasses RTCPeerConnection still gets
+        // an instance of its own subclass rather than a bare RTCPeerConnection.
+        const pc: RTCPeerConnection = Reflect.construct(OriginalRTCPeerConnection, args, new.target);
+        // Registration is best-effort: it must never break the page's own WebRTC setup.
+        const pcs = (window as any).__ttsPeerConnections;
+        if (Array.isArray(pcs)) pcs.push(pc);
+        return pc;
+      } as any;
+      window.RTCPeerConnection.prototype = OriginalRTCPeerConnection.prototype;
+      Object.setPrototypeOf(window.RTCPeerConnection, OriginalRTCPeerConnection);
+
+      // Fallback: point each outgoing audio sender at the TTS track; resolves to { replaced, pcs, reason }.
+      (window as any).__forceTtsTrackToSenders = async () => {
+        const tracked: RTCPeerConnection[] = (window as any).__ttsPeerConnections ?? [];
+        // Forget closed connections so the registry cannot grow without bound.
+        const pcs = tracked.filter((pc) => pc?.signalingState !== 'closed');
+        (window as any).__ttsPeerConnections = pcs;
+        const ttsTrack = streamDest.stream.getAudioTracks()[0];
+        if (!ttsTrack) return { replaced: 0, pcs: pcs.length, reason: 'no-tts-track' };
+        let replaced = 0;
+        for (const pc of pcs) {
+          try {
+            for (const sender of pc.getSenders?.() ?? []) {
+              if (sender?.track?.kind !== 'audio') continue;
+              // A rejected replaceTrack counts as 0 so one bad sender cannot skip the rest.
+              replaced += await sender.replaceTrack(ttsTrack).then(() => 1, () => 0);
+            }
+          } catch {
+            // ignore per peer connection
+          }
+        }
+        return { replaced, pcs: pcs.length, reason: 'ok' };
+      };
     });
 
     this._initScriptInjected = true;
@@ -206,6 +246,17 @@ export class AudioProcedure {
     this._logger.info(`Playing audio (format: ${format}, size: ${audioData.length} bytes base64)`);
 
     try {
+      const senderInjectInfo = await this._page.evaluate(async () => {
+        const forceFn = (window as any).__forceTtsTrackToSenders;
+        if (typeof forceFn === 'function') {
+          return await forceFn();
+        }
+        return { replaced: 0, pcs: 0, reason: 'force-function-missing' };
+      });
+      this._logger.info(
+        `TTS sender injection: replaced=${senderInjectInfo?.replaced ?? 0}, pcs=${senderInjectInfo?.pcs ?? 0}, reason=${senderInjectInfo?.reason || 'n/a'}`
+      );
+
       await this._page.evaluate(async ({ audioData, format }) => {
         const ctx = (window as any).__ttsAudioContext as AudioContext;
         const streamDest = (window as any).__ttsStreamDest as MediaStreamAudioDestinationNode;