Add robust WebRTC sender fallback for TTS audio injection.

Track active RTCPeerConnections and force-replace outgoing audio sender tracks with the TTS track before playback, to handle flows where the getUserMedia override is bypassed.

Made-with: Cursor
This commit is contained in:
ValueOn AG 2026-02-27 08:01:55 +01:00
parent 3eaebae2a3
commit e6d0cfdad2

View file

@ -51,6 +51,7 @@ export class AudioProcedure {
(window as any).__ttsAudioContext = ctx;
(window as any).__ttsStreamDest = streamDest;
(window as any).__ttsAudioStream = streamDest.stream;
(window as any).__ttsPeerConnections = [] as RTCPeerConnection[];
// Wrap getUserMedia to replace audio tracks with our TTS-injectable stream
const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
@ -85,6 +86,45 @@ export class AudioProcedure {
// No audio requested - return the real stream as-is
return realStream;
};
// Track peer connections for a robust fallback injection path.
// The page-visible RTCPeerConnection constructor is swapped for a Proxy so that
// every connection the page creates is recorded in window.__ttsPeerConnections,
// where __forceTtsTrackToSenders can later look up its audio senders.
const OriginalRTCPeerConnection = window.RTCPeerConnection;
if (typeof OriginalRTCPeerConnection === 'function') {
  window.RTCPeerConnection = new Proxy(OriginalRTCPeerConnection, {
    // Only construction is intercepted; statics, `prototype` and `instanceof`
    // fall through to the native constructor untouched.
    construct(target, args, newTarget) {
      // Forward newTarget so `class X extends RTCPeerConnection` still yields
      // instances that carry X.prototype.
      const pc: RTCPeerConnection = Reflect.construct(target, args, newTarget);
      try {
        const win = window as any;
        // Recreate the registry if the page (or an earlier error) removed it.
        if (!Array.isArray(win.__ttsPeerConnections)) {
          win.__ttsPeerConnections = [];
        }
        const registry = win.__ttsPeerConnections as RTCPeerConnection[];
        // Drop closed connections in place (same array object) so the registry
        // does not keep every connection alive for the lifetime of the page.
        for (let i = registry.length - 1; i >= 0; i--) {
          if (registry[i]?.signalingState === 'closed') {
            registry.splice(i, 1);
          }
        }
        registry.push(pc);
      } catch {
        // Tracking is best effort: it must never break the page's own
        // connection setup, so bookkeeping failures are deliberately ignored.
      }
      return pc;
    },
  });
}
// Helper to force outgoing audio sender track to the TTS stream track.
// Resolves to { replaced, pcs, reason }:
//   replaced - number of audio senders whose track was switched to the TTS track
//   pcs      - number of peer connections currently tracked
//   reason   - 'no-tts-track' when the destination stream has no audio track,
//              otherwise 'ok'
(window as any).__forceTtsTrackToSenders = async () => {
  const tracked = (window as any).__ttsPeerConnections as RTCPeerConnection[] | undefined;
  // Normalise once so a missing registry behaves like an empty one.
  const pcs = Array.isArray(tracked) ? tracked : [];
  const ttsTrack = streamDest.stream.getAudioTracks()[0];
  if (!ttsTrack) return { replaced: 0, pcs: pcs.length, reason: 'no-tts-track' };
  let replaced = 0;
  for (const pc of pcs) {
    let senders: RTCRtpSender[] = [];
    try {
      senders = pc.getSenders?.() || [];
    } catch {
      // ignore per peer connection
      continue;
    }
    for (const sender of senders) {
      // Only senders that currently carry an audio track are redirected.
      if (sender?.track?.kind !== 'audio') continue;
      try {
        await sender.replaceTrack(ttsTrack);
        replaced++;
      } catch {
        // Best effort per sender: a rejection here must not prevent the
        // remaining senders on this connection from being switched.
      }
    }
  }
  return { replaced, pcs: pcs.length, reason: 'ok' };
};
});
this._initScriptInjected = true;
@ -206,6 +246,17 @@ export class AudioProcedure {
this._logger.info(`Playing audio (format: ${format}, size: ${audioData.length} bytes base64)`);
try {
// Fallback path: before playback, ask the page to run the
// window.__forceTtsTrackToSenders helper, which swaps the audio track of the
// tracked peer connections' senders for the TTS track.
const senderInjectInfo = await this._page.evaluate(async () => {
const forceFn = (window as any).__forceTtsTrackToSenders;
// The helper may be absent from this page; report that instead of throwing.
if (typeof forceFn === 'function') {
return await forceFn();
}
return { replaced: 0, pcs: 0, reason: 'force-function-missing' };
});
// Log the outcome so it is visible whether any sender was actually replaced.
this._logger.info(
`TTS sender injection: replaced=${senderInjectInfo?.replaced ?? 0}, pcs=${senderInjectInfo?.pcs ?? 0}, reason=${senderInjectInfo?.reason || 'n/a'}`
);
await this._page.evaluate(async ({ audioData, format }) => {
const ctx = (window as any).__ttsAudioContext as AudioContext;
const streamDest = (window as any).__ttsStreamDest as MediaStreamAudioDestinationNode;