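Fix voice audio: capture at the native sample rate and downsample to 16 kHz, route capture through a silent gain node, and release audio resources when the track ends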
This commit is contained in:
parent
9e4aad973f
commit
dbecc602b7
2 changed files with 49 additions and 17 deletions
|
|
@ -8,8 +8,8 @@ import { Logger } from 'winston';
|
|||
* 1. Before page navigation, wraps window.RTCPeerConnection via addInitScript
|
||||
* 2. When Teams establishes WebRTC connections, the wrapper intercepts incoming audio tracks
|
||||
* 3. Incoming audio tracks are captured via AudioContext + ScriptProcessorNode
|
||||
* 4. Audio chunks (PCM16, 16kHz mono) are buffered and exposed via a global callback
|
||||
* 5. The Node.js side polls for chunks and sends them to the Gateway
|
||||
* 4. Audio is captured at native 48kHz, downsampled to 16kHz, and converted to PCM16
|
||||
* 5. Audio chunks are buffered and the Node.js side polls for them to send to the Gateway
|
||||
*/
|
||||
export class AudioCaptureProcedure {
|
||||
private _page: Page;
|
||||
|
|
@ -39,7 +39,6 @@ export class AudioCaptureProcedure {
|
|||
this._logger.info('[AudioCapture] Injecting RTCPeerConnection wrapper...');
|
||||
|
||||
await this._page.addInitScript(() => {
|
||||
// Audio chunk buffer — Node.js polls this periodically
|
||||
(window as any).__audioCaptureChunks = [] as string[];
|
||||
(window as any).__audioCaptureActive = false;
|
||||
|
||||
|
|
@ -56,15 +55,21 @@ export class AudioCaptureProcedure {
|
|||
|
||||
try {
|
||||
const AudioCtx = window.AudioContext || (window as any).webkitAudioContext;
|
||||
const ctx = new AudioCtx({ sampleRate: 16000 });
|
||||
// Use native sample rate (48kHz for WebRTC/Opus) to avoid
|
||||
// forced resampling which destabilises the Chromium audio stack.
|
||||
const ctx = new AudioCtx();
|
||||
const nativeRate = ctx.sampleRate;
|
||||
const stream = new MediaStream([event.track]);
|
||||
const source = ctx.createMediaStreamSource(stream);
|
||||
|
||||
// ScriptProcessor for raw PCM access (deprecated but widely supported)
|
||||
const processor = ctx.createScriptProcessor(4096, 1, 1);
|
||||
// ScriptProcessor with larger buffer (8192) reduces callback
|
||||
// frequency and gives the renderer more breathing room.
|
||||
const processor = ctx.createScriptProcessor(8192, 1, 1);
|
||||
let chunkBuffer: Float32Array[] = [];
|
||||
let samplesCollected = 0;
|
||||
const samplesPerChunk = 16000; // 1 second of audio at 16kHz
|
||||
// Collect ~1 second of audio at native rate before emitting
|
||||
const samplesPerChunk = nativeRate;
|
||||
const targetRate = 16000;
|
||||
|
||||
processor.onaudioprocess = (e: AudioProcessingEvent) => {
|
||||
const input = e.inputBuffer.getChannelData(0);
|
||||
|
|
@ -72,7 +77,7 @@ export class AudioCaptureProcedure {
|
|||
samplesCollected += input.length;
|
||||
|
||||
if (samplesCollected >= samplesPerChunk) {
|
||||
// Merge buffers into one Float32Array
|
||||
// Merge buffers into one contiguous array
|
||||
const merged = new Float32Array(samplesCollected);
|
||||
let offset = 0;
|
||||
for (const buf of chunkBuffer) {
|
||||
|
|
@ -80,11 +85,14 @@ export class AudioCaptureProcedure {
|
|||
offset += buf.length;
|
||||
}
|
||||
|
||||
// Convert Float32 [-1,1] to PCM16 Int16
|
||||
const pcm16 = new Int16Array(merged.length);
|
||||
for (let i = 0; i < merged.length; i++) {
|
||||
const s = Math.max(-1, Math.min(1, merged[i]));
|
||||
pcm16[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
|
||||
// Downsample from nativeRate to 16 kHz
|
||||
const ratio = nativeRate / targetRate;
|
||||
const outLen = Math.floor(merged.length / ratio);
|
||||
const pcm16 = new Int16Array(outLen);
|
||||
for (let i = 0; i < outLen; i++) {
|
||||
const srcIdx = Math.floor(i * ratio);
|
||||
const s = Math.max(-1, Math.min(1, merged[srcIdx]));
|
||||
pcm16[i] = Math.round(s * 32767);
|
||||
}
|
||||
|
||||
// Convert to base64
|
||||
|
|
@ -95,7 +103,6 @@ export class AudioCaptureProcedure {
|
|||
}
|
||||
const base64 = btoa(binary);
|
||||
|
||||
// Push to buffer for Node.js to poll
|
||||
const chunks = (window as any).__audioCaptureChunks as string[];
|
||||
if (chunks.length < 30) {
|
||||
chunks.push(base64);
|
||||
|
|
@ -107,15 +114,37 @@ export class AudioCaptureProcedure {
|
|||
};
|
||||
|
||||
source.connect(processor);
|
||||
processor.connect(ctx.destination);
|
||||
// Connect to a silent gain node so the ScriptProcessor fires
|
||||
// its onaudioprocess callback without routing captured audio
|
||||
// to the speakers (which would conflict with the TTS AudioContext).
|
||||
const silentGain = ctx.createGain();
|
||||
silentGain.gain.value = 0;
|
||||
processor.connect(silentGain);
|
||||
silentGain.connect(ctx.destination);
|
||||
|
||||
// Resume the context explicitly — in authMode Chromium does
|
||||
// not set --autoplay-policy, so new AudioContexts start suspended.
|
||||
ctx.resume().catch(() => {});
|
||||
|
||||
// Clean up when the track ends (peer leaves, renegotiation, etc.)
|
||||
event.track.addEventListener('ended', () => {
|
||||
try {
|
||||
processor.disconnect();
|
||||
source.disconnect();
|
||||
silentGain.disconnect();
|
||||
ctx.close();
|
||||
} catch { /* already closed */ }
|
||||
(window as any).__audioCaptureActive = false;
|
||||
console.log('[AudioCapture] Audio track ended, resources cleaned up');
|
||||
});
|
||||
|
||||
// Store references for cleanup
|
||||
(window as any).__audioCaptureCtx = ctx;
|
||||
(window as any).__audioCaptureProcessor = processor;
|
||||
|
||||
console.log('[AudioCapture] WebRTC audio track intercepted, capturing at 16kHz mono');
|
||||
console.log(`[AudioCapture] WebRTC audio track intercepted, native ${nativeRate}Hz -> 16kHz mono`);
|
||||
} catch (err) {
|
||||
console.error('[AudioCapture] Failed to set up audio capture:', err);
|
||||
(window as any).__audioCaptureActive = false;
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -171,6 +200,8 @@ export class AudioCaptureProcedure {
|
|||
try {
|
||||
await this._page.evaluate(() => {
|
||||
(window as any).__audioCaptureActive = false;
|
||||
const proc = (window as any).__audioCaptureProcessor;
|
||||
if (proc) try { proc.disconnect(); } catch { /* ok */ }
|
||||
const ctx = (window as any).__audioCaptureCtx as AudioContext;
|
||||
if (ctx) ctx.close();
|
||||
});
|
||||
|
|
|
|||
|
|
@ -837,6 +837,7 @@ export class BotOrchestrator {
|
|||
'--no-sandbox',
|
||||
'--use-fake-ui-for-media-stream',
|
||||
'--use-fake-device-for-media-stream',
|
||||
'--autoplay-policy=no-user-gesture-required',
|
||||
]
|
||||
: [
|
||||
'--use-fake-ui-for-media-stream',
|
||||
|
|
|
|||
Loading…
Reference in a new issue