diff --git a/src/bot/audioCaptureProcedure.ts b/src/bot/audioCaptureProcedure.ts index 95248b9..6ebb365 100644 --- a/src/bot/audioCaptureProcedure.ts +++ b/src/bot/audioCaptureProcedure.ts @@ -126,23 +126,30 @@ export class AudioCaptureProcedure { const minRmsThreshold = 0.0015; const samplesPerChunk = nativeRate * 2; const targetRate = 16000; + // Pre-roll: keep last 500ms of discarded silent chunks so that + // speech onsets at the tail of a silent window are preserved. + const preRollSamples = Math.ceil(nativeRate * 0.5); processor.onaudioprocess = (e: AudioProcessingEvent) => { const input = e.inputBuffer.getChannelData(0); callbackCount++; // #region agent log + // Count non-zero samples on EVERY callback for accurate diagnostics + let nonZeroThisCallback = 0; + for (let i = 0; i < input.length; i++) { + if (input[i] !== 0) nonZeroThisCallback++; + } + totalNonZeroSamples += nonZeroThisCallback; + if (callbackCount <= 3 || callbackCount % 50 === 0) { - let nonZero = 0; let maxAbs = 0; for (let i = 0; i < input.length; i++) { - if (input[i] !== 0) nonZero++; const abs = Math.abs(input[i]); if (abs > maxAbs) maxAbs = abs; } - totalNonZeroSamples += nonZero; console.log( - `[AudioCapture][DIAG] onaudioprocess #${callbackCount}: bufLen=${input.length}, nonZero=${nonZero}/${input.length}, maxAbs=${maxAbs.toFixed(8)}, track.enabled=${event.track.enabled}, track.muted=${event.track.muted}, track.readyState=${event.track.readyState}, ctx.state=${ctx.state}, totalNonZero=${totalNonZeroSamples}` + `[AudioCapture][DIAG] onaudioprocess #${callbackCount}: bufLen=${input.length}, nonZero=${nonZeroThisCallback}/${input.length}, maxAbs=${maxAbs.toFixed(8)}, track.enabled=${event.track.enabled}, track.muted=${event.track.muted}, track.readyState=${event.track.readyState}, ctx.state=${ctx.state}, totalNonZero=${totalNonZeroSamples}` ); } // #endregion @@ -171,8 +178,13 @@ export class AudioCaptureProcedure { `[AudioCapture] silent chunk skipped: track=${trackId}, readyState=${event.track.readyState}, muted=${event.track.muted}, enabled=${event.track.enabled}, rms=${rms.toFixed(6)}, callbacks=${callbackCount}, totalNonZero=${totalNonZeroSamples}` ); } - chunkBuffer = []; - samplesCollected = 0; + // Pre-roll: retain the tail of the silent chunk so the next + // voiced chunk includes the speech onset that may have started + // in the last few hundred ms of this window. + const keep = Math.min(preRollSamples, merged.length); + const preRoll = merged.slice(merged.length - keep); + chunkBuffer = [preRoll]; + samplesCollected = keep; return; }