AudioCapture: add pre-roll buffer for speech onset, fix diagnostic counter

Made-with: Cursor
This commit is contained in:
ValueOn AG 2026-02-27 22:14:58 +01:00
parent 681744292d
commit cd22b1da9f

View file

@ -126,23 +126,30 @@ export class AudioCaptureProcedure {
const minRmsThreshold = 0.0015;
const samplesPerChunk = nativeRate * 2;
const targetRate = 16000;
// Pre-roll: keep the last 500 ms of each discarded silent chunk so that
// a speech onset at the tail of a silent window is preserved.
const preRollSamples = Math.ceil(nativeRate * 0.5);
processor.onaudioprocess = (e: AudioProcessingEvent) => {
const input = e.inputBuffer.getChannelData(0);
callbackCount++;
// #region agent log
// Count non-zero samples on EVERY callback for accurate diagnostics
let nonZeroThisCallback = 0;
for (let i = 0; i < input.length; i++) {
if (input[i] !== 0) nonZeroThisCallback++;
}
totalNonZeroSamples += nonZeroThisCallback;
if (callbackCount <= 3 || callbackCount % 50 === 0) {
let nonZero = 0;
let maxAbs = 0;
for (let i = 0; i < input.length; i++) {
if (input[i] !== 0) nonZero++;
const abs = Math.abs(input[i]);
if (abs > maxAbs) maxAbs = abs;
}
totalNonZeroSamples += nonZero;
console.log(
`[AudioCapture][DIAG] onaudioprocess #${callbackCount}: bufLen=${input.length}, nonZero=${nonZero}/${input.length}, maxAbs=${maxAbs.toFixed(8)}, track.enabled=${event.track.enabled}, track.muted=${event.track.muted}, track.readyState=${event.track.readyState}, ctx.state=${ctx.state}, totalNonZero=${totalNonZeroSamples}`
`[AudioCapture][DIAG] onaudioprocess #${callbackCount}: bufLen=${input.length}, nonZero=${nonZeroThisCallback}/${input.length}, maxAbs=${maxAbs.toFixed(8)}, track.enabled=${event.track.enabled}, track.muted=${event.track.muted}, track.readyState=${event.track.readyState}, ctx.state=${ctx.state}, totalNonZero=${totalNonZeroSamples}`
);
}
// #endregion
@ -171,8 +178,13 @@ export class AudioCaptureProcedure {
`[AudioCapture] silent chunk skipped: track=${trackId}, readyState=${event.track.readyState}, muted=${event.track.muted}, enabled=${event.track.enabled}, rms=${rms.toFixed(6)}, callbacks=${callbackCount}, totalNonZero=${totalNonZeroSamples}`
);
}
chunkBuffer = [];
samplesCollected = 0;
// Pre-roll: retain the tail of the silent chunk (at most preRollSamples)
// so the next chunk begins with it and includes any speech onset that
// may have started in the last few hundred ms of this window.
const keep = Math.min(preRollSamples, merged.length);
const preRoll = merged.slice(merged.length - keep);
chunkBuffer = [preRoll];
samplesCollected = keep;
return;
}