AudioCapture: add extended diagnostics for silent audio investigation

Made-with: Cursor
This commit is contained in:
ValueOn AG 2026-02-27 16:40:08 +01:00
parent 2e2fbfe8ed
commit 681744292d

View file

@@ -73,6 +73,9 @@ export class AudioCaptureProcedure {
try { try {
const pcs = (window as any).__audioCapturePeerConnections as RTCPeerConnection[]; const pcs = (window as any).__audioCapturePeerConnections as RTCPeerConnection[];
pcs.push(pc); pcs.push(pc);
// #region agent log
console.log(`[AudioCapture][DIAG] New RTCPeerConnection created (total: ${pcs.length}), config:`, JSON.stringify(args[0] || {}).substring(0, 200));
// #endregion
} catch { } catch {
// ignore // ignore
} }
@@ -86,34 +89,68 @@ export class AudioCaptureProcedure {
return; return;
} }
// #region agent log
console.log(
`[AudioCapture][DIAG] Track received: id=${trackId}, enabled=${event.track.enabled}, muted=${event.track.muted}, readyState=${event.track.readyState}, label=${event.track.label}`
);
event.track.addEventListener('mute', () => {
console.log(`[AudioCapture][DIAG] Track MUTED: id=${trackId}`);
});
event.track.addEventListener('unmute', () => {
console.log(`[AudioCapture][DIAG] Track UNMUTED: id=${trackId}`);
});
// #endregion
try { try {
const AudioCtx = window.AudioContext || (window as any).webkitAudioContext; const AudioCtx = window.AudioContext || (window as any).webkitAudioContext;
// Use native sample rate (48kHz for WebRTC/Opus) to avoid
// forced resampling which destabilises the Chromium audio stack.
const ctx = new AudioCtx(); const ctx = new AudioCtx();
const nativeRate = ctx.sampleRate; const nativeRate = ctx.sampleRate;
const stream = new MediaStream([event.track]); const stream = new MediaStream([event.track]);
const source = ctx.createMediaStreamSource(stream); const source = ctx.createMediaStreamSource(stream);
// ScriptProcessor with larger buffer (8192) reduces callback // #region agent log
// frequency and gives the renderer more breathing room. console.log(
`[AudioCapture][DIAG] AudioContext: state=${ctx.state}, sampleRate=${nativeRate}, stream.active=${stream.active}, streamTracks=${stream.getAudioTracks().length}`
);
ctx.addEventListener('statechange', () => {
console.log(`[AudioCapture][DIAG] AudioContext statechange: ${ctx.state} for track=${trackId}`);
});
// #endregion
const processor = ctx.createScriptProcessor(8192, 1, 1); const processor = ctx.createScriptProcessor(8192, 1, 1);
let chunkBuffer: Float32Array[] = []; let chunkBuffer: Float32Array[] = [];
let samplesCollected = 0; let samplesCollected = 0;
let skippedSilentChunks = 0; let skippedSilentChunks = 0;
let callbackCount = 0;
let totalNonZeroSamples = 0;
const minRmsThreshold = 0.0015; const minRmsThreshold = 0.0015;
// Collect ~2 seconds of audio at native rate before emitting.
// Larger chunks improve STT stability and reduce fragment transcripts.
const samplesPerChunk = nativeRate * 2; const samplesPerChunk = nativeRate * 2;
const targetRate = 16000; const targetRate = 16000;
processor.onaudioprocess = (e: AudioProcessingEvent) => { processor.onaudioprocess = (e: AudioProcessingEvent) => {
const input = e.inputBuffer.getChannelData(0); const input = e.inputBuffer.getChannelData(0);
callbackCount++;
// #region agent log
if (callbackCount <= 3 || callbackCount % 50 === 0) {
let nonZero = 0;
let maxAbs = 0;
for (let i = 0; i < input.length; i++) {
if (input[i] !== 0) nonZero++;
const abs = Math.abs(input[i]);
if (abs > maxAbs) maxAbs = abs;
}
totalNonZeroSamples += nonZero;
console.log(
`[AudioCapture][DIAG] onaudioprocess #${callbackCount}: bufLen=${input.length}, nonZero=${nonZero}/${input.length}, maxAbs=${maxAbs.toFixed(8)}, track.enabled=${event.track.enabled}, track.muted=${event.track.muted}, track.readyState=${event.track.readyState}, ctx.state=${ctx.state}, totalNonZero=${totalNonZeroSamples}`
);
}
// #endregion
chunkBuffer.push(new Float32Array(input)); chunkBuffer.push(new Float32Array(input));
samplesCollected += input.length; samplesCollected += input.length;
if (samplesCollected >= samplesPerChunk) { if (samplesCollected >= samplesPerChunk) {
// Merge buffers into one contiguous array
const merged = new Float32Array(samplesCollected); const merged = new Float32Array(samplesCollected);
let offset = 0; let offset = 0;
for (const buf of chunkBuffer) { for (const buf of chunkBuffer) {
@@ -121,7 +158,6 @@ export class AudioCaptureProcedure {
offset += buf.length; offset += buf.length;
} }
// Calculate RMS to detect real audio activity
let powerSum = 0; let powerSum = 0;
for (let i = 0; i < merged.length; i++) { for (let i = 0; i < merged.length; i++) {
powerSum += merged[i] * merged[i]; powerSum += merged[i] * merged[i];
@@ -132,7 +168,7 @@ export class AudioCaptureProcedure {
skippedSilentChunks++; skippedSilentChunks++;
if (skippedSilentChunks % 10 === 0) { if (skippedSilentChunks % 10 === 0) {
console.log( console.log(
`[AudioCapture] silent chunk skipped: track=${trackId}, readyState=${event.track.readyState}, rms=${rms.toFixed(6)}` `[AudioCapture] silent chunk skipped: track=${trackId}, readyState=${event.track.readyState}, muted=${event.track.muted}, enabled=${event.track.enabled}, rms=${rms.toFixed(6)}, callbacks=${callbackCount}, totalNonZero=${totalNonZeroSamples}`
); );
} }
chunkBuffer = []; chunkBuffer = [];
@@ -239,8 +275,31 @@ export class AudioCaptureProcedure {
this._logger.info('[AudioCapture] Starting audio chunk polling...'); this._logger.info('[AudioCapture] Starting audio chunk polling...');
// #region agent log
let pollCount = 0;
// #endregion
this._pollInterval = setInterval(async () => { this._pollInterval = setInterval(async () => {
try { try {
// #region agent log
pollCount++;
if (pollCount % 60 === 1) {
const diagInfo = await this._page.evaluate(() => {
const pcs = (window as any).__audioCapturePeerConnections as RTCPeerConnection[] || [];
const procs = (window as any).__audioCaptureProcessors as Record<string, any> || {};
const ctxs = (window as any).__audioCaptureContexts as Record<string, AudioContext> || {};
const procKeys = Object.keys(procs);
const ctxStates = Object.entries(ctxs).map(([k, c]) => `${k}:${c.state}`);
return {
peerConnections: pcs.length,
pcStates: pcs.map((p: RTCPeerConnection) => p.connectionState || 'unknown'),
processors: procKeys.length,
processorTrackIds: procKeys,
audioContextStates: ctxStates,
};
});
this._logger.info(`[AudioCapture][DIAG] Periodic: ${JSON.stringify(diagInfo)}`);
}
// #endregion
const chunks = await this._page.evaluate(() => { const chunks = await this._page.evaluate(() => {
const buf = (window as any).__audioCaptureChunks as CapturedAudioChunk[]; const buf = (window as any).__audioCaptureChunks as CapturedAudioChunk[];
const result = buf.splice(0, buf.length); const result = buf.splice(0, buf.length);