From 681744292d541ff5143442389417e177afda8aa3 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Fri, 27 Feb 2026 16:40:08 +0100
Subject: [PATCH] AudioCapture: add extended diagnostics for silent audio investigation
Made-with: Cursor
---
src/bot/audioCaptureProcedure.ts | 77 ++++++++++++++++++++++++++++----
1 file changed, 68 insertions(+), 9 deletions(-)
diff --git a/src/bot/audioCaptureProcedure.ts b/src/bot/audioCaptureProcedure.ts
index 819b6b5..95248b9 100644
--- a/src/bot/audioCaptureProcedure.ts
+++ b/src/bot/audioCaptureProcedure.ts
@@ -73,6 +73,9 @@ export class AudioCaptureProcedure {
try {
const pcs = (window as any).__audioCapturePeerConnections as RTCPeerConnection[];
pcs.push(pc);
+ // #region agent log
+ console.log(`[AudioCapture][DIAG] New RTCPeerConnection created (total: ${pcs.length}), config:`, JSON.stringify(args[0] || {}).substring(0, 200));
+ // #endregion
} catch {
// ignore
}
@@ -86,34 +89,68 @@ export class AudioCaptureProcedure {
return;
}
+ // #region agent log
+ console.log(
+ `[AudioCapture][DIAG] Track received: id=${trackId}, enabled=${event.track.enabled}, muted=${event.track.muted}, readyState=${event.track.readyState}, label=${event.track.label}`
+ );
+ event.track.addEventListener('mute', () => {
+ console.log(`[AudioCapture][DIAG] Track MUTED: id=${trackId}`);
+ });
+ event.track.addEventListener('unmute', () => {
+ console.log(`[AudioCapture][DIAG] Track UNMUTED: id=${trackId}`);
+ });
+ // #endregion
+
try {
const AudioCtx = window.AudioContext || (window as any).webkitAudioContext;
- // Use native sample rate (48kHz for WebRTC/Opus) to avoid
- // forced resampling which destabilises the Chromium audio stack.
const ctx = new AudioCtx();
const nativeRate = ctx.sampleRate;
const stream = new MediaStream([event.track]);
const source = ctx.createMediaStreamSource(stream);
- // ScriptProcessor with larger buffer (8192) reduces callback
- // frequency and gives the renderer more breathing room.
+ // #region agent log
+ console.log(
+ `[AudioCapture][DIAG] AudioContext: state=${ctx.state}, sampleRate=${nativeRate}, stream.active=${stream.active}, streamTracks=${stream.getAudioTracks().length}`
+ );
+ ctx.addEventListener('statechange', () => {
+ console.log(`[AudioCapture][DIAG] AudioContext statechange: ${ctx.state} for track=${trackId}`);
+ });
+ // #endregion
+
const processor = ctx.createScriptProcessor(8192, 1, 1);
let chunkBuffer: Float32Array[] = [];
let samplesCollected = 0;
let skippedSilentChunks = 0;
+ let callbackCount = 0;
+ let totalNonZeroSamples = 0;
const minRmsThreshold = 0.0015;
- // Collect ~2 seconds of audio at native rate before emitting.
- // Larger chunks improve STT stability and reduce fragment transcripts.
const samplesPerChunk = nativeRate * 2;
const targetRate = 16000;
processor.onaudioprocess = (e: AudioProcessingEvent) => {
const input = e.inputBuffer.getChannelData(0);
+ callbackCount++;
+
+ // #region agent log
+ if (callbackCount <= 3 || callbackCount % 50 === 0) {
+ let nonZero = 0;
+ let maxAbs = 0;
+ for (let i = 0; i < input.length; i++) {
+ if (input[i] !== 0) nonZero++;
+ const abs = Math.abs(input[i]);
+ if (abs > maxAbs) maxAbs = abs;
+ }
+ totalNonZeroSamples += nonZero;
+ console.log(
+ `[AudioCapture][DIAG] onaudioprocess #${callbackCount}: bufLen=${input.length}, nonZero=${nonZero}/${input.length}, maxAbs=${maxAbs.toFixed(8)}, track.enabled=${event.track.enabled}, track.muted=${event.track.muted}, track.readyState=${event.track.readyState}, ctx.state=${ctx.state}, totalNonZero=${totalNonZeroSamples}`
+ );
+ }
+ // #endregion
+
chunkBuffer.push(new Float32Array(input));
samplesCollected += input.length;
if (samplesCollected >= samplesPerChunk) {
- // Merge buffers into one contiguous array
const merged = new Float32Array(samplesCollected);
let offset = 0;
for (const buf of chunkBuffer) {
@@ -121,7 +158,6 @@ export class AudioCaptureProcedure {
offset += buf.length;
}
- // Calculate RMS to detect real audio activity
let powerSum = 0;
for (let i = 0; i < merged.length; i++) {
powerSum += merged[i] * merged[i];
@@ -132,7 +168,7 @@ export class AudioCaptureProcedure {
skippedSilentChunks++;
if (skippedSilentChunks % 10 === 0) {
console.log(
- `[AudioCapture] silent chunk skipped: track=${trackId}, readyState=${event.track.readyState}, rms=${rms.toFixed(6)}`
+ `[AudioCapture] silent chunk skipped: track=${trackId}, readyState=${event.track.readyState}, muted=${event.track.muted}, enabled=${event.track.enabled}, rms=${rms.toFixed(6)}, callbacks=${callbackCount}, totalNonZero=${totalNonZeroSamples}`
);
}
chunkBuffer = [];
@@ -239,8 +275,31 @@ export class AudioCaptureProcedure {
this._logger.info('[AudioCapture] Starting audio chunk polling...');
+ // #region agent log
+ let pollCount = 0;
+ // #endregion
this._pollInterval = setInterval(async () => {
try {
+ // #region agent log
+ pollCount++;
+ if (pollCount % 60 === 1) {
+ const diagInfo = await this._page.evaluate(() => { // runs inside the page: snapshot the capture globals for the periodic log
+ const pcs = (window as any).__audioCapturePeerConnections as RTCPeerConnection[] || [];
+ const procs = (window as any).__audioCaptureProcessors as Record<string, unknown> || {}; // only the keys are read here
+ const ctxs = (window as any).__audioCaptureContexts as Record<string, AudioContext> || {}; // NOTE(review): value type inferred from the .state read below; confirm
+ const procKeys = Object.keys(procs);
+ const ctxStates = Object.entries(ctxs).map(([k, c]) => `${k}:${c.state}`);
+ return {
+ peerConnections: pcs.length,
+ pcStates: pcs.map((p: RTCPeerConnection) => p.connectionState || 'unknown'),
+ processors: procKeys.length,
+ processorTrackIds: procKeys,
+ audioContextStates: ctxStates,
+ };
+ });
+ this._logger.info(`[AudioCapture][DIAG] Periodic: ${JSON.stringify(diagInfo)}`);
+ }
+ // #endregion
const chunks = await this._page.evaluate(() => {
const buf = (window as any).__audioCaptureChunks as CapturedAudioChunk[];
const result = buf.splice(0, buf.length);