AudioCapture: add extended diagnostics for silent audio investigation
Made-with: Cursor
This commit is contained in:
parent
2e2fbfe8ed
commit
681744292d
1 changed file with 68 additions and 9 deletions
|
|
@@ -73,6 +73,9 @@ export class AudioCaptureProcedure {
|
|||
try {
|
||||
const pcs = (window as any).__audioCapturePeerConnections as RTCPeerConnection[];
|
||||
pcs.push(pc);
|
||||
// #region agent log
|
||||
console.log(`[AudioCapture][DIAG] New RTCPeerConnection created (total: ${pcs.length}), config:`, JSON.stringify(args[0] || {}).substring(0, 200));
|
||||
// #endregion
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
|
|
@@ -86,34 +89,68 @@ export class AudioCaptureProcedure {
|
|||
return;
|
||||
}
|
||||
|
||||
// #region agent log
|
||||
console.log(
|
||||
`[AudioCapture][DIAG] Track received: id=${trackId}, enabled=${event.track.enabled}, muted=${event.track.muted}, readyState=${event.track.readyState}, label=${event.track.label}`
|
||||
);
|
||||
event.track.addEventListener('mute', () => {
|
||||
console.log(`[AudioCapture][DIAG] Track MUTED: id=${trackId}`);
|
||||
});
|
||||
event.track.addEventListener('unmute', () => {
|
||||
console.log(`[AudioCapture][DIAG] Track UNMUTED: id=${trackId}`);
|
||||
});
|
||||
// #endregion
|
||||
|
||||
try {
|
||||
const AudioCtx = window.AudioContext || (window as any).webkitAudioContext;
|
||||
// Use native sample rate (48kHz for WebRTC/Opus) to avoid
|
||||
// forced resampling which destabilises the Chromium audio stack.
|
||||
const ctx = new AudioCtx();
|
||||
const nativeRate = ctx.sampleRate;
|
||||
const stream = new MediaStream([event.track]);
|
||||
const source = ctx.createMediaStreamSource(stream);
|
||||
|
||||
// ScriptProcessor with larger buffer (8192) reduces callback
|
||||
// frequency and gives the renderer more breathing room.
|
||||
// #region agent log
|
||||
console.log(
|
||||
`[AudioCapture][DIAG] AudioContext: state=${ctx.state}, sampleRate=${nativeRate}, stream.active=${stream.active}, streamTracks=${stream.getAudioTracks().length}`
|
||||
);
|
||||
ctx.addEventListener('statechange', () => {
|
||||
console.log(`[AudioCapture][DIAG] AudioContext statechange: ${ctx.state} for track=${trackId}`);
|
||||
});
|
||||
// #endregion
|
||||
|
||||
const processor = ctx.createScriptProcessor(8192, 1, 1);
|
||||
let chunkBuffer: Float32Array[] = [];
|
||||
let samplesCollected = 0;
|
||||
let skippedSilentChunks = 0;
|
||||
let callbackCount = 0;
|
||||
let totalNonZeroSamples = 0;
|
||||
const minRmsThreshold = 0.0015;
|
||||
// Collect ~2 seconds of audio at native rate before emitting.
|
||||
// Larger chunks improve STT stability and reduce fragment transcripts.
|
||||
const samplesPerChunk = nativeRate * 2;
|
||||
const targetRate = 16000;
|
||||
|
||||
processor.onaudioprocess = (e: AudioProcessingEvent) => {
|
||||
const input = e.inputBuffer.getChannelData(0);
|
||||
callbackCount++;
|
||||
|
||||
// #region agent log
|
||||
if (callbackCount <= 3 || callbackCount % 50 === 0) {
|
||||
let nonZero = 0;
|
||||
let maxAbs = 0;
|
||||
for (let i = 0; i < input.length; i++) {
|
||||
if (input[i] !== 0) nonZero++;
|
||||
const abs = Math.abs(input[i]);
|
||||
if (abs > maxAbs) maxAbs = abs;
|
||||
}
|
||||
totalNonZeroSamples += nonZero;
|
||||
console.log(
|
||||
`[AudioCapture][DIAG] onaudioprocess #${callbackCount}: bufLen=${input.length}, nonZero=${nonZero}/${input.length}, maxAbs=${maxAbs.toFixed(8)}, track.enabled=${event.track.enabled}, track.muted=${event.track.muted}, track.readyState=${event.track.readyState}, ctx.state=${ctx.state}, totalNonZero=${totalNonZeroSamples}`
|
||||
);
|
||||
}
|
||||
// #endregion
|
||||
|
||||
chunkBuffer.push(new Float32Array(input));
|
||||
samplesCollected += input.length;
|
||||
|
||||
if (samplesCollected >= samplesPerChunk) {
|
||||
// Merge buffers into one contiguous array
|
||||
const merged = new Float32Array(samplesCollected);
|
||||
let offset = 0;
|
||||
for (const buf of chunkBuffer) {
|
||||
|
|
@@ -121,7 +158,6 @@ export class AudioCaptureProcedure {
|
|||
offset += buf.length;
|
||||
}
|
||||
|
||||
// Calculate RMS to detect real audio activity
|
||||
let powerSum = 0;
|
||||
for (let i = 0; i < merged.length; i++) {
|
||||
powerSum += merged[i] * merged[i];
|
||||
|
|
@@ -132,7 +168,7 @@ export class AudioCaptureProcedure {
|
|||
skippedSilentChunks++;
|
||||
if (skippedSilentChunks % 10 === 0) {
|
||||
console.log(
|
||||
`[AudioCapture] silent chunk skipped: track=${trackId}, readyState=${event.track.readyState}, rms=${rms.toFixed(6)}`
|
||||
`[AudioCapture] silent chunk skipped: track=${trackId}, readyState=${event.track.readyState}, muted=${event.track.muted}, enabled=${event.track.enabled}, rms=${rms.toFixed(6)}, callbacks=${callbackCount}, totalNonZero=${totalNonZeroSamples}`
|
||||
);
|
||||
}
|
||||
chunkBuffer = [];
|
||||
|
|
@@ -239,8 +275,31 @@ export class AudioCaptureProcedure {
|
|||
|
||||
this._logger.info('[AudioCapture] Starting audio chunk polling...');
|
||||
|
||||
// #region agent log
|
||||
let pollCount = 0;
|
||||
// #endregion
|
||||
this._pollInterval = setInterval(async () => {
|
||||
try {
|
||||
// #region agent log
|
||||
pollCount++;
|
||||
if (pollCount % 60 === 1) {
|
||||
const diagInfo = await this._page.evaluate(() => {
|
||||
const pcs = (window as any).__audioCapturePeerConnections as RTCPeerConnection[] || [];
|
||||
const procs = (window as any).__audioCaptureProcessors as Record<string, any> || {};
|
||||
const ctxs = (window as any).__audioCaptureContexts as Record<string, AudioContext> || {};
|
||||
const procKeys = Object.keys(procs);
|
||||
const ctxStates = Object.entries(ctxs).map(([k, c]) => `${k}:${c.state}`);
|
||||
return {
|
||||
peerConnections: pcs.length,
|
||||
pcStates: pcs.map((p: RTCPeerConnection) => p.connectionState || 'unknown'),
|
||||
processors: procKeys.length,
|
||||
processorTrackIds: procKeys,
|
||||
audioContextStates: ctxStates,
|
||||
};
|
||||
});
|
||||
this._logger.info(`[AudioCapture][DIAG] Periodic: ${JSON.stringify(diagInfo)}`);
|
||||
}
|
||||
// #endregion
|
||||
const chunks = await this._page.evaluate(() => {
|
||||
const buf = (window as any).__audioCaptureChunks as CapturedAudioChunk[];
|
||||
const result = buf.splice(0, buf.length);
|
||||
|
|
|
|||
Loading…
Reference in a new issue