AudioCapture: add extended diagnostics for silent audio investigation
Made-with: Cursor
This commit is contained in:
parent
2e2fbfe8ed
commit
681744292d
1 changed file with 68 additions and 9 deletions
|
|
@@ -73,6 +73,9 @@ export class AudioCaptureProcedure {
|
||||||
try {
|
try {
|
||||||
const pcs = (window as any).__audioCapturePeerConnections as RTCPeerConnection[];
|
const pcs = (window as any).__audioCapturePeerConnections as RTCPeerConnection[];
|
||||||
pcs.push(pc);
|
pcs.push(pc);
|
||||||
|
// #region agent log
|
||||||
|
console.log(`[AudioCapture][DIAG] New RTCPeerConnection created (total: ${pcs.length}), config:`, JSON.stringify(args[0] || {}).substring(0, 200));
|
||||||
|
// #endregion
|
||||||
} catch {
|
} catch {
|
||||||
// ignore
|
// ignore
|
||||||
}
|
}
|
||||||
|
|
@@ -86,34 +89,68 @@ export class AudioCaptureProcedure {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// #region agent log
|
||||||
|
console.log(
|
||||||
|
`[AudioCapture][DIAG] Track received: id=${trackId}, enabled=${event.track.enabled}, muted=${event.track.muted}, readyState=${event.track.readyState}, label=${event.track.label}`
|
||||||
|
);
|
||||||
|
event.track.addEventListener('mute', () => {
|
||||||
|
console.log(`[AudioCapture][DIAG] Track MUTED: id=${trackId}`);
|
||||||
|
});
|
||||||
|
event.track.addEventListener('unmute', () => {
|
||||||
|
console.log(`[AudioCapture][DIAG] Track UNMUTED: id=${trackId}`);
|
||||||
|
});
|
||||||
|
// #endregion
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const AudioCtx = window.AudioContext || (window as any).webkitAudioContext;
|
const AudioCtx = window.AudioContext || (window as any).webkitAudioContext;
|
||||||
// Use native sample rate (48kHz for WebRTC/Opus) to avoid
|
|
||||||
// forced resampling which destabilises the Chromium audio stack.
|
|
||||||
const ctx = new AudioCtx();
|
const ctx = new AudioCtx();
|
||||||
const nativeRate = ctx.sampleRate;
|
const nativeRate = ctx.sampleRate;
|
||||||
const stream = new MediaStream([event.track]);
|
const stream = new MediaStream([event.track]);
|
||||||
const source = ctx.createMediaStreamSource(stream);
|
const source = ctx.createMediaStreamSource(stream);
|
||||||
|
|
||||||
// ScriptProcessor with larger buffer (8192) reduces callback
|
// #region agent log
|
||||||
// frequency and gives the renderer more breathing room.
|
console.log(
|
||||||
|
`[AudioCapture][DIAG] AudioContext: state=${ctx.state}, sampleRate=${nativeRate}, stream.active=${stream.active}, streamTracks=${stream.getAudioTracks().length}`
|
||||||
|
);
|
||||||
|
ctx.addEventListener('statechange', () => {
|
||||||
|
console.log(`[AudioCapture][DIAG] AudioContext statechange: ${ctx.state} for track=${trackId}`);
|
||||||
|
});
|
||||||
|
// #endregion
|
||||||
|
|
||||||
const processor = ctx.createScriptProcessor(8192, 1, 1);
|
const processor = ctx.createScriptProcessor(8192, 1, 1);
|
||||||
let chunkBuffer: Float32Array[] = [];
|
let chunkBuffer: Float32Array[] = [];
|
||||||
let samplesCollected = 0;
|
let samplesCollected = 0;
|
||||||
let skippedSilentChunks = 0;
|
let skippedSilentChunks = 0;
|
||||||
|
let callbackCount = 0;
|
||||||
|
let totalNonZeroSamples = 0;
|
||||||
const minRmsThreshold = 0.0015;
|
const minRmsThreshold = 0.0015;
|
||||||
// Collect ~2 seconds of audio at native rate before emitting.
|
|
||||||
// Larger chunks improve STT stability and reduce fragment transcripts.
|
|
||||||
const samplesPerChunk = nativeRate * 2;
|
const samplesPerChunk = nativeRate * 2;
|
||||||
const targetRate = 16000;
|
const targetRate = 16000;
|
||||||
|
|
||||||
processor.onaudioprocess = (e: AudioProcessingEvent) => {
|
processor.onaudioprocess = (e: AudioProcessingEvent) => {
|
||||||
const input = e.inputBuffer.getChannelData(0);
|
const input = e.inputBuffer.getChannelData(0);
|
||||||
|
callbackCount++;
|
||||||
|
|
||||||
|
// #region agent log
|
||||||
|
if (callbackCount <= 3 || callbackCount % 50 === 0) {
|
||||||
|
let nonZero = 0;
|
||||||
|
let maxAbs = 0;
|
||||||
|
for (let i = 0; i < input.length; i++) {
|
||||||
|
if (input[i] !== 0) nonZero++;
|
||||||
|
const abs = Math.abs(input[i]);
|
||||||
|
if (abs > maxAbs) maxAbs = abs;
|
||||||
|
}
|
||||||
|
totalNonZeroSamples += nonZero;
|
||||||
|
console.log(
|
||||||
|
`[AudioCapture][DIAG] onaudioprocess #${callbackCount}: bufLen=${input.length}, nonZero=${nonZero}/${input.length}, maxAbs=${maxAbs.toFixed(8)}, track.enabled=${event.track.enabled}, track.muted=${event.track.muted}, track.readyState=${event.track.readyState}, ctx.state=${ctx.state}, totalNonZero=${totalNonZeroSamples}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// #endregion
|
||||||
|
|
||||||
chunkBuffer.push(new Float32Array(input));
|
chunkBuffer.push(new Float32Array(input));
|
||||||
samplesCollected += input.length;
|
samplesCollected += input.length;
|
||||||
|
|
||||||
if (samplesCollected >= samplesPerChunk) {
|
if (samplesCollected >= samplesPerChunk) {
|
||||||
// Merge buffers into one contiguous array
|
|
||||||
const merged = new Float32Array(samplesCollected);
|
const merged = new Float32Array(samplesCollected);
|
||||||
let offset = 0;
|
let offset = 0;
|
||||||
for (const buf of chunkBuffer) {
|
for (const buf of chunkBuffer) {
|
||||||
|
|
@@ -121,7 +158,6 @@ export class AudioCaptureProcedure {
|
||||||
offset += buf.length;
|
offset += buf.length;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate RMS to detect real audio activity
|
|
||||||
let powerSum = 0;
|
let powerSum = 0;
|
||||||
for (let i = 0; i < merged.length; i++) {
|
for (let i = 0; i < merged.length; i++) {
|
||||||
powerSum += merged[i] * merged[i];
|
powerSum += merged[i] * merged[i];
|
||||||
|
|
@@ -132,7 +168,7 @@ export class AudioCaptureProcedure {
|
||||||
skippedSilentChunks++;
|
skippedSilentChunks++;
|
||||||
if (skippedSilentChunks % 10 === 0) {
|
if (skippedSilentChunks % 10 === 0) {
|
||||||
console.log(
|
console.log(
|
||||||
`[AudioCapture] silent chunk skipped: track=${trackId}, readyState=${event.track.readyState}, rms=${rms.toFixed(6)}`
|
`[AudioCapture] silent chunk skipped: track=${trackId}, readyState=${event.track.readyState}, muted=${event.track.muted}, enabled=${event.track.enabled}, rms=${rms.toFixed(6)}, callbacks=${callbackCount}, totalNonZero=${totalNonZeroSamples}`
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
chunkBuffer = [];
|
chunkBuffer = [];
|
||||||
|
|
@@ -239,8 +275,31 @@ export class AudioCaptureProcedure {
|
||||||
|
|
||||||
this._logger.info('[AudioCapture] Starting audio chunk polling...');
|
this._logger.info('[AudioCapture] Starting audio chunk polling...');
|
||||||
|
|
||||||
|
// #region agent log
|
||||||
|
let pollCount = 0;
|
||||||
|
// #endregion
|
||||||
this._pollInterval = setInterval(async () => {
|
this._pollInterval = setInterval(async () => {
|
||||||
try {
|
try {
|
||||||
|
// #region agent log
|
||||||
|
pollCount++;
|
||||||
|
if (pollCount % 60 === 1) {
|
||||||
|
const diagInfo = await this._page.evaluate(() => {
|
||||||
|
const pcs = (window as any).__audioCapturePeerConnections as RTCPeerConnection[] || [];
|
||||||
|
const procs = (window as any).__audioCaptureProcessors as Record<string, any> || {};
|
||||||
|
const ctxs = (window as any).__audioCaptureContexts as Record<string, AudioContext> || {};
|
||||||
|
const procKeys = Object.keys(procs);
|
||||||
|
const ctxStates = Object.entries(ctxs).map(([k, c]) => `${k}:${c.state}`);
|
||||||
|
return {
|
||||||
|
peerConnections: pcs.length,
|
||||||
|
pcStates: pcs.map((p: RTCPeerConnection) => p.connectionState || 'unknown'),
|
||||||
|
processors: procKeys.length,
|
||||||
|
processorTrackIds: procKeys,
|
||||||
|
audioContextStates: ctxStates,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
this._logger.info(`[AudioCapture][DIAG] Periodic: ${JSON.stringify(diagInfo)}`);
|
||||||
|
}
|
||||||
|
// #endregion
|
||||||
const chunks = await this._page.evaluate(() => {
|
const chunks = await this._page.evaluate(() => {
|
||||||
const buf = (window as any).__audioCaptureChunks as CapturedAudioChunk[];
|
const buf = (window as any).__audioCaptureChunks as CapturedAudioChunk[];
|
||||||
const result = buf.splice(0, buf.length);
|
const result = buf.splice(0, buf.length);
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue