diff --git a/src/bot/audioProcedure.ts b/src/bot/audioProcedure.ts index 73bb827..6be2c7d 100644 --- a/src/bot/audioProcedure.ts +++ b/src/bot/audioProcedure.ts @@ -105,12 +105,18 @@ export class AudioProcedure { } })(); const rr: any = r || {}; + const vsArr = (rr.videoStats || []) as any[]; + const vs = vsArr.length + ? vsArr.map(v => `${v.kind}:b=${v.bytes},p=${v.packets},fEnc=${v.framesEncoded},fSent=${v.framesSent},fps=${v.fps},${v.w}x${v.h}`).join(' | ') + : 'none'; parts.push( `[${shortUrl}] r=${rr.replaced ?? 0} add=${rr.added ?? 0} pcs=${rr.pcs ?? 0} ` + `tx=${rr.totalTransceivers ?? 0} vidTx=${rr.videoTransceivers ?? 0} ` + `vidWith=${rr.videoSendersWithTrack ?? 0} vidNoTrack=${rr.videoSendersWithoutTrack ?? 0} ` + `dirB=[${(rr.directionsBefore || []).join(',')}] dirA=[${(rr.directionsAfter || []).join(',')}] ` - + `${rr.reason || ''}`.trim(), + + `cd=[${(rr.currentDirections || []).join(',')}] ` + + `track=${rr.trackId || 'n/a'}(en=${rr.trackEnabled},rs=${rr.trackReady},mu=${rr.trackMuted}) ` + + `vstats=[${vs}] ${rr.reason || ''}`.trim(), ); } catch (e: any) { parts.push(`err=${String(e?.message || e).slice(0, 64)}`); diff --git a/src/bot/mediaGetUserMediaPatch.ts b/src/bot/mediaGetUserMediaPatch.ts index cc4456b..8827587 100644 --- a/src/bot/mediaGetUserMediaPatch.ts +++ b/src/bot/mediaGetUserMediaPatch.ts @@ -19,12 +19,20 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => w.__gumChromium = (navigator.mediaDevices as any).getUserMedia.bind(navigator.mediaDevices); } - // Patch RTCPeerConnection.prototype methods once per realm to observe + react to Teams' track placement. + // Patch RTCPeerConnection.prototype methods once per realm to observe Teams' + // track placement + SDP negotiation. We DO NOT modify any tracks here; gUM + // already returns the canvas video track to Teams, so the right track is + // placed on the sender automatically. We only OBSERVE so we can diagnose + // what Teams does (or fails to do). if (!w.__poweronRtcPatched && (window as any).RTCPeerConnection) { w.__poweronRtcPatched = true; const RTCProto: any = (window as any).RTCPeerConnection.prototype; const _origAddTrack = RTCProto.addTrack; const _origAddTransceiver = RTCProto.addTransceiver; + const _origRemoveTrack = RTCProto.removeTrack; + const _origReplaceTrackProto = (window as any).RTCRtpSender?.prototype?.replaceTrack; + const _origSetLocalDescription = RTCProto.setLocalDescription; + const _origSetRemoteDescription = RTCProto.setRemoteDescription; RTCProto.addTrack = function (track: MediaStreamTrack, ...streams: MediaStream[]) { try { // eslint-disable-next-line no-console @@ -36,28 +44,16 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => } catch { // ignore } - let useTrack: MediaStreamTrack = track; + const sender = _origAddTrack.call(this, track, ...streams); try { if (useCanvasVideo && track && track.kind === 'video') { - if (typeof w.__startBotAvatarStream === 'function') { - w.__startBotAvatarStream(); - } - const av: MediaStreamTrack | undefined = w.__botAvatarVideoTrack; - if (av && av.readyState === 'live') { - try { - track.stop(); - } catch { - // ignore - } - useTrack = av.clone(); - // eslint-disable-next-line no-console - console.log('[AudioPlayback] pc.addTrack swapped video -> avatar id=' + useTrack.id); - } + const list = (w.__poweronVideoSenders = w.__poweronVideoSenders || []); + list.push({ sender, originalTrackId: track.id }); } } catch { // ignore } - return _origAddTrack.call(this, useTrack, ...streams); + return sender; }; RTCProto.addTransceiver = function (trackOrKind: any, init?: any) { try { @@ -72,6 +68,86 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => } return _origAddTransceiver.call(this, trackOrKind, init); }; + if (_origRemoveTrack) { + RTCProto.removeTrack = function (sender: any) { + try { + // eslint-disable-next-line no-console + console.log( + '[AudioPlayback] pc.removeTrack senderTrackKind=' + + (sender && sender.track && sender.track.kind) + + ' senderTrackId=' + (sender && sender.track && sender.track.id), + ); + } catch { + // ignore + } + return _origRemoveTrack.call(this, sender); + }; + } + if (_origReplaceTrackProto) { + (window as any).RTCRtpSender.prototype.replaceTrack = function (track: any) { + try { + // eslint-disable-next-line no-console + console.log( + '[AudioPlayback] sender.replaceTrack(by=teams?) currentKind=' + + (this.track && this.track.kind) + + ' newKind=' + (track && track.kind) + + ' newId=' + (track && track.id), + ); + } catch { + // ignore + } + return _origReplaceTrackProto.call(this, track); + }; + } + const _logSdp = (label: string, sdp?: string) => { + try { + if (!sdp) { + // eslint-disable-next-line no-console + console.log('[AudioPlayback] ' + label + ' sdp='); + return; + } + const lines = sdp.split(/\r?\n/); + const interesting: string[] = []; + let curM = ''; + for (const ln of lines) { + if (ln.startsWith('m=')) { + curM = ln.slice(2, 7); + interesting.push('M:' + ln); + } else if ( + ln.startsWith('a=sendrecv') + || ln.startsWith('a=sendonly') + || ln.startsWith('a=recvonly') + || ln.startsWith('a=inactive') + ) { + interesting.push(curM + ':' + ln); + } else if (ln.startsWith('a=mid:')) { + interesting.push(curM + ':' + ln); + } + } + // eslint-disable-next-line no-console + console.log('[AudioPlayback] ' + label + ' ' + interesting.join(' | ')); + } catch { + // ignore + } + }; + RTCProto.setLocalDescription = function (desc?: any) { + try { + const t = desc && (desc.type || desc.sdp ? desc.type : 'auto'); + _logSdp('setLocalDescription type=' + t, desc && desc.sdp); + } catch { + // ignore + } + return _origSetLocalDescription.call(this, desc); + }; + RTCProto.setRemoteDescription = function (desc?: any) { + try { + const t = desc && desc.type; + _logSdp('setRemoteDescription type=' + t, desc && desc.sdp); + } catch { + // ignore + } + return _origSetRemoteDescription.call(this, desc); + }; } if (!w.__ttsStreamDest) { @@ -113,8 +189,10 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => canvas.width = 640; canvas.height = 360; canvas.setAttribute('data-poweron-avatar', '1'); + // Render at a real size so the compositor produces frames in headless mode. + // captureStream() in headless Chromium can stall when the canvas is 0/invisible. canvas.style.cssText = - 'position:fixed;right:0;bottom:0;width:4px;height:4px;z-index:2147483646;opacity:1;pointer-events:none;'; + 'position:fixed;left:0;top:0;width:160px;height:90px;z-index:2147483646;opacity:0.99;pointer-events:none;background:#000;'; (document.body || document.documentElement).appendChild(canvas); w.__botAvatarCanvas = canvas; const c2d = canvas.getContext('2d'); @@ -157,9 +235,11 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => c2d.fillRect(0, hPx - 6, wPx, 6); }; draw(); - w.__botAvatarDrawInterval = window.setInterval(draw, 1000 / _fps); - const cap = canvas.captureStream(_fps); + // Capture at fps for compositor-driven frames AND also push manual frames + // via requestFrame() each tick for headless reliability. + const cap = (canvas as any).captureStream(_fps) as MediaStream; w.__botAvatarVideoTrack = cap.getVideoTracks()[0]; + w.__botAvatarStreamObj = cap; if (w.__botAvatarVideoTrack) { w.__botAvatarVideoTrack.enabled = true; try { @@ -168,6 +248,22 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => // ignore } } + const _tickAndPush = () => { + try { + draw(); + } catch { + // ignore + } + try { + const tr: any = w.__botAvatarVideoTrack; + if (tr && typeof tr.requestFrame === 'function') { + tr.requestFrame(); + } + } catch { + // ignore + } + }; + w.__botAvatarDrawInterval = window.setInterval(_tickAndPush, 1000 / _fps); // eslint-disable-next-line no-console console.log( '[AudioPlayback] canvas avatar stream (re)built, videoTrack=', @@ -196,7 +292,17 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => for (const pc of pcs) { const transceivers = (pc as any).getTransceivers?.() || []; totalTransceivers += transceivers.length; - let pcHasVideoSender = false; + // Snapshot signaling/connection state for diagnostics. + try { + // eslint-disable-next-line no-console + console.log( + '[AudioPlayback] pc state sig=' + (pc as any).signalingState + + ' conn=' + (pc as any).connectionState + + ' ice=' + (pc as any).iceConnectionState, + ); + } catch { + // ignore + } for (const t of transceivers) { const sender = t.sender; if (!sender) { @@ -209,13 +315,25 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => continue; } videoTransceivers++; - pcHasVideoSender = true; directionsBefore.push(t.direction); if (sender.track) { videoSendersWithTrack++; } else { videoSendersWithoutTrack++; } + // Only replace the track if Teams has fully negotiated the video sender. + // Touching it before currentDirection is set can abort the in-flight + // SDP renegotiation and leave the stream stuck. + const cd = (t as any).currentDirection; + const alreadyOurs = sender.track && sender.track.id === src.id; + if (!cd || cd === 'inactive') { + directionsAfter.push('skip(cd=' + (cd || 'null') + ')'); + continue; + } + if (alreadyOurs) { + directionsAfter.push('keep(' + t.direction + ')'); + continue; + } try { // eslint-disable-next-line no-await-in-loop await sender.replaceTrack(src.clone()); @@ -224,29 +342,50 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => if (tr && !tr.enabled) { tr.enabled = true; } - if (t.direction === 'inactive' || t.direction === 'recvonly') { - try { - t.direction = 'sendrecv'; - } catch { - // ignore - } - } directionsAfter.push(t.direction); } catch (err: any) { directionsAfter.push('err:' + String(err && err.message ? err.message : err).slice(0, 32)); } } - if (!pcHasVideoSender) { - try { - const newSender = (pc as any).addTrack(src.clone(), w.__botAvatarCanvas?.captureStream - ? w.__botAvatarCanvas.captureStream(15) - : new MediaStream([src.clone()])); - if (newSender) { - added++; + } + // Read outbound stats unconditionally so we can see what RTP streams exist. + const videoStats: any[] = []; + const currentDirections: string[] = []; + for (const pc of pcs) { + try { + const transceivers = (pc as any).getTransceivers?.() || []; + for (const t of transceivers) { + const sender = t.sender; + const txKind = + (t as any).kind + || sender?.track?.kind + || t.receiver?.track?.kind + || null; + if (txKind === 'video') { + currentDirections.push(`d=${t.direction}/cd=${(t as any).currentDirection || 'n/a'}`); } - } catch (err) { - directionsAfter.push('addTrack-err:' + String((err as any)?.message || err).slice(0, 32)); + if (!sender) { + continue; + } + // eslint-disable-next-line no-await-in-loop + const stats = await sender.getStats(); + stats.forEach((r: any) => { + if (r.type === 'outbound-rtp') { + videoStats.push({ + kind: r.kind || r.mediaType || 'unknown', + bytes: r.bytesSent || 0, + packets: r.packetsSent || 0, + framesEncoded: r.framesEncoded || 0, + framesSent: r.framesSent || 0, + fps: r.framesPerSecond || 0, + w: r.frameWidth || 0, + h: r.frameHeight || 0, + }); + } + }); } + } catch { + // ignore } } return { @@ -260,6 +399,12 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => totalTransceivers, directionsBefore, directionsAfter, + currentDirections, + videoStats, + trackId: src.id, + trackEnabled: src.enabled, + trackReady: src.readyState, + trackMuted: src.muted, }; }; diff --git a/src/config.ts b/src/config.ts index 5e2bcd1..ee1c3ec 100644 --- a/src/config.ts +++ b/src/config.ts @@ -16,9 +16,14 @@ export const config = { botHeadless: process.env.BOT_HEADLESS !== 'false', /** * Replace Chromium's fake test-pattern video with a canvas stream (gradient + label). - * Unset in production with BOT_USE_CANVAS_VIDEO=false if you need camera off / profile tile only. + * Default OFF: in tests with the poweron tenant the Teams SFU rejects all + * outbound video m-lines (port=0/inactive) regardless of which track we send, + * so enabling video just costs CPU + adds a misleading "camera on" indicator + * for other participants without ever transmitting frames. Set + * BOT_USE_CANVAS_VIDEO=true if a future tenant policy permits IP video and + * you want to push the canvas stream. */ - botUseCanvasVideo: process.env.BOT_USE_CANVAS_VIDEO !== 'false', + botUseCanvasVideo: process.env.BOT_USE_CANVAS_VIDEO === 'true', // Logging logLevel: process.env.LOG_LEVEL || 'info',