diff --git a/CHANGES_SINCE_WORKING.md b/CHANGES_SINCE_WORKING.md new file mode 100644 index 0000000..1b436f4 --- /dev/null +++ b/CHANGES_SINCE_WORKING.md @@ -0,0 +1,146 @@ +# Changes since last working state ("jetzt geht es" — 12 May 2026, 14:01) + +All changes are **uncommitted** (working tree vs HEAD). HEAD = last git commit (the known-good baseline from ~3 weeks ago). + +## Legend + +- **PRE-SUCCESS**: Change was present at 14:01 when the bot successfully joined +- **POST-SUCCESS**: Change was made after 14:01 (potential regression source) +- **REVERTED**: Was added after 14:01 then reverted — should be back to pre-success state + +--- + +## 1. `audioCaptureProcedure.ts` — Audio Capture Gating (PRE-SUCCESS) + +**What:** Complete rewrite of the RTCPeerConnection wrapper. Instead of immediately building an AudioContext/MediaStreamSource on every `track` event, the wrapper now: +- Sets `window.__audioCaptureEnabled = false` by default +- Only logs diagnostics while disabled (no clone, no AudioContext, no MediaStream) +- Exposes `window.__audioCaptureAttachTrack(pc, track)` as the audio graph builder +- `startCapture()` (called AFTER `in_meeting`) sets the flag to `true` and retroactively attaches existing tracks via `getReceivers()` +- The `ended` handler does passive cleanup (no `ctx.close()`, no `disconnect()`) + +**Why:** Prevents `rejectMediaDescriptionsUpdateAsync` crash by keeping out of Teams' WebRTC pipeline during pre-join/lobby/SDP renegotiation. + +**Risk:** This was the exact fix that led to "jetzt geht es". Should be safe. + +--- + +## 2. `joinProcedure.ts` — Blind Wait Elimination + Selector Rewrite (PRE-SUCCESS) + +### 2a. 
New helper functions (replacing `waitForTimeout`) +- `_waitForPreJoinAfterLauncher()` — waits for pre-join UI (10s timeout) +- `_waitForNoAudioVideoModalGone()` — waits for no-AV modal dismissal +- `_waitForPermissionOverlayCleared()` — waits for the permission dialog to be gone +- `_waitForLeaveUiSettled()` — waits for hangup button to disappear + +All `waitForTimeout` calls were replaced with these helpers. **Timeout is 10s** (vs. the original 1-2s). + +### 2b. `isInMeetingLobby()` rewrite +**OLD (HEAD):** Text-based detection using `bodyText.includes('will let you in')` + `data-tid` fallback. +**NEW:** Purely structural selectors: `[data-tid="lobby-screen"]`, `[data-cid="lobby-screen"]`, `[id*="lobby"]`, `[class*="lobby" i]`, etc. + +### 2c. `isInMeeting()` rewrite +**OLD (HEAD):** Mixed text-based (`aria-label*="Leave"`, `bodyText.includes('Mute')`) + `data-tid` selectors. +**NEW:** Purely structural: `button[id="hangup-button"]`, `button[id="microphone-button"]`, `[data-cid="ts-hangup-btn"]`, `[data-cid="calling-unified-bar"]`, etc. Fallback uses DOM structure check (class-based `calling-controls`, button ID counting). + +**Risk:** The new `_waitForPreJoinAfterLauncher()` has a 10s timeout. In the logs, this timeout actually fires ("Pre-join UI not detected after launcher"), so this step now takes up to 10s where the old `waitForTimeout` took 2s. This extra delay might change timing. + +--- + +## 3. `orchestrator.ts` — Lobby Admission + Browser Args (MIXED) + +### 3a. `_waitForMeetingAdmission()` (POST-SUCCESS, then re-fixed) +**At 14:01:** Had `wasInLobby` tracking — when the lobby disappeared, it kept waiting patiently for the meeting UI. +**After 14:01:** Was simplified to just poll `isInMeeting()` without tracking lobby state. As a result, the bot could no longer detect admission. +**Current:** Re-added `wasInLobby` tracking + `isInMeetingLobby()` check per iteration. + +### 3b. `_launchBrowser()` comment (PRE-SUCCESS) +Added documentation comment explaining why anon and auth use different Chromium args. + +### 3c. 
`_attemptAuthJoin()` cleanup (PRE-SUCCESS) +Minor comment/formatting changes in the `anon=true` stripping logic. + +### 3d. `_stripAnonFromInnerMeetingUrl()` comment (PRE-SUCCESS) +Expanded docstring. + +### 3e. ChatProcedure constructor trailing comma (PRE-SUCCESS) +Cosmetic: added trailing comma after the callback parameter. + +--- + +## 4. `chatProcedure.ts` — Simplified Compose + Root-Cause Guard (MIXED) + +### 4a. `_ensureComposeExpanded()` (PRE-SUCCESS — was being developed at 14:01) +New method that detects Teams' "simplified compose" layout (light-meetings) and clicks the expand button so ckeditor is visible. + +### 4b. Updated `_isChatPanelOpen()` selectors (PRE-SUCCESS) +Added `[data-tid="ckeditor"]`, `[data-tid="newMessageCommands-expand-compose"]`, `[data-tid="simplified-compose-bottom-toolbar"]`. + +### 4c. Root-cause guard: `if (!text && author !== 'Unknown')` (PRE-SUCCESS) +In both periodic scan and MutationObserver paths: the `innerText` fallback only fires when an author was actually identified. Prevents "Unknown: 22:04" timestamp noise. + +### 4d. Container fallback for light-meetings (PRE-SUCCESS) +When the chat container has `offsetHeight === 0` or yields 0 candidates but global `fui-ChatMessage` elements exist, promotes search target from container to `document`. + +### 4e. `_sendChatMessage()` selector additions (PRE-SUCCESS) +Added `[data-tid="ckeditor"]` as a selector for the input box. + +### 4f. Reverted changes (were POST-SUCCESS, now back to PRE-SUCCESS state) +- ~~`botName` constructor parameter~~ → removed +- ~~expanded `noisePatterns`~~ → removed +- ~~`isOwnAuthor()` skip logic~~ → removed + +--- + +## 5. 
`teamsActionsService.ts` — Blind Wait Elimination (PRE-SUCCESS) + +Replaced `waitForTimeout` calls with targeted waits: +- Menu dismissal: `waitForFunction(() => !document.querySelector('[role="menu"]'))` +- Chat input focus: `waitForFunction(() => document.activeElement?.matches(...))` +- More menu: `waitForSelector('[role="menu"]')` +- Captions button: `waitForSelector('#closed-captions-button')` +- Chat panel input: `waitForSelector('[data-tid="ckeditor-replyConversation"]')` +- Removed delay after media toggles + +--- + +## 6. `authProcedure.ts` — Blind Wait Elimination (PRE-SUCCESS) + +Replaced `waitForTimeout` calls with targeted waits: +- After sign-in click: wait for MFA/error/KMSI indicator +- After email Next click: wait for the password input (or a username error / MFA indicator) +- After "Stay signed in": wait for KMSI banner dismissal + +--- + +## 7. `authTestProcedure.ts` — Blind Wait Elimination (PRE-SUCCESS) + +Removed redundant `waitForTimeout` calls and replaced the remaining ones with targeted `waitForSelector` calls, covering: +- Login page elements, pre-join screen, meeting UI, launcher, sign-in dialog + +--- + +## 8. `backgroundProcedure.ts` — Blind Wait Elimination (PRE-SUCCESS) + +Replaced `waitForTimeout` calls with targeted waits: +- Background effects panel visibility +- File input appearance after add-image click +- Background thumbnail appearance after upload +- Panel dismissal via Escape + +--- + +## Summary: What to roll back for a clean test + +If rolling back to HEAD (git checkout): +- You lose the **audio capture gating** (the fix that made "jetzt geht es" work) +- You lose the **blind wait elimination** (which improves robustness) +- You lose the **chat simplified compose** fix +- You lose the **structural selector rewrites** for lobby/meeting detection + +**Recommended selective rollback** (keep what was working): +1. Keep `audioCaptureProcedure.ts` changes (audio capture gating) +2. Keep `chatProcedure.ts` changes (compose expand, root-cause guard, container fallback) +3. 
Consider reverting `joinProcedure.ts` helper timeouts back to shorter values (10s → 2-3s) — the 10s `_waitForPreJoinAfterLauncher` is timing out and adding unnecessary delay +4. Keep `orchestrator.ts` `_waitForMeetingAdmission` with `wasInLobby` tracking +5. The blind-wait changes in auth/background/teamsActions are safe (only affect those specific flows) diff --git a/src/bot/audioCaptureProcedure.ts b/src/bot/audioCaptureProcedure.ts index c140fc9..c0179c8 100644 --- a/src/bot/audioCaptureProcedure.ts +++ b/src/bot/audioCaptureProcedure.ts @@ -157,10 +157,218 @@ export class AudioCaptureProcedure { this._logger.info('[AudioCapture] Injecting RTCPeerConnection wrapper (all frames)...'); await this._page.context().addInitScript((workletCode: string) => { - (window as any).__audioCaptureChunks = [] as any[]; - (window as any).__audioCaptureProcessors = {} as Record; - (window as any).__audioCaptureContexts = {} as Record; - (window as any).__audioCapturePeerConnections = [] as RTCPeerConnection[]; + const w = window as any; + w.__audioCaptureChunks = [] as any[]; + w.__audioCaptureProcessors = {} as Record; + w.__audioCaptureContexts = {} as Record; + w.__audioCapturePeerConnections = [] as RTCPeerConnection[]; + // Flag controlled from Node.js (orchestrator): false until the bot + // is actually `in_meeting`. While false, we observe PCs and tracks + // but DO NOT build any audio graph. This keeps us completely out of + // Teams' WebRTC pipeline during pre-join, lobby, and SDP renegotiation + // — which is where the `rejectMediaDescriptionsUpdateAsync` crash + // would otherwise occur. + w.__audioCaptureEnabled = false; + + // The audio graph builder is exposed on window so it can be invoked + // from both the wrapped 'track' event handler (for new tracks after + // capture is enabled) and from startCapture() (to attach to tracks + // that already exist on connected PCs at the moment of enable). 
+ w.__audioCaptureAttachTrack = (pc: RTCPeerConnection, track: MediaStreamTrack) => { + if (track.kind !== 'audio') return; + const trackId = track.id || `audio-track-${Date.now()}`; + const processors = w.__audioCaptureProcessors as Record; + if (processors[trackId]) return; + if (track.readyState === 'ended') { + console.log(`[AudioCapture] Track already ended; skipping: ${trackId}`); + return; + } + console.log( + `[AudioCapture][DIAG] Attaching audio graph: trackId=${trackId}, label="${track.label}", pc.connectionState=${pc.connectionState}` + ); + + try { + const AudioCtx = window.AudioContext || (w.webkitAudioContext); + const ctx = new AudioCtx(); + const nativeRate = ctx.sampleRate; + // Clone so our capture holds an independent handle; Teams keeps + // its original track untouched. + const capturedTrack = track.clone(); + const stream = new MediaStream([capturedTrack]); + const source = ctx.createMediaStreamSource(stream); + const targetRate = 16000; + + ctx.addEventListener('statechange', () => { + console.log(`[AudioCapture][DIAG] AudioContext statechange: ${ctx.state} for track=${trackId}`); + }); + + const silentGain = ctx.createGain(); + silentGain.gain.value = 0; + + const pushChunk = (base64Data: string, rms: number) => { + const chunks = w.__audioCaptureChunks as any[]; + if (chunks.length < 60) { + chunks.push({ + data: base64Data, + sampleRate: targetRate, + captureDiagnostics: { + trackId, + readyState: track.readyState, + rms: Number(rms.toFixed(6)), + nativeSampleRate: nativeRate, + }, + }); + } + }; + + let workletNode: AudioWorkletNode | null = null; + let scriptProcessor: ScriptProcessorNode | null = null; + + const useWorklet = async () => { + try { + const blob = new Blob([workletCode], { type: 'application/javascript' }); + const blobUrl = URL.createObjectURL(blob); + await ctx.audioWorklet.addModule(blobUrl); + URL.revokeObjectURL(blobUrl); + + workletNode = new AudioWorkletNode(ctx, 'audio-capture-processor', { + processorOptions: { 
nativeRate, targetRate }, + }); + + workletNode.port.onmessage = (ev: MessageEvent) => { + if (ev.data?.type !== 'chunk' || !ev.data.data) return; + const pcm16 = new Int16Array(ev.data.data); + const bytes = new Uint8Array(pcm16.buffer); + let binary = ''; + for (let i = 0; i < bytes.length; i++) { + binary += String.fromCharCode(bytes[i]); + } + pushChunk(btoa(binary), ev.data.rms || 0); + }; + + source.connect(workletNode); + workletNode.connect(silentGain); + silentGain.connect(ctx.destination); + + processors[trackId] = workletNode; + console.log(`[AudioCapture] WebRTC audio track intercepted (AudioWorklet): track=${trackId}, native=${nativeRate}Hz -> 16kHz mono`); + return true; + } catch (err) { + console.warn(`[AudioCapture] AudioWorklet not available, falling back to ScriptProcessor: ${err}`); + return false; + } + }; + + const useScriptProcessor = () => { + const minRmsThreshold = 0.0003; + const maxSamplesPerChunk = nativeRate * 8; + const preRollSamples = Math.ceil(nativeRate * 1.0); + const minFlushSamples = Math.ceil(nativeRate * 0.5); + const silenceFlushCallbacks = 6; + const ratio = nativeRate / targetRate; + + scriptProcessor = ctx.createScriptProcessor(8192, 1, 1); + let chunkBuffer: Float32Array[] = []; + let samplesCollected = 0; + let hasVoicedContent = false; + let consecutiveSilentCallbacks = 0; + + scriptProcessor.onaudioprocess = (e: AudioProcessingEvent) => { + const input = e.inputBuffer.getChannelData(0); + let cbPower = 0; + for (let i = 0; i < input.length; i++) { + cbPower += input[i] * input[i]; + } + const cbRms = Math.sqrt(cbPower / Math.max(input.length, 1)); + + if (cbRms >= minRmsThreshold) { + hasVoicedContent = true; + consecutiveSilentCallbacks = 0; + } else { + consecutiveSilentCallbacks++; + } + + chunkBuffer.push(new Float32Array(input)); + samplesCollected += input.length; + + const shouldFlush = ( + samplesCollected >= maxSamplesPerChunk + || (hasVoicedContent + && consecutiveSilentCallbacks >= silenceFlushCallbacks + 
&& samplesCollected > minFlushSamples) + ); + + if (shouldFlush) { + const merged = new Float32Array(samplesCollected); + let offset = 0; + for (const buf of chunkBuffer) { + merged.set(buf, offset); + offset += buf.length; + } + + let powerSum = 0; + for (let i = 0; i < merged.length; i++) { + powerSum += merged[i] * merged[i]; + } + const rms = Math.sqrt(powerSum / Math.max(merged.length, 1)); + + hasVoicedContent = false; + consecutiveSilentCallbacks = 0; + + if (rms >= minRmsThreshold) { + const outLen = Math.floor(merged.length / ratio); + const pcm16 = new Int16Array(outLen); + for (let i = 0; i < outLen; i++) { + const srcIdx = Math.floor(i * ratio); + const s = Math.max(-1, Math.min(1, merged[srcIdx])); + pcm16[i] = Math.round(s * 32767); + } + const bytes = new Uint8Array(pcm16.buffer); + let binary = ''; + for (let i = 0; i < bytes.length; i++) { + binary += String.fromCharCode(bytes[i]); + } + pushChunk(btoa(binary), rms); + } else { + const keep = Math.min(preRollSamples, merged.length); + const preRoll = merged.slice(merged.length - keep); + chunkBuffer = [preRoll]; + samplesCollected = keep; + return; + } + const keep = Math.min(preRollSamples, merged.length); + const preRoll = merged.slice(merged.length - keep); + chunkBuffer = [preRoll]; + samplesCollected = keep; + } + }; + + source.connect(scriptProcessor); + scriptProcessor.connect(silentGain); + silentGain.connect(ctx.destination); + + processors[trackId] = scriptProcessor; + console.log(`[AudioCapture] WebRTC audio track intercepted (ScriptProcessor fallback): track=${trackId}, native=${nativeRate}Hz -> 16kHz mono`); + }; + + (async () => { + const ok = await useWorklet(); + if (!ok) useScriptProcessor(); + ctx.resume().catch(() => {}); + })(); + + track.addEventListener('ended', () => { + try { capturedTrack.stop(); } catch { /* already stopped */ } + delete processors[trackId]; + delete (w.__audioCaptureContexts as Record)[trackId]; + console.log(`[AudioCapture] Audio track ended: 
track=${trackId} (cloned track stopped; ctx kept open)`); + }); + + (w.__audioCaptureContexts as Record)[trackId] = ctx; + } catch (err) { + console.error('[AudioCapture] Failed to set up audio capture:', err); + } + }; const OrigRTC = window.RTCPeerConnection; @@ -168,235 +376,42 @@ export class AudioCaptureProcedure { window.RTCPeerConnection = function (this: RTCPeerConnection, ...args: any[]) { const pc = new OrigRTC(...args); try { - const pcs = (window as any).__audioCapturePeerConnections as RTCPeerConnection[]; + const pcs = w.__audioCapturePeerConnections as RTCPeerConnection[]; pcs.push(pc); - // #region agent log - console.log(`[AudioCapture][DIAG] New RTCPeerConnection created (total: ${pcs.length}), config:`, JSON.stringify(args[0] || {}).substring(0, 200)); - // #endregion + console.log(`[AudioCapture][DIAG] New RTCPeerConnection created (total: ${pcs.length})`); } catch { // ignore } + // CRITICAL: while `__audioCaptureEnabled === false` (i.e. the bot + // is still in pre-join / lobby), this handler MUST NOT touch the + // track at all — no clone(), no MediaStream(), no AudioContext. + // Any of those triggers Teams' bundle to crash with + // `rejectMediaDescriptionsUpdateAsync` during SDP renegotiation. + // We simply observe the event. Once the orchestrator calls + // startCapture() (after the bot is admitted into the meeting), + // it iterates existing receivers AND sets the flag so any later + // 'track' events attach immediately. 
pc.addEventListener('track', (event: RTCTrackEvent) => { if (event.track.kind !== 'audio') return; - - const trackId = event.track.id || `audio-track-${Date.now()}`; - const processors = (window as any).__audioCaptureProcessors as Record; - if (processors[trackId]) { - return; - } - - // #region agent log console.log( - `[AudioCapture][DIAG] Track received: id=${trackId}, enabled=${event.track.enabled}, muted=${event.track.muted}, readyState=${event.track.readyState}, label=${event.track.label}` + `[AudioCapture][DIAG] Track event: id=${event.track.id}, label="${event.track.label}", captureEnabled=${w.__audioCaptureEnabled}, pc.connectionState=${pc.connectionState}` ); - event.track.addEventListener('mute', () => { - console.log(`[AudioCapture][DIAG] Track MUTED: id=${trackId}`); - }); - event.track.addEventListener('unmute', () => { - console.log(`[AudioCapture][DIAG] Track UNMUTED: id=${trackId}`); - }); - // #endregion - - try { - const AudioCtx = window.AudioContext || (window as any).webkitAudioContext; - const ctx = new AudioCtx(); - const nativeRate = ctx.sampleRate; - const stream = new MediaStream([event.track]); - const source = ctx.createMediaStreamSource(stream); - const targetRate = 16000; - - // #region agent log - console.log( - `[AudioCapture][DIAG] AudioContext: state=${ctx.state}, sampleRate=${nativeRate}, stream.active=${stream.active}, streamTracks=${stream.getAudioTracks().length}` - ); - ctx.addEventListener('statechange', () => { - console.log(`[AudioCapture][DIAG] AudioContext statechange: ${ctx.state} for track=${trackId}`); - }); - // #endregion - - const silentGain = ctx.createGain(); - silentGain.gain.value = 0; - - const pushChunk = (base64Data: string, rms: number) => { - const chunks = (window as any).__audioCaptureChunks as any[]; - if (chunks.length < 60) { - chunks.push({ - data: base64Data, - sampleRate: targetRate, - captureDiagnostics: { - trackId, - readyState: event.track.readyState, - rms: Number(rms.toFixed(6)), - 
nativeSampleRate: nativeRate, - }, - }); + if (!w.__audioCaptureEnabled) return; + if (pc.connectionState === 'connected') { + w.__audioCaptureAttachTrack(pc, event.track); + } else { + const onStateChange = () => { + if (pc.connectionState === 'connected') { + pc.removeEventListener('connectionstatechange', onStateChange); + if (w.__audioCaptureEnabled) { + w.__audioCaptureAttachTrack(pc, event.track); + } + } else if (pc.connectionState === 'failed' || pc.connectionState === 'closed') { + pc.removeEventListener('connectionstatechange', onStateChange); } }; - - let workletNode: AudioWorkletNode | null = null; - let scriptProcessor: ScriptProcessorNode | null = null; - - const useWorklet = async () => { - try { - const blob = new Blob([workletCode], { type: 'application/javascript' }); - const blobUrl = URL.createObjectURL(blob); - await ctx.audioWorklet.addModule(blobUrl); - URL.revokeObjectURL(blobUrl); - - workletNode = new AudioWorkletNode(ctx, 'audio-capture-processor', { - processorOptions: { nativeRate, targetRate }, - }); - - workletNode.port.onmessage = (ev: MessageEvent) => { - if (ev.data?.type !== 'chunk' || !ev.data.data) return; - const pcm16 = new Int16Array(ev.data.data); - const bytes = new Uint8Array(pcm16.buffer); - let binary = ''; - for (let i = 0; i < bytes.length; i++) { - binary += String.fromCharCode(bytes[i]); - } - pushChunk(btoa(binary), ev.data.rms || 0); - }; - - source.connect(workletNode); - workletNode.connect(silentGain); - silentGain.connect(ctx.destination); - - const processorsObj = (window as any).__audioCaptureProcessors as Record; - processorsObj[trackId] = workletNode; - console.log(`[AudioCapture] WebRTC audio track intercepted (AudioWorklet): track=${trackId}, native=${nativeRate}Hz -> 16kHz mono`); - return true; - } catch (err) { - console.warn(`[AudioCapture] AudioWorklet not available, falling back to ScriptProcessor: ${err}`); - return false; - } - }; - - const useScriptProcessor = () => { - const minRmsThreshold = 
0.0003; - const maxSamplesPerChunk = nativeRate * 8; - const preRollSamples = Math.ceil(nativeRate * 1.0); - const minFlushSamples = Math.ceil(nativeRate * 0.5); - const silenceFlushCallbacks = 6; - const ratio = nativeRate / targetRate; - - scriptProcessor = ctx.createScriptProcessor(8192, 1, 1); - let chunkBuffer: Float32Array[] = []; - let samplesCollected = 0; - let hasVoicedContent = false; - let consecutiveSilentCallbacks = 0; - - scriptProcessor.onaudioprocess = (e: AudioProcessingEvent) => { - const input = e.inputBuffer.getChannelData(0); - let cbPower = 0; - for (let i = 0; i < input.length; i++) { - cbPower += input[i] * input[i]; - } - const cbRms = Math.sqrt(cbPower / Math.max(input.length, 1)); - - if (cbRms >= minRmsThreshold) { - hasVoicedContent = true; - consecutiveSilentCallbacks = 0; - } else { - consecutiveSilentCallbacks++; - } - - chunkBuffer.push(new Float32Array(input)); - samplesCollected += input.length; - - const shouldFlush = ( - samplesCollected >= maxSamplesPerChunk - || (hasVoicedContent - && consecutiveSilentCallbacks >= silenceFlushCallbacks - && samplesCollected > minFlushSamples) - ); - - if (shouldFlush) { - const merged = new Float32Array(samplesCollected); - let offset = 0; - for (const buf of chunkBuffer) { - merged.set(buf, offset); - offset += buf.length; - } - - let powerSum = 0; - for (let i = 0; i < merged.length; i++) { - powerSum += merged[i] * merged[i]; - } - const rms = Math.sqrt(powerSum / Math.max(merged.length, 1)); - - hasVoicedContent = false; - consecutiveSilentCallbacks = 0; - - if (rms >= minRmsThreshold) { - const outLen = Math.floor(merged.length / ratio); - const pcm16 = new Int16Array(outLen); - for (let i = 0; i < outLen; i++) { - const srcIdx = Math.floor(i * ratio); - const s = Math.max(-1, Math.min(1, merged[srcIdx])); - pcm16[i] = Math.round(s * 32767); - } - const bytes = new Uint8Array(pcm16.buffer); - let binary = ''; - for (let i = 0; i < bytes.length; i++) { - binary += 
String.fromCharCode(bytes[i]); - } - pushChunk(btoa(binary), rms); - } else { - const keep = Math.min(preRollSamples, merged.length); - const preRoll = merged.slice(merged.length - keep); - chunkBuffer = [preRoll]; - samplesCollected = keep; - return; - } - const keep = Math.min(preRollSamples, merged.length); - const preRoll = merged.slice(merged.length - keep); - chunkBuffer = [preRoll]; - samplesCollected = keep; - } - }; - - source.connect(scriptProcessor); - scriptProcessor.connect(silentGain); - silentGain.connect(ctx.destination); - - const processorsObj = (window as any).__audioCaptureProcessors as Record; - processorsObj[trackId] = scriptProcessor; - console.log(`[AudioCapture] WebRTC audio track intercepted (ScriptProcessor fallback): track=${trackId}, native=${nativeRate}Hz -> 16kHz mono`); - }; - - (async () => { - const ok = await useWorklet(); - if (!ok) useScriptProcessor(); - - ctx.resume().catch(() => {}); - })(); - - // Clean up when the track ends (peer leaves, renegotiation, etc.) 
- event.track.addEventListener('ended', () => { - try { - if (workletNode) { - workletNode.disconnect(); - } - if (scriptProcessor) { - scriptProcessor.disconnect(); - } - source.disconnect(); - silentGain.disconnect(); - ctx.close(); - } catch { /* already closed */ } - const processorsObj = (window as any).__audioCaptureProcessors as Record; - const contextsObj = (window as any).__audioCaptureContexts as Record; - delete processorsObj[trackId]; - delete contextsObj[trackId]; - console.log(`[AudioCapture] Audio track ended: track=${trackId}, resources cleaned up`); - }); - - const contextsObj = (window as any).__audioCaptureContexts as Record; - contextsObj[trackId] = ctx; - } catch (err) { - console.error('[AudioCapture] Failed to set up audio capture:', err); + pc.addEventListener('connectionstatechange', onStateChange); } }); @@ -419,6 +434,35 @@ export class AudioCaptureProcedure { if (this._isCapturing) return; this._isCapturing = true; + // Enable capture and attach to any audio tracks already present on + // connected peer connections. From this moment on, future 'track' + // events also build their audio graph automatically. We deliberately + // do NOT do any of this earlier — see the wrapper for the reason. + try { + const attached = await this._page.evaluate(() => { + const w = window as any; + w.__audioCaptureEnabled = true; + const pcs = (w.__audioCapturePeerConnections as RTCPeerConnection[]) || []; + const result: { trackId: string; label: string; pcState: string }[] = []; + for (const pc of pcs) { + if (pc.connectionState !== 'connected') continue; + const receivers = pc.getReceivers(); + for (const r of receivers) { + const t = r.track; + if (!t || t.kind !== 'audio' || t.readyState !== 'live') continue; + result.push({ trackId: t.id, label: t.label || '', pcState: pc.connectionState }); + w.__audioCaptureAttachTrack(pc, t); + } + } + return result; + }); + this._logger.info( + `[AudioCapture] Capture enabled. 
Existing audio tracks attached: ${attached.length} (${JSON.stringify(attached)})` + ); + } catch (err) { + this._logger.warn(`[AudioCapture] Failed to enable capture / iterate existing tracks: ${err}`); + } + this._logger.info('[AudioCapture] Starting audio chunk polling...'); // #region agent log @@ -478,8 +522,10 @@ export class AudioCaptureProcedure { try { await this._page.evaluate(() => { - const processors = (window as any).__audioCaptureProcessors as Record; - const contexts = (window as any).__audioCaptureContexts as Record; + const w = window as any; + w.__audioCaptureEnabled = false; + const processors = w.__audioCaptureProcessors as Record; + const contexts = w.__audioCaptureContexts as Record; Object.keys(processors || {}).forEach((trackId) => { try { processors[trackId]?.disconnect(); @@ -494,8 +540,8 @@ export class AudioCaptureProcedure { // ignore } }); - (window as any).__audioCaptureProcessors = {}; - (window as any).__audioCaptureContexts = {}; + w.__audioCaptureProcessors = {}; + w.__audioCaptureContexts = {}; }); } catch { // Page might already be closed diff --git a/src/bot/authProcedure.ts b/src/bot/authProcedure.ts index 95c6c7d..b9c19e8 100644 --- a/src/bot/authProcedure.ts +++ b/src/bot/authProcedure.ts @@ -97,7 +97,20 @@ export class AuthProcedure { this._logger.info('Password entered'); await this._clickSignInButton(); - await this._page.waitForTimeout(3000); + try { + await this._page.waitForFunction( + () => + !!document.querySelector('#idRichContext_DisplaySign') || + !!document.querySelector('#idDiv_SAOTCAS_Description') || + !!document.querySelector('#idDiv_SAOTCC_Description') || + !!document.querySelector('#passwordError') || + !!document.querySelector('#idSIButton9') || + !!document.querySelector('#KmsiBanner'), + { timeout: 10000 }, + ); + } catch { + this._logger.warn('No MFA/error/redirect indicator found after sign-in click'); + } // Step 5: Check for MFA or password error const mfaChallenge = await 
this._detectMfaChallenge(); @@ -197,7 +210,7 @@ export class AuthProcedure { if (button) { await button.click(); this._logger.info(`Clicked Next: ${selector}`); - await this._page.waitForTimeout(3000); + await this._waitForNextStepAfterEmail(); return; } } catch { @@ -207,7 +220,21 @@ export class AuthProcedure { this._logger.warn('No Next button found via selectors, pressing Enter'); await this._page.keyboard.press('Enter'); - await this._page.waitForTimeout(3000); + await this._waitForNextStepAfterEmail(); + } + + private async _waitForNextStepAfterEmail(): Promise { + try { + await this._page.waitForFunction( + () => + !!document.querySelector('input#i0118, input[name="passwd"], input[type="password"]') || + !!document.querySelector('#usernameError, #displayName') || + !!document.querySelector('#idRichContext_DisplaySign'), + { timeout: 10000 }, + ); + } catch { + this._logger.warn('No expected element appeared after clicking Next (email step)'); + } } /** @@ -500,7 +527,14 @@ export class AuthProcedure { if (button) { await button.click(); this._logger.info('Clicked "Stay signed in" - Yes'); - await this._page.waitForTimeout(2000); + try { + await this._page.waitForFunction( + () => !document.querySelector('#idSIButton9, #KmsiBanner'), + { timeout: 10000 }, + ); + } catch { + this._logger.warn('"Stay signed in" prompt did not dismiss in time'); + } return; } } catch { diff --git a/src/bot/authTestProcedure.ts b/src/bot/authTestProcedure.ts index 1e1808b..d8b142c 100644 --- a/src/bot/authTestProcedure.ts +++ b/src/bot/authTestProcedure.ts @@ -250,13 +250,10 @@ async function _runVariant( _log('warn', `No login redirect, current URL: ${page.url().substring(0, 150)}`); } - // Wait for login page to render try { await page.waitForSelector('input[name="loginfmt"], input[type="email"]', { timeout: 15000, state: 'visible' }); - await page.waitForTimeout(1000); } catch { _log('warn', 'Login page elements not found'); - await page.waitForTimeout(2000); } await 
_screenshotStep('1 - Login-Seite'); @@ -357,7 +354,6 @@ async function _runVariant( } catch { _log('warn', 'Pre-join "Join now" button not found after 30s'); } - await page.waitForTimeout(2000); } await _screenshotStep('3 - Pre-Join Ansicht'); @@ -402,7 +398,6 @@ async function _runVariant( // Wait for the actual meeting view (hangup button = we're in the meeting) if (joinNowClicked) { _log('info', 'Waiting for meeting to load...'); - await page.waitForTimeout(5000); try { await page.waitForSelector( 'button[id="hangup-button"], button[data-tid="hangup-button"], #hangup-button', @@ -412,7 +407,6 @@ async function _runVariant( } catch { _log('warn', 'Hangup button not found after 30s'); } - await page.waitForTimeout(3000); } await _screenshotStep('4 - Im Meeting'); @@ -964,7 +958,14 @@ async function _handleLauncher(page: Page): Promise { if (button) { await button.click(); logger.info(`[AuthTest] Clicked launcher: ${selector}`); - await page.waitForTimeout(3000); + try { + await page.waitForSelector( + 'input[data-tid="prejoin-display-name-input"], #prejoin-join-button, button[data-tid="prejoin-join-button"]', + { timeout: 10000 }, + ); + } catch { + logger.warn('[AuthTest] Pre-join screen not found after launcher click'); + } return; } } catch { @@ -1002,8 +1003,14 @@ async function _clickSignInLink( if (element) { await element.click(); _log('info', `Clicked Sign-in link: ${selector}`); - // Wait for the inline modal or redirect to appear - await page.waitForTimeout(3000); + try { + await page.waitForSelector( + 'input[name="loginfmt"], input[type="email"], [data-testid="authLoginDialogNextButton"]', + { timeout: 10000, state: 'visible' }, + ); + } catch { + _log('warn', 'Auth dialog / email input not found after sign-in click'); + } return true; } } catch { @@ -1156,7 +1163,14 @@ async function _attemptAuth(page: Page, email: string, password: string): Promis await el.click(); logger.info(`[AuthTest] Clicked sign-in link: ${selector}`); clicked = true; - await 
page.waitForTimeout(3000); + try { + await page.waitForSelector( + 'input[name="loginfmt"], input[type="email"], [data-testid="authLoginDialogNextButton"]', + { timeout: 10000, state: 'visible' }, + ); + } catch { + logger.warn('[AuthTest] Auth dialog not found after sign-in click'); + } break; } } catch { @@ -1173,9 +1187,15 @@ async function _attemptAuth(page: Page, email: string, password: string): Promis const authProcedure = new AuthProcedure(page, logger); const authResult = await authProcedure.authenticateWithMicrosoft(email, password, true); - // Wait for redirect back to Teams after auth if (authResult) { - await page.waitForTimeout(5000); + try { + await page.waitForSelector( + '#prejoin-join-button, button[data-tid="prejoin-join-button"], button[id="hangup-button"]', + { timeout: 15000 }, + ); + } catch { + logger.warn('[AuthTest] Teams page not loaded after auth'); + } } return authResult; diff --git a/src/bot/backgroundProcedure.ts b/src/bot/backgroundProcedure.ts index f3d94a1..6338181 100644 --- a/src/bot/backgroundProcedure.ts +++ b/src/bot/backgroundProcedure.ts @@ -10,6 +10,11 @@ import * as os from 'os'; * Must be called AFTER the bot is on the pre-join screen but BEFORE clicking "Join now". * Only works for authenticated joins (anonymous guests may not have background options). 
*/ +const _BG_WAIT_MS = 10000; +const _BG_EFFECT_GRID_SEL = + 'button[aria-label*="None" i], button[aria-label*="Kein" i], [data-tid="background-item-none"], ' + + '[data-tid="background-image"], [class*="background-item"], li[role="listitem"] button'; + export class BackgroundProcedure { private _page: Page; private _logger: Logger; @@ -30,7 +35,6 @@ export class BackgroundProcedure { if (!opened) { return false; } - await this._page.waitForTimeout(500); const noEffectSelectors: string[] = [ 'button[aria-label*="None" i]', @@ -46,7 +50,6 @@ export class BackgroundProcedure { if (btn) { await btn.click(); this._logger.info(`Selected no background effect: ${sel}`); - await this._page.waitForTimeout(500); await this._dismissPanelIfOpen(); return true; } @@ -59,7 +62,6 @@ export class BackgroundProcedure { if (tile) { await tile.click(); this._logger.info('Clicked first background effects tile (often no effect)'); - await this._page.waitForTimeout(400); await this._dismissPanelIfOpen(); return true; } @@ -76,7 +78,10 @@ export class BackgroundProcedure { private async _dismissPanelIfOpen(): Promise { try { await this._page.keyboard.press('Escape'); - await this._page.waitForTimeout(200); + await this._page.waitForFunction( + () => !document.querySelector('[data-tid="background-settings-panel"], [class*="background-effects"]'), + { timeout: 5000 }, + ).catch(() => {}); } catch { // ignore } @@ -151,7 +156,11 @@ export class BackgroundProcedure { if (button) { await button.click(); this._logger.info(`Clicked background effects button: ${selector}`); - await this._page.waitForTimeout(2000); + try { + await this._page.waitForSelector(_BG_EFFECT_GRID_SEL, { state: 'visible', timeout: _BG_WAIT_MS }); + } catch { + this._logger.warn('Background effects panel elements not visible after click'); + } return true; } } catch { @@ -186,7 +195,11 @@ export class BackgroundProcedure { await button.click(); this._logger.info(`Clicked add image button: ${selector}`); addButtonClicked 
= true; - await this._page.waitForTimeout(1000); + try { + await this._page.waitForSelector('input[type="file"]', { state: 'attached', timeout: _BG_WAIT_MS }); + } catch { + this._logger.warn('File input not found after clicking add-image button'); + } break; } } catch { @@ -199,7 +212,14 @@ export class BackgroundProcedure { if (fileInput) { await fileInput.setInputFiles(filePath); this._logger.info('Background image uploaded via file input'); - await this._page.waitForTimeout(2000); + try { + await this._page.waitForSelector( + '[data-tid="background-image"], .background-image-item', + { state: 'visible', timeout: _BG_WAIT_MS }, + ); + } catch { + this._logger.warn('No background thumbnail appeared after upload'); + } // The uploaded image should be auto-selected, but click it to be sure // Look for the last image in the background gallery (newly uploaded) @@ -209,7 +229,6 @@ export class BackgroundProcedure { const lastImage = images[images.length - 1]; await lastImage.click(); this._logger.info('Selected uploaded background image'); - await this._page.waitForTimeout(1000); } } catch { this._logger.debug('Could not click uploaded image - may be auto-selected'); diff --git a/src/bot/chatProcedure.ts b/src/bot/chatProcedure.ts index d5d92a8..fe27c31 100644 --- a/src/bot/chatProcedure.ts +++ b/src/bot/chatProcedure.ts @@ -64,12 +64,85 @@ export class ChatProcedure { const isOpen = await this._isChatPanelOpen(); if (isOpen) { this._logger.info('Chat panel opened successfully'); + // Light-meetings ships a "simplified compose" with a collapsed + // placeholder + dedicated expand button. The real ckeditor textbox + // is rendered but Playwright considers it invisible until expanded. + // Expand once now so the periodic scan and send path see the + // canonical ckeditor surface. 
+ await this._ensureComposeExpanded(); } else { this._logger.warn('Chat panel could not be opened - chat send/receive will not work'); } return isOpen; } + /** + * Detect Teams' "simplified compose" layout (light-meetings / new meeting + * chat UI) and expand it. In that layout the chat side-pane shows a + * compact placeholder toolbar with a `newMessageCommands-expand-compose` + * button; the actual `
` + * is mounted but rendered in a state where Playwright's `isVisible()` is + * false (the parent layer is hidden until expand is pressed). Clicking + * the expand button promotes the full ckeditor surface to a visible + * compose region anchored under `chat-pane-compose-message-footer`. + * + * Idempotent: returns true also when no expand button is present (the + * compose is already in its full form). + */ + private async _ensureComposeExpanded(): Promise { + const expandSelector = '[data-tid="newMessageCommands-expand-compose"]'; + const expandBtn = await this._page.$(expandSelector); + if (!expandBtn) { + return true; + } + const isVisible = await expandBtn.isVisible().catch(() => false); + if (!isVisible) { + await expandBtn.dispose(); + return true; + } + + try { + await expandBtn.click(); + this._logger.info('Compose expanded: clicked newMessageCommands-expand-compose'); + } catch (err) { + this._logger.warn(`Compose expand click failed: ${err}`); + await expandBtn.dispose(); + return false; + } + await expandBtn.dispose(); + + // Confirm the compose actually expanded: either the simplified toolbar + // is gone, or a ckeditor textbox is now visible (Playwright sense). 
+ try { + await this._page.waitForFunction( + () => { + const simplified = document.querySelector( + '[data-tid="simplified-compose-bottom-toolbar"]' + ) as HTMLElement | null; + const simplifiedGone = !simplified || simplified.offsetHeight === 0; + if (simplifiedGone) return true; + const ck = document.querySelector( + '[data-tid="ckeditor"], div.ck-editor__editable[contenteditable="true"]' + ) as HTMLElement | null; + if (!ck) return false; + const rect = ck.getBoundingClientRect(); + const style = window.getComputedStyle(ck); + return ( + rect.width > 0 && rect.height > 0 + && style.visibility !== 'hidden' + && style.display !== 'none' + && style.opacity !== '0' + ); + }, + { timeout: 5000 }, + ); + return true; + } catch { + this._logger.warn('Compose expand: state did not stabilise within 5s'); + return false; + } + } + /** * Check if the chat panel is currently visible by probing for known * UI elements (chat input, message list, or aria-pressed toggle). @@ -109,12 +182,17 @@ export class ChatProcedure { // overlays, which is NOT the meeting chat. const inputSelectors = [ '[data-tid="ckeditor-replyConversation"]', + '[data-tid="ckeditor"]', '[data-tid="chat-pane-compose-message-footer"] div[contenteditable="true"]', '[data-tid="chat-pane-compose-message-footer"] div[role="textbox"]', '[data-tid="message-pane-footer"] div[contenteditable="true"]', '[data-tid="message-pane-footer"] div[role="textbox"]', 'div[role="textbox"][data-tid*="chat"]', 'div[role="textbox"][data-tid*="message"]', + // light-meetings: a visible "expand compose" button is itself a + // reliable signal that the meeting chat side-pane is open. 
+ '[data-tid="newMessageCommands-expand-compose"]', + '[data-tid="simplified-compose-bottom-toolbar"]', ]; for (const sel of inputSelectors) { const el = document.querySelector(sel) as HTMLElement | null; @@ -406,9 +484,11 @@ export class ChatProcedure { const author = _findAuthor(messageEl); let text = _findBody(messageEl); - // Last resort: take innerText minus the author name & metadata so we - // at least surface something when the body wrapper changes again. - if (!text) { + // Root-cause guard: only fall back to innerText when an author + // was actually identified. Without this, structural fragments + // (bare timestamp separators, lone time elements) leak through + // as "Unknown: 22:04" / "Unknown: Sending..." style entries. + if (!text && author !== 'Unknown') { const full = (messageEl.innerText || '').trim(); if (full) { text = full @@ -592,6 +672,7 @@ export class ChatProcedure { const opened = await this._openChatPanel(); if (opened) { this._consecutiveOpenFailures = 0; + await this._ensureComposeExpanded(); } else { this._consecutiveOpenFailures++; this._logger.info( @@ -652,6 +733,8 @@ export class ChatProcedure { // Modern Teams chat bubbles have NO data-tid on the wrapper — // we match on Fluent UI v9 class prefixes and role="listitem". + // fui-ChatMessageCompact (light-meetings simplified layout) is + // covered by [class*="fui-ChatMessage"]. const messageSelectors = [ '[data-tid="chat-message"]', '[data-tid="chat-pane-message"]', @@ -662,8 +745,21 @@ export class ChatProcedure { '[class*="fui-ChatMyMessage"]', '[role="listitem"]', ]; - const target = container || document.body; - const candidates = target.querySelectorAll(messageSelectors.join(', ')); + // If the resolved container is collapsed (light-meetings: + // message-pane-layout often has h=0 because the chat bubbles are + // rendered in a sibling overlay branch, not inside the layout), + // a scoped query would return 0 candidates and we'd miss every + // visible chat. 
Promote target to document.body in that case. + let target: HTMLElement | Document = container || document.body; + let candidates: NodeListOf = target.querySelectorAll(messageSelectors.join(', ')); + if ( + (!candidates.length && container && container.offsetHeight === 0) + || (!candidates.length && document.querySelectorAll('[class*="fui-ChatMessage"]').length > 0) + ) { + target = document; + candidates = document.querySelectorAll(messageSelectors.join(', ')); + containerSrc = `${containerSrc} -> fallback:document (container collapsed or messages in sibling branch)`; + } const findAuthor = (root: HTMLElement, fallbackEl: HTMLElement): string => { const sels = [ @@ -698,7 +794,12 @@ export class ChatProcedure { const messageEl = (el.closest?.('[data-tid*="chat-message"], [data-tid*="message-list-item"], [class*="fui-ChatMessage"], [class*="fui-ChatMyMessage"]') as HTMLElement | null) || el; const author = findAuthor(messageEl, el); let text = findBody(messageEl, el); - if (!text) { + // Root-cause guard: if no structured body was found AND no + // author was identified, the element is not a chat message — + // skip it. Without this, the innerText fallback below would + // emit bare timestamp separators / structural fragments as + // "Unknown: 22:04" / "Unknown: Sending..." etc. + if (!text && author !== 'Unknown') { const full = (messageEl.innerText || '').trim(); if (full) { text = full @@ -860,6 +961,11 @@ export class ChatProcedure { return false; } + // Light-meetings ships the simplified compose by default and reverts + // to it after every panel re-open. Expand once per send so the + // ckeditor textbox is the active, Playwright-visible surface. + await this._ensureComposeExpanded(); + // Note: order matters — most specific selectors first; the `chat-pane-compose-message-footer` // ancestor lookup is needed because Teams Fluent UI v9 scopes the contenteditable inside it. 
// Modern Teams meeting chat uses CKEditor 5 (`.ck-editor__editable`) and its compose root @@ -867,6 +973,9 @@ export class ChatProcedure { const inputSelectors = [ // Classic data-tid selectors (older Teams builds) '[data-tid="ckeditor-replyConversation"]', + // Light-meetings new meeting chat composer (post-expand): the + // contenteditable surface has data-tid="ckeditor" directly. + '[data-tid="ckeditor"]', '[data-tid="chat-pane-compose-message-footer"] div[contenteditable="true"]', '[data-tid="chat-pane-compose-message-footer"] div[role="textbox"]', '[data-tid="message-pane-footer"] div[contenteditable="true"]', diff --git a/src/bot/joinProcedure.ts b/src/bot/joinProcedure.ts index d433f2d..8c74d54 100644 --- a/src/bot/joinProcedure.ts +++ b/src/bot/joinProcedure.ts @@ -13,6 +13,8 @@ import { resolveLaunchUrl, getMeetingLaunchUrl } from './meetingUrlParser'; * NOTE: The bot always joins as an anonymous guest with the configured bot name. * Authentication is disabled. See Teamsbot-Auth-Join-Learnings.md. 
*/ +const _CONDITION_WAIT_MS = 10000; + export class JoinProcedure { private _page: Page; private _logger: Logger; @@ -24,15 +26,80 @@ export class JoinProcedure { this._botName = botName; } + private async _waitForPreJoinAfterLauncher(): Promise { + const preJoin = + 'input[data-tid="prejoin-display-name-input"], #prejoin-join-button, button[data-tid="prejoin-join-button"]'; + try { + await this._page.waitForSelector(preJoin, { state: 'visible', timeout: _CONDITION_WAIT_MS }); + } catch { + this._logger.warn('Pre-join UI not detected after launcher (continuing)'); + } + } + + private async _waitForNoAudioVideoModalGone(): Promise { + try { + await this._page.waitForFunction( + () => { + const modal = document.querySelector('[role="dialog"]'); + if (!modal) return true; + const t = modal.textContent || ''; + const isNoAv = + t.includes('audio or video') || + t.includes('Audio oder Video') || + t.includes('without audio'); + return !isNoAv; + }, + { timeout: _CONDITION_WAIT_MS }, + ); + } catch { + this._logger.warn('No-audio/video modal may still be visible (continuing)'); + } + } + + private async _waitForPermissionOverlayCleared(): Promise { + try { + await this._page.waitForFunction( + () => { + const dialogs = Array.from(document.querySelectorAll('[role="dialog"]')); + return !dialogs.some(d => { + const t = d.textContent || ''; + return ( + (t.includes('manage') || t.includes('Manage') || t.includes('display') || t.includes('window')) && + (t.includes('Allow') || t.includes('Erlauben') || t.includes('Zulassen')) + ); + }); + }, + { timeout: _CONDITION_WAIT_MS }, + ); + } catch { + this._logger.warn('Permission overlay may still be visible (continuing)'); + } + } + + private async _waitForLeaveUiSettled(): Promise { + try { + await this._page.waitForFunction( + () => + !document.querySelector('button[id="hangup-button"]') && + !document.querySelector('[data-tid="hangup-button"]'), + { timeout: _CONDITION_WAIT_MS }, + ); + } catch { + this._logger.warn('Hangup 
control still present or leave transition slow (continuing)'); + } + } + /** * Navigate to the meeting URL and handle the launcher dialog. - * - * Teams meeting URLs redirect through several hops. We resolve the redirect - * and add params (suppressPrompt, msLaunch=false, anon=true) to skip the - * "Open in Teams app?" native dialog. Then we click "Continue on this browser". + * + * Teams meeting URLs redirect through several hops. `resolveLaunchUrl()` + * follows them server-side and adds suppressPrompt params to the + * RESOLVED launcher URL so the browser does not show the native + * "Open Microsoft Teams?" protocol-handler modal. It also strips + * `anon=true` from the inner URL (Teams' server bakes this in for + * cookie-less fetch requests; we don't want it). */ async startMeetingLauncherFlow(meetingUrl: string): Promise { - // Resolve the meeting URL redirect and add suppressPrompt params let launchUrl: string; try { launchUrl = await resolveLaunchUrl(meetingUrl); @@ -49,7 +116,6 @@ export class JoinProcedure { timeout: config.timeouts.pageLoad, }); - // Handle "Continue on this browser" button await this._handleLauncherDialog(); } @@ -63,7 +129,7 @@ export class JoinProcedure { if (launcherButton) { this._logger.info('Launcher dialog found, clicking "Continue on this browser"'); await launcherButton.click(); - await this._page.waitForTimeout(2000); + await this._waitForPreJoinAfterLauncher(); } } catch { // No launcher - that's fine @@ -86,6 +152,7 @@ export class JoinProcedure { this._logger.info(`Found launcher button: ${primarySelector}`); await this._page.click(primarySelector); this._logger.info('Clicked "Continue on this browser" button'); + await this._waitForPreJoinAfterLauncher(); return; } catch { this._logger.info('Primary launcher selector not found, trying fallbacks...'); @@ -106,7 +173,7 @@ export class JoinProcedure { if (element) { this._logger.info(`Found launcher button (fallback): ${selector}`); await element.click(); - await 
this._page.waitForTimeout(2000); + await this._waitForPreJoinAfterLauncher(); return; } } catch { @@ -210,7 +277,6 @@ export class JoinProcedure { if (button) { await button.click(); this._logger.info(`Clicked "Join now" button (attempt ${attempt}/${maxRetries})`); - await this._page.waitForTimeout(2000); await this._dismissNoAudioVideoModal(); return; } @@ -226,7 +292,6 @@ export class JoinProcedure { if (button && await button.isVisible()) { await button.click(); this._logger.info(`Clicked join button fallback: ${selector} (attempt ${attempt}/${maxRetries})`); - await this._page.waitForTimeout(2000); await this._dismissNoAudioVideoModal(); return; } @@ -274,7 +339,7 @@ export class JoinProcedure { if (button) { await button.click(); this._logger.info(`Dismissed no-audio modal: ${selector}`); - await this._page.waitForTimeout(1000); + await this._waitForNoAudioVideoModalGone(); return; } } catch { @@ -306,7 +371,7 @@ export class JoinProcedure { if (text.includes('manage') || text.includes('Manage') || text.includes('display') || text.includes('window')) { await button.click(); this._logger.info(`Dismissed browser permission modal: ${selector}`); - await this._page.waitForTimeout(1000); + await this._waitForPermissionOverlayCleared(); return; } } @@ -326,45 +391,42 @@ export class JoinProcedure { async isInMeetingLobby(options: { waitForSeconds?: number } = {}): Promise { const timeout = (options.waitForSeconds || 5) * 1000; - // Check for any lobby text variant using page.evaluate for reliability - try { - const inLobby = await this._page.evaluate(() => { - const bodyText = document.body?.innerText || ''; - const lobbyIndicators = [ - 'Someone will let you in shortly', - 'Someone will let you in when the meeting starts', - 'will let you in', - 'waiting for someone to let you in', - 'Someone in the meeting should let you in', - ]; - return lobbyIndicators.some(text => bodyText.includes(text)); - }); - if (inLobby) return true; - } catch { - // Page may not be 
ready - } + const lobbySelectors = [ + '[data-tid="lobby-screen"]', + '[data-tid="waiting-screen"]', + '[data-tid="lobby-waiting-screen"]', + '[data-tid="lobby-container"]', + '[data-cid="lobby-screen"]', + '[data-cid="waiting-screen"]', + '#lobby-container', + '[id*="lobby"]', + ]; - // Primary: text-based check with waitFor (waits up to timeout) try { - await this._page.getByText('will let you in').waitFor({ + await this._page.waitForSelector(lobbySelectors.join(', '), { timeout, state: 'visible', }); return true; } catch { - // Not found within timeout + // No structural lobby element found } - // Fallback: data-tid selectors + // Fallback: check for the pre-join/lobby state via page structure — + // the lobby has no call-control bar but does have a waiting spinner or icon try { - await this._page.waitForSelector('[data-tid="lobby-screen"], [data-tid="waiting-screen"]', { - timeout: 1000, - state: 'visible', + const hasLobbyStructure = await this._page.evaluate(() => { + const el = document.querySelector( + '[class*="lobby" i], [class*="waiting-room" i], [class*="waitingScreen" i]' + ); + return !!el; }); - return true; + if (hasLobbyStructure) return true; } catch { - return false; + // Page may not be ready } + + return false; } /** @@ -378,25 +440,25 @@ export class JoinProcedure { async isInMeeting(options: { waitForSeconds?: number } = {}): Promise { const timeout = (options.waitForSeconds || 5) * 1000; - // Primary selectors - known meeting UI elements const inMeetingSelectors = [ + // Button IDs (Teams 2025+ redesign) 'button[id="hangup-button"]', + 'button[id="microphone-button"]', 'button[id="callingButtons-showMoreBtn"]', - // Fallbacks with data-tid (older Teams versions) + 'button[id="video-button"]', + // data-tid attributes '[data-tid="hangup-button"]', '[data-tid="call-composite"]', - 'button[aria-label*="Leave"]', '[data-tid="callingButtons-showMoreBtn"]', - // Teams v2 (2025+) additional selectors '[data-tid="call-controls"]', 
'[data-tid="meeting-composite"]', 'div[data-tid="video-gallery"]', - 'button[aria-label*="Hang up"]', - 'button[aria-label*="leave" i]', - // Mic/Camera toggle buttons are only visible in an active call - 'button[id="microphone-button"]', - 'button[data-tid="toggle-mute"]', '[data-tid="microphone-button"]', + '[data-tid="toggle-mute"]', + // data-cid attributes (light-meetings / anonymous join) + '[data-cid="ts-hangup-btn"]', + '[data-cid="calling-hangup-button"]', + '[data-cid="calling-unified-bar"]', ]; try { @@ -406,28 +468,31 @@ export class JoinProcedure { }); return true; } catch { - // Selector-based detection failed, try DOM evaluation as fallback + // Primary selector-based detection failed } - // Fallback: evaluate the page for meeting indicators + // Fallback: structural DOM check for call control containers try { const inMeeting = await this._page.evaluate(() => { - // Check for call-related aria roles and meeting elements - const bodyText = document.body?.innerText || ''; - const meetingIndicators = [ - 'Leave', // Leave button text - 'Mute', // Mic mute button - 'Unmute', // Mic unmute button - 'Turn off camera', // Camera control - 'Turn on camera', - 'Share', // Share screen - ]; - const found = meetingIndicators.filter(ind => bodyText.includes(ind)); - // Need at least 2 meeting indicators to confirm we're in a meeting - return found.length >= 2; + const callBar = document.querySelector( + '[class*="calling-controls" i], [class*="call-controls" i], ' + + '[class*="controlBar" i], [class*="unified-bar" i]' + ); + if (callBar) return true; + // Check for hangup/mic buttons by role+structure (language-independent) + const buttons = Array.from(document.querySelectorAll('button[id]')); + let callButtons = 0; + for (let i = 0; i < buttons.length; i++) { + const id = buttons[i].id.toLowerCase(); + if (id.includes('hangup') || id.includes('microphone') || + id.includes('video-button') || id.includes('mute')) { + callButtons++; + } + } + return callButtons 
>= 2; }); if (inMeeting) { - this._logger.info('Detected meeting via DOM text analysis (fallback)'); + this._logger.info('Detected meeting via structural DOM analysis (fallback)'); return true; } } catch { @@ -451,7 +516,7 @@ export class JoinProcedure { await this._page.waitForSelector(primarySelector, { timeout: 5000 }); await this._page.click(primarySelector); this._logger.info('Clicked leave button'); - await this._page.waitForTimeout(2000); + await this._waitForLeaveUiSettled(); return; } catch { this._logger.info('Primary leave selector not found, trying fallbacks...'); @@ -471,7 +536,7 @@ export class JoinProcedure { if (button) { await button.click(); this._logger.info(`Clicked leave button (fallback: ${selector})`); - await this._page.waitForTimeout(2000); + await this._waitForLeaveUiSettled(); return; } } catch { diff --git a/src/bot/orchestrator.ts b/src/bot/orchestrator.ts index 624306d..cbf7c0f 100644 --- a/src/bot/orchestrator.ts +++ b/src/bot/orchestrator.ts @@ -132,7 +132,8 @@ export class BotOrchestrator { /** * Teams launcher commonly embeds meeting params in hash-routed paths: * /_#/meet/?p=...&anon=true - * In this shape, "anon" is in the hash query (not URL.search). + * In this shape, "anon" is in the hash query (not URL.search). The auth + * path must strip it so the meeting client uses the signed-in identity. */ private _stripAnonFromInnerMeetingUrl(innerUrlPath: string): string { try { @@ -352,9 +353,10 @@ export class BotOrchestrator { // CRITICAL: The suppress params (msLaunch, suppressPrompt, directDl) must // be on the LAUNCHER URL itself, NOT inside the encoded meeting URL parameter. // resolveLaunchUrl follows redirects first (meeting URL → launcher URL), - // then adds the params to the RESOLVED launcher URL. getMeetingLaunchUrl - // adds params to the raw meeting URL — they end up encoded inside the - // launcher's url= parameter and have no effect on the launcher behavior. + // then adds the params to the RESOLVED launcher URL. 
resolveLaunchUrl + // also sets anon=true (correct default for the anon path); for the + // authenticated path we must explicitly strip it so Teams uses the + // signed-in identity instead of routing as a guest. let launchUrl: string; try { launchUrl = await resolveLaunchUrl(this._meetingUrl); @@ -362,22 +364,16 @@ export class BotOrchestrator { this._logger.warn(`Could not resolve launch URL, using fallback: ${error}`); launchUrl = getMeetingLaunchUrl(this._meetingUrl); } - // Remove anon=true since the user is authenticated try { const urlObj = new URL(launchUrl); urlObj.searchParams.delete('anon'); - // Some Teams launcher URLs carry the real meeting path in an encoded "url" param. - // In auth mode that inner URL can still contain anon=true, which forces guest-like behavior. const encodedInnerUrl = urlObj.searchParams.get('url'); if (encodedInnerUrl) { - const innerPath = this._stripAnonFromInnerMeetingUrl(encodedInnerUrl); - urlObj.searchParams.set('url', innerPath); + urlObj.searchParams.set('url', this._stripAnonFromInnerMeetingUrl(encodedInnerUrl)); } launchUrl = urlObj.toString(); } catch { /* keep as-is */ } - this._logger.info(`STEP 4: navigating to launch URL: ${launchUrl.substring(0, 120)}...`); - this._logger.info(`STEP 4: launch URL contains anon=true? ${launchUrl.includes('anon=true')}`); await this._page!.goto(launchUrl, { waitUntil: 'domcontentloaded', timeout: 30000, @@ -1031,6 +1027,13 @@ export class BotOrchestrator { /** * Launch the browser and create a new page. * @param authMode - If true, use headful + minimal args (Chromium Minimal, proven to work for auth) + * + * NOTE: anon and auth use DIFFERENT chromium args on purpose. The anon path + * relies on `--disable-web-security` + `--disable-features=IsolateOrigins,site-per-process` + * to make Teams' "light-meetings" guest bundle behave correctly across + * cross-origin iframes. 
Unifying the args breaks anon: Teams sends the + * bot to the lobby and the lobby→meeting WebRTC renegotiation crashes + * (`rejectMediaDescriptionsUpdateAsync`). Keep these flag sets separate. */ private async _launchBrowser(authMode: boolean = false): Promise { this._logger.info(`Launching browser (authMode=${authMode})...`); @@ -1145,7 +1148,7 @@ export class BotOrchestrator { timestamp: entry.timestamp, isFinal: true, }); - } + }, ); // Inject audio getUserMedia override BEFORE any navigation @@ -1225,49 +1228,51 @@ export class BotOrchestrator { /** * Wait for the bot to be admitted from the lobby. + * Bails out immediately if the page is closed (crash/disconnect) so we + * don't report a misleading "in_lobby" state for the next 2 minutes. */ private async _waitForMeetingAdmission(): Promise { const startTime = Date.now(); const timeout = config.timeouts.lobbyWait; - let consecutiveNoSignal = 0; - const maxNoSignal = 5; // Allow several cycles with no lobby/meeting signal before giving up + let loggedLobby = false; + let wasInLobby = false; while (Date.now() - startTime < timeout) { - // Check if we're in the meeting + if (!this._page || this._page.isClosed()) { + throw new Error('Page closed while waiting for meeting admission'); + } + const inMeeting = await this._joinProcedure!.isInMeeting({ waitForSeconds: 5 }); if (inMeeting) { + if (wasInLobby) { + this._logger.info('Admitted from lobby into meeting'); + } return; } - // Check if still in lobby const inLobby = await this._joinProcedure!.isInMeetingLobby({ waitForSeconds: 2 }); + if (inLobby) { - consecutiveNoSignal = 0; - this._logger.info('Still waiting in lobby...'); + wasInLobby = true; + if (!loggedLobby) { + loggedLobby = true; + this._setState('in_lobby'); + this._logger.info('Bot is in lobby, waiting for admission...'); + await this._takeScreenshot('in-lobby'); + } continue; } - // Neither in meeting nor in lobby — this can happen legitimately: - // - Authenticated users skip lobby, but meeting UI 
takes seconds to load - // - Page is transitioning between states - // Only give up after several consecutive cycles with no signal - consecutiveNoSignal++; - const currentUrl = this._page?.url() || 'unknown'; - this._logger.info(`No lobby/meeting signal detected (attempt ${consecutiveNoSignal}/${maxNoSignal}), URL: ${currentUrl}`); - - if (consecutiveNoSignal >= maxNoSignal) { - // Take a screenshot and log page content for debugging before giving up - await this._takeScreenshot('no-meeting-signal'); - try { - const bodySnippet = await this._page?.evaluate(() => - document.body?.innerText?.substring(0, 500) || '(empty)' - ); - this._logger.warn(`Page content before giving up: ${bodySnippet}`); - } catch { /* ignore */ } - throw new Error('Bot was removed from lobby or meeting ended'); + if (wasInLobby) { + // Lobby disappeared but isInMeeting not yet true — Teams is + // transitioning (WebRTC renegotiation, UI rendering). Keep + // polling; the meeting controls will appear shortly. + this._logger.info('Lobby gone, waiting for meeting UI to render...'); + await this._takeScreenshot('lobby-transition'); } } + await this._takeScreenshot('lobby-timeout'); throw new Error('Timeout waiting to be admitted from lobby'); } diff --git a/src/bot/teamsActionsService.ts b/src/bot/teamsActionsService.ts index 94ed31f..cd83463 100644 --- a/src/bot/teamsActionsService.ts +++ b/src/bot/teamsActionsService.ts @@ -1,6 +1,8 @@ import { Page } from 'playwright'; import { Logger } from 'winston'; +const _ACTION_WAIT_MS = 10000; + /** * Service center for all Teams meeting UI actions. * @@ -43,7 +45,12 @@ export class TeamsActionsService { await captionsBtn.click(); this._logger.info(`TeamsActions: Transcript toggled ${enable ? 
'ON' : 'OFF'}`); - await this._page.waitForTimeout(1000); + try { + await this._page.waitForFunction( + () => !document.querySelector('[role="menu"]'), + { timeout: 5000 }, + ); + } catch { /* menu may already be gone */ } return true; } @@ -88,9 +95,13 @@ export class TeamsActionsService { } await input.click(); - await this._page.waitForTimeout(200); + try { + await this._page.waitForFunction( + () => document.activeElement?.matches('[contenteditable], [role="textbox"], input, textarea'), + { timeout: 3000 }, + ); + } catch { /* proceed even if focus check times out */ } await this._page.keyboard.type(text, { delay: 10 }); - await this._page.waitForTimeout(200); await this._page.keyboard.press('Enter'); this._logger.info('TeamsActions: Chat message sent'); @@ -190,7 +201,6 @@ export class TeamsActionsService { await btn.click(); this._logger.info(`TeamsActions: ${name} toggled ${enable ? 'ON' : 'OFF'}`); - await this._page.waitForTimeout(500); return true; } @@ -208,7 +218,11 @@ export class TeamsActionsService { const button = await this._page.$(selector); if (button) { await button.click(); - await this._page.waitForTimeout(1000); + try { + await this._page.waitForSelector('[role="menu"], [role="menubar"]', { state: 'visible', timeout: _ACTION_WAIT_MS }); + } catch { + this._logger.warn('TeamsActions: More menu did not appear'); + } return true; } } catch { @@ -237,7 +251,11 @@ export class TeamsActionsService { const item = await this._page.$(sel); if (item) { await item.click(); - await this._page.waitForTimeout(1500); + try { + await this._page.waitForSelector('#closed-captions-button', { state: 'visible', timeout: _ACTION_WAIT_MS }); + } catch { + this._logger.warn('TeamsActions: Captions button not visible in submenu'); + } btn = await this._page.$('#closed-captions-button'); if (btn) return btn; break; @@ -276,7 +294,14 @@ export class TeamsActionsService { if (button) { await button.click(); this._logger.info(`TeamsActions: Opened chat panel: 
${selector}`); - await this._page.waitForTimeout(1000); + try { + await this._page.waitForSelector( + '[data-tid="ckeditor-replyConversation"], div[role="textbox"][data-tid*="chat"]', + { state: 'visible', timeout: _ACTION_WAIT_MS }, + ); + } catch { + this._logger.warn('TeamsActions: Chat panel input not visible after opening'); + } return; } } catch {