diff --git a/.env.sample b/.env.sample index 4de52ee..e6ce11f 100644 --- a/.env.sample +++ b/.env.sample @@ -9,6 +9,31 @@ GATEWAY_WS_URL=wss://gateway-int.poweron-center.net/api/teamsbot/ws BOT_NAME=PowerOn AI BOT_HEADLESS=true +# Static avatar tile (replaces Teams' green/spinning "no video" placeholder +# with a quiet, single-color surface + the bot's display name in the +# center). Recommended for the anonymous bot. Colors are CSS values. +# BOT_USE_CANVAS_VIDEO=true +# BOT_AVATAR_BG_COLOR=#a8d4f0 +# BOT_AVATAR_TEXT_COLOR=#1a3552 + +# DEBUG ONLY - leave commented in normal operation. +# Set to true to skip BOTH media wrappers (RTCPeerConnection wrapper + +# getUserMedia override) for isolating Teams' anonymous lobby preheating +# crash (rejectMediaDescriptionsUpdateAsync). With this on the bot has +# no audio in/out, no captions, no greeting - it sits silently. +# BOT_DISABLE_MEDIA_WRAPPERS=true + +# DEBUG ONLY - bisect the Teams anonymous preheated-PC crash by running +# anon with the auth Chromium args (minimal flags) AND no stealth init. +# BOT_ANON_USE_AUTH_BROWSER_SETUP=true + +# Playwright browser channel. Empty = bundled Chromium (default). +# Set to 'chrome' or 'msedge' to use the locally installed real browser. +# Strongly recommended for anonymous Teams joins: Playwright's bundled +# Chromium gets detected as automation and forced into a lobby + the +# buggy preheated-PC code path; real Chrome bypasses both. +# BOT_BROWSER_CHANNEL=chrome + # Logging LOG_LEVEL=info LOG_DIR=./output/logs diff --git a/Dockerfile b/Dockerfile index a242d35..ae89176 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,7 +21,7 @@ FROM mcr.microsoft.com/playwright:v1.50.0-jammy WORKDIR /app -# Install Xvfb for headful browser mode (Teams blocks headless browsers) +# Xvfb for headful browser mode (Teams blocks headless browsers). 
RUN apt-get update && apt-get install -y xvfb && rm -rf /var/lib/apt/lists/* # Copy built files and dependencies @@ -29,6 +29,14 @@ COPY --from=builder /app/dist ./dist COPY --from=builder /app/node_modules ./node_modules COPY package*.json ./ +# Real Google Chrome (stable channel) + its OS deps. Required for anonymous +# Teams joins: the bundled Playwright Chromium gets detected as automation +# by Teams' light-meetings flow and forced into a lobby + the buggy +# preheated-PC code path. Real Chrome bypasses both. Configured via +# BOT_BROWSER_CHANNEL=chrome (passed to chromium.launch({ channel })). +# See wiki/b-reference/teams-bot/architecture.md → "Browser-Channel". +RUN npx playwright install --with-deps chrome + # Create output directories RUN mkdir -p output/logs output/screenshots @@ -36,6 +44,7 @@ RUN mkdir -p output/logs output/screenshots ENV NODE_ENV=production ENV BOT_HEADLESS=false ENV DISPLAY=:99 +ENV BOT_BROWSER_CHANNEL=chrome # Expose port EXPOSE 4100 diff --git a/docker-compose.yml b/docker-compose.yml index 84d8f22..6257a82 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,6 +12,10 @@ services: - BOT_NAME=${BOT_NAME:-PowerOn AI} - BOT_HEADLESS=false - DISPLAY=:99 + # Real Chrome installed in the image (see Dockerfile). Required for + # anonymous Teams joins so the light-meetings flow does not detect + # automation and force the bot into a lobby. + - BOT_BROWSER_CHANNEL=${BOT_BROWSER_CHANNEL:-chrome} - LOG_LEVEL=info - SCREENSHOT_ON_ERROR=true volumes: diff --git a/src/bot/audioProcedure.ts b/src/bot/audioProcedure.ts index 6be2c7d..2d995b6 100644 --- a/src/bot/audioProcedure.ts +++ b/src/bot/audioProcedure.ts @@ -20,6 +20,10 @@ export type AudioProcedureOptions = { useCanvasVideo?: boolean; /** Shown in the center of the canvas (e.g. bot display name) */ displayLabel?: string; + /** Hex/CSS color of the static avatar background (default: light blue). 
*/ + avatarBgColor?: string; + /** Hex/CSS color of the centered display label (default: dark blue). */ + avatarTextColor?: string; }; export class AudioProcedure { @@ -27,6 +31,8 @@ export class AudioProcedure { private _logger: Logger; private _useCanvasVideo: boolean; private _displayLabel: string; + private _avatarBgColor: string; + private _avatarTextColor: string; private _audioContext: boolean = false; private _initScriptInjected: boolean = false; private _audioQueue: Array<{ audioData: string; format: 'mp3' | 'wav' | 'pcm' }> = []; @@ -38,6 +44,8 @@ export class AudioProcedure { this._logger = logger; this._useCanvasVideo = !!options?.useCanvasVideo; this._displayLabel = (options?.displayLabel || 'Bot').trim() || 'Bot'; + this._avatarBgColor = (options?.avatarBgColor || '').trim() || '#a8d4f0'; + this._avatarTextColor = (options?.avatarTextColor || '').trim() || '#1a3552'; } /** @@ -59,6 +67,8 @@ export class AudioProcedure { await this._page.context().addInitScript(poweronMediaPatchInstall, { useCanvasVideo: this._useCanvasVideo, displayLabel: this._displayLabel, + avatarBgColor: this._avatarBgColor, + avatarTextColor: this._avatarTextColor, }); this._initScriptInjected = true; @@ -70,7 +80,12 @@ export class AudioProcedure { * in an iframe (addInitScript runs too early) or overwrites getUserMedia. 
*/ async reinstallMediaPatchInAllFrames(): Promise { - const payload = { useCanvasVideo: this._useCanvasVideo, displayLabel: this._displayLabel }; + const payload = { + useCanvasVideo: this._useCanvasVideo, + displayLabel: this._displayLabel, + avatarBgColor: this._avatarBgColor, + avatarTextColor: this._avatarTextColor, + }; for (const frame of this._page.frames()) { try { await frame.evaluate(poweronMediaPatchInstall, payload); diff --git a/src/bot/chatProcedure.ts b/src/bot/chatProcedure.ts index fe27c31..6ab15f7 100644 --- a/src/bot/chatProcedure.ts +++ b/src/bot/chatProcedure.ts @@ -144,74 +144,57 @@ export class ChatProcedure { } /** - * Check if the chat panel is currently visible by probing for known - * UI elements (chat input, message list, or aria-pressed toggle). + * Is the meeting chat panel currently open? * - * IMPORTANT — what we DO NOT accept as proof of an open chat panel: - * * a generic ``[data-tid="ckeditor"]`` / ``[role=textbox]`` somewhere in - * the page. In the anonymous / compact in-meeting layout Teams renders - * a separate compose box (e.g. for reactions / inline comments) whose - * parent has NO data-tid at all and which does NOT post into the - * meeting chat. If we treated that as "panel open" the bot would skip - * the toggle and silently lose every chat send. - * * a "Besprechungschat" / "Meeting chat" heading. Teams keeps the side- - * pane heading mounted even when the pane is ``vdi-occlusion`` / h=0. + * Teams' calling layout (both anonymous light-meetings and the + * authenticated meeting view as of 2026-05) hosts a single right-side + * pane container `[data-tid="calling-right-side-panel"]` that can + * display Chat, People, Info, Captions etc. — only one at a time. + * Detection is therefore two simple checks: * - * What we DO accept: - * * ``#chat-button[aria-pressed="true"]`` — explicit toggle state. 
- * * a known **chat-side-panel-scoped** input (selectors anchored under - * ``chat-pane-compose-message-footer`` / ``message-pane-footer``). - * * a chat **message list container** with non-trivial height. + * 1. Does the side-pane container exist AND is it visually rendered + * (offsetWidth/Height > 0, has an offsetParent)? When the pane + * is closed it is unmounted or collapsed to zero size. + * 2. Does the visible pane contain a chat-specific child + * (`message-pane-layout`, `chat-pane-compose-message-footer`, + * `#chat-pane-list`, `[data-app-name="chats"]`, …)? This + * distinguishes Chat-mode from People/Info/Captions + * language-independently. + * + * Things we deliberately do NOT inspect (each one was empirically + * shown to be unreliable for this DOM): + * * `#chat-button` attributes — the button is byte-identical in + * both states and has no `aria-pressed`. Visual state comes from + * a CSS `:has()` selector on the side-pane. + * * `vdi-occlusion` class — a permanent structural marker on + * `calling-right-side-panel` and `message-pane-layout`, not a + * visibility flag. + * * "Besprechungschat" / "Meeting chat" headings or any other + * text — locale-dependent + Teams keeps headings mounted even + * when the pane is hidden. + * * generic `[data-tid="ckeditor"]` anywhere in the page — the + * compact layout renders unrelated compose boxes (reactions, + * inline comments) outside the meeting chat panel. */ private async _isChatPanelOpen(): Promise { return this._page.evaluate(() => { - // 1. Chat button aria-pressed state. This is the ONLY safe short- - // circuit — Teams keeps the panel "open" semantically even when its - // layout pane is briefly collapsed (h=0, vdi-occlusion). If we were - // stricter here the periodic scan / send path would re-trigger - // _openChatPanel which CLICKS the button — and a click on an already- - // pressed button TOGGLES the panel CLOSED. So aria-pressed=true must - // short-circuit to true. 
- const chatBtn = document.querySelector('#chat-button, button[id="chat-button"]') as HTMLElement | null; - if (chatBtn?.getAttribute('aria-pressed') === 'true') return true; - - // 2. Chat input / compose box visible — but ONLY accept selectors - // that are scoped to the actual chat side-pane (footer ancestors). - // Generic [contenteditable] / [role=textbox] matches would also hit - // the compact in-meeting compose box used by anonymous / pre-join - // overlays, which is NOT the meeting chat. - const inputSelectors = [ - '[data-tid="ckeditor-replyConversation"]', - '[data-tid="ckeditor"]', - '[data-tid="chat-pane-compose-message-footer"] div[contenteditable="true"]', - '[data-tid="chat-pane-compose-message-footer"] div[role="textbox"]', - '[data-tid="message-pane-footer"] div[contenteditable="true"]', - '[data-tid="message-pane-footer"] div[role="textbox"]', - 'div[role="textbox"][data-tid*="chat"]', - 'div[role="textbox"][data-tid*="message"]', - // light-meetings: a visible "expand compose" button is itself a - // reliable signal that the meeting chat side-pane is open. - '[data-tid="newMessageCommands-expand-compose"]', - '[data-tid="simplified-compose-bottom-toolbar"]', - ]; - for (const sel of inputSelectors) { - const el = document.querySelector(sel) as HTMLElement | null; - if (el && el.offsetHeight > 0) return true; - } - - // 3. 
Chat-specific containers (NOT [role="log"] which also matches captions) - const chatContainerSelectors = [ - '[data-tid="message-pane-list"]', - '[data-tid="chat-pane-list"]', - '[data-tid="chat-pane"]', - '.ts-message-list-container', - ]; - for (const sel of chatContainerSelectors) { - const el = document.querySelector(sel) as HTMLElement | null; - if (el && el.offsetHeight > 50) return true; - } - - return false; + const sidePanel = document.querySelector( + '[data-tid="calling-right-side-panel"]', + ) as HTMLElement | null; + if (!sidePanel) return false; + const isVisible = sidePanel.offsetWidth > 0 + && sidePanel.offsetHeight > 0 + && sidePanel.offsetParent !== null; + if (!isVisible) return false; + const chatHallmark = sidePanel.querySelector( + '[data-tid="message-pane-layout"], ' + + '[data-tid="message-pane-body"], ' + + '[data-tid="chat-pane-compose-message-footer"], ' + + '[data-tid="message-pane-footer"], ' + + '#chat-pane-list, ' + + '[data-app-name="chats"]', + ); + return chatHallmark !== null; }); } @@ -222,13 +205,21 @@ export class ChatProcedure { * loads from a chat thread). Clicking again would TOGGLE it closed — * that's why we always check ``_isChatPanelOpen()`` first. * - * The selector list below covers BOTH layouts: - * * authenticated full Teams meeting → ``#chat-button`` etc. - * * anonymous / compact in-meeting toolbar (Teams Live / pre-join stage) - * where the toggle has no stable id and only carries - * ``data-tid``/``aria-label`` hints. We therefore include data-tid- - * based and broader role-based fallbacks so the bot does not silently - * fall back to typing into a non-chat compose box. + * Auth Full-Teams ships TWO buttons with "Chat" in the aria-label: + * 1) the real toggle (UUID id, `aria="Chat (Ctrl+Shift+2)"`, + * `aria-pressed="false"|"true"`) + * 2) a sibling `#chat-button` without `aria-pressed` that is NOT a + * toggle (likely the side-nav chat-app entry) — clicking it does + * nothing for the meeting chat panel. 
+ * light-meetings (anon) ships only `#chat-button`, which IS the real + * toggle there (no `aria-pressed` because Teams' compact bar uses + * menu-button semantics). + * + * Strategy: collect every visible button/role=button whose + * id/data-tid/aria-label hints at "chat", PREFER ones with a real + * `aria-pressed` attribute, click the best unclicked one each round, + * and never click the same button twice (so a non-toggle does not + * lock us into a loop). */ private async _openChatPanel(): Promise { if (await this._isChatPanelOpen()) { @@ -236,27 +227,9 @@ export class ChatProcedure { return true; } - const chatButtonSelectors = [ - '#chat-button', - 'button[id="chat-button"]', - 'button[data-tid="toggle-chat"]', - 'button[data-tid*="chat" i]', - 'button[data-tid*="conversation" i]', - 'button[aria-label="Chat"]', - 'button[aria-label*="Chat" i]', - 'button[aria-label*="Unterhaltung" i]', - 'button[aria-label*="Besprechungschat" i]', - 'button[aria-label*="Meeting chat" i]', - 'button[title*="Chat" i]', - 'button[title*="Besprechungschat" i]', - // role-based fallbacks for the compact / anonymous toolbar - '[role="button"][aria-label*="Chat" i]', - '[role="button"][aria-label*="Besprechungschat" i]', - '[role="menuitem"][aria-label*="Chat" i]', - ]; - const maxAttempts = 12; const pollIntervalMs = 2000; + const previouslyClicked: string[] = []; for (let attempt = 1; attempt <= maxAttempts; attempt++) { if (await this._isChatPanelOpen()) { @@ -264,49 +237,81 @@ export class ChatProcedure { return true; } - let clicked = false; - const triedSelectors: string[] = []; - for (const selector of chatButtonSelectors) { - try { - const button = await this._page.$(selector); - if (!button) continue; - const isVisible = await button.isVisible().catch(() => false); - triedSelectors.push(`${selector}=${isVisible ? 
'visible' : 'hidden'}`); - if (!isVisible) continue; - await button.click(); - clicked = true; - this._logger.info(`Clicked chat button: ${selector} (attempt ${attempt}/${maxAttempts})`); - break; - } catch (err) { - triedSelectors.push(`${selector}=err:${String(err).substring(0, 40)}`); - } - } + const click = await this._page.evaluate((alreadyTried: string[]) => { + const matchesChatHint = (el: Element): boolean => { + const id = (el.id || '').toLowerCase(); + const tid = (el.getAttribute('data-tid') || '').toLowerCase(); + const aria = (el.getAttribute('aria-label') || '').toLowerCase(); + const title = (el.getAttribute('title') || '').toLowerCase(); + return [id, tid, aria, title].some((v) => + v.includes('chat') + || v.includes('unterhalt') + || v.includes('besprechung') + || v.includes('conversation'), + ); + }; + const isVisible = (el: HTMLElement): boolean => + el.offsetHeight > 0 && el.offsetWidth > 0 && el.offsetParent !== null; + const keyOf = (el: Element): string => + `${el.id || ''}|${el.getAttribute('data-tid') || ''}|${el.getAttribute('aria-label') || ''}`; + const isToggle = (el: Element): boolean => { + const p = el.getAttribute('aria-pressed'); + return p === 'true' || p === 'false'; + }; - if (clicked) { - await this._page.waitForTimeout(2500); - if (await this._isChatPanelOpen()) { - this._logger.info('Chat panel opened successfully'); - return true; - } - this._logger.info('Chat button clicked but panel not detected yet, waiting before next attempt'); - await this._page.waitForTimeout(pollIntervalMs); - } else { - // Log which selectors were tried — without this we can't tell whether - // the buttons are missing entirely or just hidden behind another layer. 
+ const all = Array.from( + document.querySelectorAll('button, [role="button"], [role="menuitem"]'), + ) as HTMLElement[]; + const candidates = all + .filter((el) => matchesChatHint(el) && isVisible(el)) + .filter((el) => !alreadyTried.includes(keyOf(el))); + + if (candidates.length === 0) return { picked: null as null | { key: string; id: string; tid: string; aria: string; toggle: boolean } }; + + const toggles = candidates.filter(isToggle); + const pick = toggles[0] || candidates[0]; + pick.scrollIntoView({ block: 'center' }); + pick.click(); + return { + picked: { + key: keyOf(pick), + id: pick.id || '', + tid: pick.getAttribute('data-tid') || '', + aria: pick.getAttribute('aria-label') || '', + toggle: isToggle(pick), + }, + }; + }, previouslyClicked); + + if (!click.picked) { this._logger.info( - `Chat button not found, retry ${attempt}/${maxAttempts}` + - (triedSelectors.length ? ` | tried: ${triedSelectors.join(', ')}` : ''), + `No (more) un-clicked chat button candidates, retry ${attempt}/${maxAttempts}`, ); - // On the very first miss dump the full button diagnostics so the - // next code change has a real selector hint to work from instead - // of guessing. After that we throttle to avoid log spam. 
if (attempt === 1) { await this._dumpChatButtonDiagnostics(); } if (attempt < maxAttempts) { await this._page.waitForTimeout(pollIntervalMs); } + continue; } + + previouslyClicked.push(click.picked.key); + this._logger.info( + `Clicked chat button: id="${click.picked.id}" tid="${click.picked.tid}" ` + + `aria="${click.picked.aria}" toggle=${click.picked.toggle} ` + + `(attempt ${attempt}/${maxAttempts})`, + ); + + await this._page.waitForTimeout(2500); + if (await this._isChatPanelOpen()) { + this._logger.info('Chat panel opened successfully'); + return true; + } + this._logger.info( + 'Chat button clicked but panel not detected — will try a different candidate next round', + ); + await this._page.waitForTimeout(pollIntervalMs); } this._logger.warn('Could not open chat panel after polling - chat will not work'); diff --git a/src/bot/joinProcedure.ts b/src/bot/joinProcedure.ts index 8c74d54..c4d49b4 100644 --- a/src/bot/joinProcedure.ts +++ b/src/bot/joinProcedure.ts @@ -15,6 +15,21 @@ import { resolveLaunchUrl, getMeetingLaunchUrl } from './meetingUrlParser'; */ const _CONDITION_WAIT_MS = 10000; +/** + * Result of a structural state-detection probe. + * `via` names the trigger that decided the result so the orchestrator can + * log _why_ it transitioned the bot into the next state. + * + * Examples for `via`: + * - `selector:[data-tid="lobby-screen"]` — direct attribute match + * - `selector:button[id="hangup-button"]` — direct attribute match + * - `inferred:join-gone+no-hangup+no-callbar` — structural inference + */ +export interface DetectionResult { + matched: boolean; + via?: string; +} + export class JoinProcedure { private _page: Page; private _logger: Logger; @@ -383,12 +398,23 @@ export class JoinProcedure { /** * Check if the bot is currently in the lobby (waiting to be admitted). 
- * Teams shows various lobby messages depending on the meeting state: - * - "Someone will let you in shortly" (meeting active, waiting for admit) - * - "Someone will let you in when the meeting starts" (meeting not started yet) - * - "waiting for someone to let you in" (alternative wording) + * + * Detection is purely structural (no text matching) so it works in any + * Teams UI language: + * + * 1) Direct: a known lobby/waiting container `data-tid` / `data-cid` is + * visible. This is the strongest signal but the attributes drift + * between Teams releases. + * 2) Inferred: we have left pre-join (the `prejoin-join-button` is no + * longer visible) AND we are not yet in the meeting (no hangup button + * or call-control bar). After `_clickJoinNow()` succeeded, this state + * is unambiguously "lobby or in transition" — both meaning "wait for + * admission" from the orchestrator's point of view. + * + * Returns `{ matched, via }` where `via` names the trigger that decided + * the result so the orchestrator can log why it transitioned state. 
*/ - async isInMeetingLobby(options: { waitForSeconds?: number } = {}): Promise { + async isInMeetingLobby(options: { waitForSeconds?: number } = {}): Promise { const timeout = (options.waitForSeconds || 5) * 1000; const lobbySelectors = [ @@ -396,110 +422,101 @@ export class JoinProcedure { '[data-tid="waiting-screen"]', '[data-tid="lobby-waiting-screen"]', '[data-tid="lobby-container"]', + '[data-tid="prejoin-lobby"]', '[data-cid="lobby-screen"]', '[data-cid="waiting-screen"]', + '[data-cid="ts-waiting-screen"]', '#lobby-container', - '[id*="lobby"]', ]; - try { - await this._page.waitForSelector(lobbySelectors.join(', '), { - timeout, - state: 'visible', - }); - return true; - } catch { - // No structural lobby element found - } - - // Fallback: check for the pre-join/lobby state via page structure — - // the lobby has no call-control bar but does have a waiting spinner or icon - try { - const hasLobbyStructure = await this._page.evaluate(() => { - const el = document.querySelector( - '[class*="lobby" i], [class*="waiting-room" i], [class*="waitingScreen" i]' - ); - return !!el; - }); - if (hasLobbyStructure) return true; - } catch { - // Page may not be ready - } - - return false; - } - - /** - * Check if the bot is currently in the meeting (admitted from lobby). - * Primary selector: button[id="hangup-button"] (confirmed by Recall.ai). - * Note: Teams uses `id` (not `data-tid`) for the hangup button since 2025 redesign. - * - * For authenticated joins, Teams v2 sometimes renders differently. - * Additional fallback: check the URL for meeting patterns and DOM for call UI. 
- */ - async isInMeeting(options: { waitForSeconds?: number } = {}): Promise { - const timeout = (options.waitForSeconds || 5) * 1000; - - const inMeetingSelectors = [ - // Button IDs (Teams 2025+ redesign) - 'button[id="hangup-button"]', - 'button[id="microphone-button"]', - 'button[id="callingButtons-showMoreBtn"]', - 'button[id="video-button"]', - // data-tid attributes - '[data-tid="hangup-button"]', - '[data-tid="call-composite"]', - '[data-tid="callingButtons-showMoreBtn"]', - '[data-tid="call-controls"]', - '[data-tid="meeting-composite"]', - 'div[data-tid="video-gallery"]', - '[data-tid="microphone-button"]', - '[data-tid="toggle-mute"]', - // data-cid attributes (light-meetings / anonymous join) - '[data-cid="ts-hangup-btn"]', - '[data-cid="calling-hangup-button"]', - '[data-cid="calling-unified-bar"]', - ]; - - try { - await this._page.waitForSelector(inMeetingSelectors.join(', '), { - timeout, - state: 'visible', - }); - return true; - } catch { - // Primary selector-based detection failed - } - - // Fallback: structural DOM check for call control containers - try { - const inMeeting = await this._page.evaluate(() => { - const callBar = document.querySelector( - '[class*="calling-controls" i], [class*="call-controls" i], ' + - '[class*="controlBar" i], [class*="unified-bar" i]' - ); - if (callBar) return true; - // Check for hangup/mic buttons by role+structure (language-independent) - const buttons = Array.from(document.querySelectorAll('button[id]')); - let callButtons = 0; - for (let i = 0; i < buttons.length; i++) { - const id = buttons[i].id.toLowerCase(); - if (id.includes('hangup') || id.includes('microphone') || - id.includes('video-button') || id.includes('mute')) { - callButtons++; - } + for (const selector of lobbySelectors) { + try { + const handle = await this._page.waitForSelector(selector, { + timeout: Math.max(50, Math.floor(timeout / lobbySelectors.length)), + state: 'visible', + }); + if (handle) { + return { matched: true, via: 
`selector:${selector}` }; } - return callButtons >= 2; + } catch { + // Try next selector + } + } + + try { + const inferred = await this._page.evaluate(() => { + const isVisible = (el: Element | null): boolean => { + if (!el) return false; + const he = el as HTMLElement; + if (he.offsetParent === null) return false; + const rect = he.getBoundingClientRect(); + return rect.width > 0 && rect.height > 0; + }; + + const joinBtn = document.querySelector( + '#prejoin-join-button, button[data-tid="prejoin-join-button"]', + ); + const hangupBtn = document.querySelector( + 'button[id="hangup-button"], [data-tid="hangup-button"], ' + + '[data-cid="ts-hangup-btn"], [data-cid="calling-hangup-button"]', + ); + const callBar = document.querySelector( + '[data-tid="call-controls"], [data-tid="call-composite"], ' + + '[data-tid="meeting-composite"], [data-tid="video-gallery"], ' + + '[data-cid="calling-unified-bar"]', + ); + + return !isVisible(joinBtn) && !isVisible(hangupBtn) && !isVisible(callBar); }); - if (inMeeting) { - this._logger.info('Detected meeting via structural DOM analysis (fallback)'); - return true; + if (inferred) { + return { matched: true, via: 'inferred:join-gone+no-hangup+no-callbar' }; } } catch { // Page may not be ready } - return false; + return { matched: false }; + } + + /** + * Check if the bot is currently in the meeting (admitted from lobby). + * + * The ONLY reliable indicator that we are admitted is the hangup button: + * it does not exist in pre-join nor in the lobby. Other elements such as + * `data-tid="toggle-mute"` or `id="microphone-button"` also appear in the + * pre-join / lobby preview UI and would produce false positives that make + * the orchestrator think the bot is in the meeting while it is still + * waiting in the lobby. + * + * Returns `{ matched, via }` where `via` names the matched hangup + * selector so the orchestrator can log the trigger that promoted the + * bot into the `in_meeting` state. 
+ */ + async isInMeeting(options: { waitForSeconds?: number } = {}): Promise { + const timeout = (options.waitForSeconds || 5) * 1000; + + const hangupSelectors = [ + 'button[id="hangup-button"]', + '[data-tid="hangup-button"]', + '[data-cid="ts-hangup-btn"]', + '[data-cid="calling-hangup-button"]', + ]; + + for (const selector of hangupSelectors) { + try { + const handle = await this._page.waitForSelector(selector, { + timeout: Math.max(50, Math.floor(timeout / hangupSelectors.length)), + state: 'visible', + }); + if (handle) { + return { matched: true, via: `selector:${selector}` }; + } + } catch { + // Try next selector + } + } + + return { matched: false }; } /** diff --git a/src/bot/mediaGetUserMediaPatch.ts b/src/bot/mediaGetUserMediaPatch.ts index 8827587..6988342 100644 --- a/src/bot/mediaGetUserMediaPatch.ts +++ b/src/bot/mediaGetUserMediaPatch.ts @@ -8,11 +8,17 @@ export type MediaGetUserMediaPatchOptions = { useCanvasVideo: boolean; displayLabel: string; + /** Hex/CSS color of the static avatar background (default: light blue). */ + avatarBgColor?: string; + /** Hex/CSS color of the centered display label (default: dark blue). */ + avatarTextColor?: string; }; export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => { 'use strict'; const { useCanvasVideo, displayLabel } = opts; + const avatarBgColor = opts.avatarBgColor || '#a8d4f0'; + const avatarTextColor = opts.avatarTextColor || '#1a3552'; const w: any = window as any; if (!w.__gumChromium) { @@ -163,7 +169,11 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => return; } - const _fps = 15; + // Low fps: the avatar is intentionally STATIC. The interval still ticks so + // captureStream() in headless Chromium gets fresh frames (some Chromium + // builds pause the track if no new frames arrive), but each tick draws an + // identical image — no animation, no flicker. 
+ const _fps = 2; w.__startBotAvatarStream = () => { if ( w.__botAvatarStreamStarted @@ -196,43 +206,20 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => (document.body || document.documentElement).appendChild(canvas); w.__botAvatarCanvas = canvas; const c2d = canvas.getContext('2d'); - let t = 0; const draw = () => { if (!c2d) { return; } - t += 0.04; const wPx = canvas.width; const hPx = canvas.height; - c2d.fillStyle = '#061525'; + c2d.fillStyle = avatarBgColor; c2d.fillRect(0, 0, wPx, hPx); - const g = c2d.createLinearGradient(0, 0, wPx, hPx); - g.addColorStop(0, '#1a4f8c'); - g.addColorStop(0.5, '#0c305a'); - g.addColorStop(1, '#132e6e'); - c2d.fillStyle = g; - c2d.fillRect(0, 0, wPx, hPx); - c2d.strokeStyle = 'rgba(255, 200, 80, 0.95)'; - c2d.lineWidth = 3; - c2d.strokeRect(6, 6, wPx - 12, hPx - 12); - c2d.fillStyle = 'rgba(255, 220, 120, 0.95)'; - c2d.font = '600 13px system-ui, "Segoe UI", sans-serif'; - c2d.textAlign = 'left'; - c2d.textBaseline = 'top'; - c2d.fillText('PORTA', 14, 10); + c2d.fillStyle = avatarTextColor; + c2d.font = 'bold 28px system-ui, "Segoe UI", sans-serif'; c2d.textAlign = 'center'; c2d.textBaseline = 'middle'; - c2d.fillStyle = '#ffffff'; - c2d.font = 'bold 28px system-ui, "Segoe UI", sans-serif'; const line = (w.__botAvatarDisplayLabel || displayLabel).toString().slice(0, 72); - c2d.fillText(line, wPx / 2, hPx / 2 - 6); - c2d.fillStyle = 'rgba(255,255,255,0.78)'; - c2d.font = '14px system-ui, "Segoe UI", sans-serif'; - c2d.fillText('poweron', wPx / 2, hPx / 2 + 26); - const pulse = 0.75 + 0.25 * Math.sin(t); - c2d.fillStyle = 'rgba(120, 200, 255, ' + 0.15 * pulse + ')'; - c2d.fillRect(0, 0, wPx, 6); - c2d.fillRect(0, hPx - 6, wPx, 6); + c2d.fillText(line, wPx / 2, hPx / 2); }; draw(); // Capture at fps for compositor-driven frames AND also push manual frames @@ -243,7 +230,9 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => if (w.__botAvatarVideoTrack) { 
w.__botAvatarVideoTrack.enabled = true; try { - w.__botAvatarVideoTrack.contentHint = 'motion'; + // 'detail' = static / low-motion content -> WebRTC uses lower + // bitrate + preserves text sharpness instead of motion smoothing. + w.__botAvatarVideoTrack.contentHint = 'detail'; } catch { // ignore } diff --git a/src/bot/orchestrator.ts b/src/bot/orchestrator.ts index cbf7c0f..becd907 100644 --- a/src/bot/orchestrator.ts +++ b/src/bot/orchestrator.ts @@ -173,7 +173,7 @@ export class BotOrchestrator { } try { - this._setState('launching'); + this._setState('launching', 'trigger=start invoked'); // Connect to Gateway WebSocket first await this._connectToGateway(); @@ -203,7 +203,7 @@ export class BotOrchestrator { // Launch browser await this._launchBrowser(); - this._setState('navigating'); + this._setState('navigating', 'trigger=browser launched'); // STEP 1: Navigate to meeting URL and click "Continue on this browser" await this._takeScreenshot('anon-step1-before-launcher', this._isDebugMode); @@ -223,19 +223,12 @@ export class BotOrchestrator { await this._joinProcedure!.joinMeetingLobbyFlow(); await this._takeScreenshot('anon-step2-after-join', this._isDebugMode); - // Check if we're in lobby - const inLobby = await this._joinProcedure!.isInMeetingLobby({ waitForSeconds: 10 }); - if (inLobby) { - this._setState('in_lobby'); - this._logger.info('Bot is in lobby, waiting to be admitted...'); - await this._takeScreenshot('anon-step3-in-lobby', this._isDebugMode); - } + // Wait to be admitted to the meeting (handles lobby state internally). + // Returns the trigger (matched selector) that promoted us into the meeting. 
+ const admissionVia = await this._waitForMeetingAdmission(); - // Wait to be admitted to the meeting - await this._waitForMeetingAdmission(); - - this._setState('in_meeting'); - this._logger.info(`Bot joined the meeting as "${this._botName}"`); + this._setState('in_meeting', `trigger=${admissionVia}`); + this._logger.info(`Bot joined the meeting as "${this._botName}" (trigger=${admissionVia})`); await this._takeScreenshot('anon-step4-in-meeting', this._isDebugMode); // Start keepalive to prevent idle disconnect @@ -244,6 +237,15 @@ export class BotOrchestrator { // Dismiss any post-join permission modals (e.g. "Manage windows on all displays") await this._joinProcedure!.dismissBrowserPermissionModals(); + if (config.botDisableMediaWrappers) { + this._logger.warn( + 'BOT_DISABLE_MEDIA_WRAPPERS=true: skipping audio init, transcript capture and ' + + 'join greeting. Bot will sit silently in the meeting until stopped.', + ); + await this._takeScreenshot('anon-step5-ready', this._isDebugMode); + return; + } + // Initialize audio playback await this._audioProcedure!.initialize(); if (config.botUseCanvasVideo) { @@ -271,7 +273,7 @@ export class BotOrchestrator { */ private async _attemptAuthJoin(): Promise { await this._launchBrowser(true); - this._setState('navigating'); + this._setState('navigating', 'trigger=browser launched (auth)'); // STEP 1: Navigate to teams.microsoft.com to trigger authentication this._logger.info('STEP 1: navigating to teams.microsoft.com'); @@ -444,10 +446,12 @@ export class BotOrchestrator { await this._takeScreenshot('step5-join-now-clicked', this._isDebugMode); // STEP 6: Wait for meeting admission (hangup button = in meeting) - await this._waitForMeetingAdmission(); + const admissionVia = await this._waitForMeetingAdmission(); - this._setState('in_meeting'); - this._logger.info(`STEP 6: bot joined the meeting (authenticated as ${this._options.botAccountEmail})`); + this._setState('in_meeting', `trigger=${admissionVia}`); + 
this._logger.info( + `STEP 6: bot joined the meeting (authenticated as ${this._options.botAccountEmail}, trigger=${admissionVia})`, + ); await this._takeScreenshot('step6-in-meeting', this._isDebugMode); this._startKeepAlive(); @@ -940,7 +944,7 @@ export class BotOrchestrator { this._stopKeepAlive(); try { - this._setState('leaving'); + this._setState('leaving', 'trigger=stop() invoked'); // Stop audio capture if (this._audioCaptureProcedure) { @@ -977,7 +981,7 @@ export class BotOrchestrator { this._gatewayWs = null; } - this._setState('disconnected'); + this._setState('disconnected', 'trigger=shutdown completed'); } } @@ -1038,7 +1042,21 @@ export class BotOrchestrator { private async _launchBrowser(authMode: boolean = false): Promise { this._logger.info(`Launching browser (authMode=${authMode})...`); - const args = authMode + // When BOT_ANON_USE_AUTH_BROWSER_SETUP is on, the anon path uses the + // exact same minimal flag set as the auth path — bisecting whether + // the anon-only Chromium flags trigger Teams' light-meetings preheating + // crash (`rejectMediaDescriptionsUpdateAsync`). + const useAuthSetupForAnon = !authMode && config.botAnonUseAuthBrowserSetup; + if (useAuthSetupForAnon) { + this._logger.warn( + 'BOT_ANON_USE_AUTH_BROWSER_SETUP=true: anon will use the auth Chromium args ' + + '(no --disable-web-security, no --disable-blink-features=AutomationControlled) ' + + 'AND skip the navigator stealth init.', + ); + } + const useMinimalArgs = authMode || useAuthSetupForAnon; + + const args = useMinimalArgs ? [ '--no-sandbox', '--disable-dev-shm-usage', @@ -1058,9 +1076,15 @@ export class BotOrchestrator { '--disable-blink-features=AutomationControlled', ]; + const channel = config.botBrowserChannel.trim(); + if (channel) { + this._logger.info(`Browser channel override: '${channel}' (using locally installed browser)`); + } + this._browser = await chromium.launch({ - headless: authMode ? false : config.botHeadless, + headless: useMinimalArgs ? 
false : config.botHeadless, args, + ...(channel ? { channel } : {}), }); this._context = await this._browser.newContext({ @@ -1080,6 +1104,8 @@ export class BotOrchestrator { // Stealth: Override browser properties that reveal automation. // Teams checks these to detect headless/automated browsers and // blocks the /v2/ authenticated experience, falling back to light-meetings. + // SKIPPED when BOT_ANON_USE_AUTH_BROWSER_SETUP is on (debug isolation). + if (!useAuthSetupForAnon) { await this._page.addInitScript(() => { // 1. Remove navigator.webdriver flag (primary detection signal) Object.defineProperty(navigator, 'webdriver', { get: () => false }); @@ -1112,6 +1138,7 @@ export class BotOrchestrator { // @ts-ignore if (!window.chrome.runtime) { window.chrome.runtime = {}; } }); + } // Initialize procedures this._joinProcedure = new JoinProcedure(this._page, this._logger, this._botName); @@ -1135,6 +1162,8 @@ export class BotOrchestrator { this._audioProcedure = new AudioProcedure(this._page, this._logger, { useCanvasVideo: config.botUseCanvasVideo, displayLabel: this._botName, + avatarBgColor: config.botAvatarBgColor, + avatarTextColor: config.botAvatarTextColor, }); this._teamsActions = new TeamsActionsService(this._page, this._logger); this._chatProcedure = new ChatProcedure( @@ -1151,14 +1180,27 @@ export class BotOrchestrator { }, ); - // Inject audio getUserMedia override BEFORE any navigation - // This ensures Teams gets our controlled audio stream when it calls getUserMedia - await this._audioProcedure.injectAudioOverride(); + // DEBUG TOGGLE: skip both wrappers when isolating the Teams anonymous + // `rejectMediaDescriptionsUpdateAsync` crash during lobby preheating. + if (config.botDisableMediaWrappers) { + this._logger.warn( + 'BOT_DISABLE_MEDIA_WRAPPERS=true: skipping getUserMedia override AND ' + + 'RTCPeerConnection wrapper. 
Audio capture (transcript) and TTS playback ' + + 'will be unavailable for this session.', + ); + } else { + // Inject audio getUserMedia override BEFORE any navigation + // This ensures Teams gets our controlled audio stream when it calls getUserMedia + await this._audioProcedure.injectAudioOverride(); - // Aggressive hybrid mode: always capture meeting audio as transcript source. - await this._audioCaptureProcedure!.injectCaptureOverride(); + // Aggressive hybrid mode: always capture meeting audio as transcript source. + await this._audioCaptureProcedure!.injectCaptureOverride(); + } this._page.on('framenavigated', () => { + if (config.botDisableMediaWrappers) { + return; + } if (!config.botUseCanvasVideo || !this._audioProcedure) { return; } @@ -1180,21 +1222,21 @@ export class BotOrchestrator { this._page.on('close', () => { if (!this._isShuttingDown) { this._logger.warn('Page closed unexpectedly'); - this._setState('disconnected'); + this._setState('disconnected', 'trigger=page close event'); } }); // Handle browser renderer crash (Chromium process segfault) this._page.on('crash', () => { this._logger.error('BROWSER CRASH: Chromium renderer process crashed!'); - this._setState('error', 'Browser crashed'); + this._setState('error', 'trigger=Chromium renderer crashed'); }); // Handle browser disconnection (entire browser process dies) this._browser.on('disconnected', () => { if (!this._isShuttingDown) { this._logger.error('BROWSER DISCONNECTED: Browser process died unexpectedly'); - this._setState('error', 'Browser process died'); + this._setState('error', 'trigger=browser process died'); } }); @@ -1231,43 +1273,50 @@ export class BotOrchestrator { * Bails out immediately if the page is closed (crash/disconnect) so we * don't report a misleading "in_lobby" state for the next 2 minutes. 
*/ - private async _waitForMeetingAdmission(): Promise { + private async _waitForMeetingAdmission(): Promise { const startTime = Date.now(); const timeout = config.timeouts.lobbyWait; let loggedLobby = false; let wasInLobby = false; + let lastLobbyVia: string | undefined; while (Date.now() - startTime < timeout) { if (!this._page || this._page.isClosed()) { throw new Error('Page closed while waiting for meeting admission'); } - const inMeeting = await this._joinProcedure!.isInMeeting({ waitForSeconds: 5 }); - if (inMeeting) { - if (wasInLobby) { - this._logger.info('Admitted from lobby into meeting'); - } - return; - } - - const inLobby = await this._joinProcedure!.isInMeetingLobby({ waitForSeconds: 2 }); - - if (inLobby) { + // Lobby check FIRST: while we are in the lobby, we must never let + // isInMeeting() decide our state — even though it is now strict + // (hangup-button only), checking lobby first keeps the order of + // truth obvious and prevents us from running post-join logic the + // instant Teams flips a single lobby attribute. 
+ const lobby = await this._joinProcedure!.isInMeetingLobby({ waitForSeconds: 2 }); + if (lobby.matched) { wasInLobby = true; + lastLobbyVia = lobby.via; if (!loggedLobby) { loggedLobby = true; - this._setState('in_lobby'); - this._logger.info('Bot is in lobby, waiting for admission...'); + this._setState('in_lobby', `trigger=${lobby.via}`); + this._logger.info(`Bot is in lobby, waiting for admission (trigger=${lobby.via})`); await this._takeScreenshot('in-lobby'); } continue; } + const meeting = await this._joinProcedure!.isInMeeting({ waitForSeconds: 3 }); + if (meeting.matched) { + if (wasInLobby) { + this._logger.info(`Admitted from lobby into meeting (trigger=${meeting.via})`); + } else { + this._logger.info(`Joined meeting directly without lobby (trigger=${meeting.via})`); + } + return meeting.via || 'unknown'; + } + if (wasInLobby) { - // Lobby disappeared but isInMeeting not yet true — Teams is - // transitioning (WebRTC renegotiation, UI rendering). Keep - // polling; the meeting controls will appear shortly. - this._logger.info('Lobby gone, waiting for meeting UI to render...'); + this._logger.info( + `Lobby gone (last lobby trigger=${lastLobbyVia}), waiting for meeting UI to render...`, + ); await this._takeScreenshot('lobby-transition'); } } diff --git a/src/config.ts b/src/config.ts index ee1c3ec..5e82155 100644 --- a/src/config.ts +++ b/src/config.ts @@ -15,15 +15,57 @@ export const config = { botName: process.env.BOT_NAME || 'PowerOn AI', botHeadless: process.env.BOT_HEADLESS !== 'false', /** - * Replace Chromium's fake test-pattern video with a canvas stream (gradient + label). - * Default OFF: in tests with the poweron tenant the Teams SFU rejects all - * outbound video m-lines (port=0/inactive) regardless of which track we send, - * so enabling video just costs CPU + adds a misleading "camera on" indicator - * for other participants without ever transmitting frames. 
Set - * BOT_USE_CANVAS_VIDEO=true if a future tenant policy permits IP video and - * you want to push the canvas stream. + * Replace Chromium's fake test-pattern video (the green surface with the + * spinning Teams placeholder) with a STATIC single-color canvas stream + * showing the bot's display name. + * Default OFF: when no video track is sent at all, Teams renders its own + * "no video" placeholder for the participant. With + * BOT_USE_CANVAS_VIDEO=true the bot pushes a quiet, static avatar surface so + * other participants see a calm uniform tile instead of the loading spinner. */ botUseCanvasVideo: process.env.BOT_USE_CANVAS_VIDEO === 'true', + /** + * Background + text color of the static avatar canvas (only relevant if + * BOT_USE_CANVAS_VIDEO=true). Any CSS color value is accepted. + */ + botAvatarBgColor: process.env.BOT_AVATAR_BG_COLOR || '#a8d4f0', + botAvatarTextColor: process.env.BOT_AVATAR_TEXT_COLOR || '#1a3552', + /** + * DEBUG ONLY: skip both media wrappers (RTCPeerConnection wrapper from + * AudioCaptureProcedure AND the getUserMedia override from AudioProcedure). + * Use this to test whether the wrappers are the cause of Teams' anonymous + * `rejectMediaDescriptionsUpdateAsync` crash during the lobby preheating. + * + * With wrappers disabled the bot can neither capture meeting audio nor + * play TTS, and the orchestrator skips audio init, transcript capture and + * the join greeting. This is purely an isolation test toggle. + * + * Set BOT_DISABLE_MEDIA_WRAPPERS=true to enable the bypass. + */ + botDisableMediaWrappers: process.env.BOT_DISABLE_MEDIA_WRAPPERS === 'true', + /** + * DEBUG ONLY: when true, the anonymous join uses the same minimal + * Chromium args as the authenticated join AND skips the navigator + * stealth init script.
Goal: find out whether the special anon-only + * flags (`--disable-web-security`, `--disable-features=IsolateOrigins, + * site-per-process`, `--disable-blink-features=AutomationControlled`) + * or the stealth properties are what triggers Teams' light-meetings + * preheated-PC crash (`rejectMediaDescriptionsUpdateAsync`). + * + * Set BOT_ANON_USE_AUTH_BROWSER_SETUP=true to enable. + */ + botAnonUseAuthBrowserSetup: process.env.BOT_ANON_USE_AUTH_BROWSER_SETUP === 'true', + /** + * Playwright browser channel. Empty = Playwright's bundled Chromium. + * Set to 'chrome' or 'msedge' to use the locally installed real + * browser instead. The real Chrome ships with the canonical + * Sec-CH-UA / Sec-CH-UA-Platform client hints and Canvas/WebGL + * fingerprints — many automation-detection heuristics (including the + * one Teams uses to force anonymous bots into a lobby + preheated-PC + * code path that crashes with `rejectMediaDescriptionsUpdateAsync`) + * accept real Chrome but reject Playwright's bundled Chromium. + */ + botBrowserChannel: process.env.BOT_BROWSER_CHANNEL || '', // Logging logLevel: process.env.LOG_LEVEL || 'info',