fixed teams

This commit is contained in:
ValueOn AG 2026-05-12 17:49:26 +02:00
parent b15bb1b198
commit bad6e67ca0
9 changed files with 460 additions and 305 deletions

View file

@ -9,6 +9,31 @@ GATEWAY_WS_URL=wss://gateway-int.poweron-center.net/api/teamsbot/ws
BOT_NAME=PowerOn AI
BOT_HEADLESS=true
# Static avatar tile (replaces Teams' green/spinning "no video" placeholder
# with a quiet, single-color surface + the bot's display name in the
# center). Recommended for the anonymous bot. Colors are CSS values.
# BOT_USE_CANVAS_VIDEO=true
# BOT_AVATAR_BG_COLOR=#a8d4f0
# BOT_AVATAR_TEXT_COLOR=#1a3552
# DEBUG ONLY - leave commented in normal operation.
# Set to true to skip BOTH media wrappers (RTCPeerConnection wrapper +
# getUserMedia override) for isolating Teams' anonymous lobby preheating
# crash (rejectMediaDescriptionsUpdateAsync). With this on the bot has
# no audio in/out, no captions, no greeting - it sits silently.
# BOT_DISABLE_MEDIA_WRAPPERS=true
# DEBUG ONLY - bisect the Teams anonymous preheated-PC crash by running
# anon with the auth Chromium args (minimal flags) AND no stealth init.
# BOT_ANON_USE_AUTH_BROWSER_SETUP=true
# Playwright browser channel. Empty = bundled Chromium (default).
# Set to 'chrome' or 'msedge' to use the locally installed real browser.
# Strongly recommended for anonymous Teams joins: Playwright's bundled
# Chromium gets detected as automation and forced into a lobby + the
# buggy preheated-PC code path; real Chrome bypasses both.
# BOT_BROWSER_CHANNEL=chrome
# Logging
LOG_LEVEL=info
LOG_DIR=./output/logs

View file

@ -21,7 +21,7 @@ FROM mcr.microsoft.com/playwright:v1.50.0-jammy
WORKDIR /app
# Install Xvfb for headful browser mode (Teams blocks headless browsers)
# Xvfb for headful browser mode (Teams blocks headless browsers).
RUN apt-get update && apt-get install -y xvfb && rm -rf /var/lib/apt/lists/*
# Copy built files and dependencies
@ -29,6 +29,14 @@ COPY --from=builder /app/dist ./dist
COPY --from=builder /app/node_modules ./node_modules
COPY package*.json ./
# Real Google Chrome (stable channel) + its OS deps. Required for anonymous
# Teams joins: the bundled Playwright Chromium gets detected as automation
# by Teams' light-meetings flow and forced into a lobby + the buggy
# preheated-PC code path. Real Chrome bypasses both. Configured via
# BOT_BROWSER_CHANNEL=chrome (passed to chromium.launch({ channel })).
# See wiki/b-reference/teams-bot/architecture.md → "Browser-Channel".
RUN npx playwright install --with-deps chrome
# Create output directories
RUN mkdir -p output/logs output/screenshots
@ -36,6 +44,7 @@ RUN mkdir -p output/logs output/screenshots
ENV NODE_ENV=production
ENV BOT_HEADLESS=false
ENV DISPLAY=:99
ENV BOT_BROWSER_CHANNEL=chrome
# Expose port
EXPOSE 4100

View file

@ -12,6 +12,10 @@ services:
- BOT_NAME=${BOT_NAME:-PowerOn AI}
- BOT_HEADLESS=false
- DISPLAY=:99
# Real Chrome installed in the image (see Dockerfile). Required for
# anonymous Teams joins so the light-meetings flow does not detect
# automation and force the bot into a lobby.
- BOT_BROWSER_CHANNEL=${BOT_BROWSER_CHANNEL:-chrome}
- LOG_LEVEL=info
- SCREENSHOT_ON_ERROR=true
volumes:

View file

@ -20,6 +20,10 @@ export type AudioProcedureOptions = {
useCanvasVideo?: boolean;
/** Shown in the center of the canvas (e.g. bot display name) */
displayLabel?: string;
/** Hex/CSS color of the static avatar background (default: light blue). */
avatarBgColor?: string;
/** Hex/CSS color of the centered display label (default: dark blue). */
avatarTextColor?: string;
};
export class AudioProcedure {
@ -27,6 +31,8 @@ export class AudioProcedure {
private _logger: Logger;
private _useCanvasVideo: boolean;
private _displayLabel: string;
private _avatarBgColor: string;
private _avatarTextColor: string;
private _audioContext: boolean = false;
private _initScriptInjected: boolean = false;
private _audioQueue: Array<{ audioData: string; format: 'mp3' | 'wav' | 'pcm' }> = [];
@ -38,6 +44,8 @@ export class AudioProcedure {
this._logger = logger;
this._useCanvasVideo = !!options?.useCanvasVideo;
this._displayLabel = (options?.displayLabel || 'Bot').trim() || 'Bot';
this._avatarBgColor = (options?.avatarBgColor || '').trim() || '#a8d4f0';
this._avatarTextColor = (options?.avatarTextColor || '').trim() || '#1a3552';
}
/**
@ -59,6 +67,8 @@ export class AudioProcedure {
await this._page.context().addInitScript(poweronMediaPatchInstall, {
useCanvasVideo: this._useCanvasVideo,
displayLabel: this._displayLabel,
avatarBgColor: this._avatarBgColor,
avatarTextColor: this._avatarTextColor,
});
this._initScriptInjected = true;
@ -70,7 +80,12 @@ export class AudioProcedure {
* in an iframe (addInitScript runs too early) or overwrites getUserMedia.
*/
async reinstallMediaPatchInAllFrames(): Promise<void> {
const payload = { useCanvasVideo: this._useCanvasVideo, displayLabel: this._displayLabel };
const payload = {
useCanvasVideo: this._useCanvasVideo,
displayLabel: this._displayLabel,
avatarBgColor: this._avatarBgColor,
avatarTextColor: this._avatarTextColor,
};
for (const frame of this._page.frames()) {
try {
await frame.evaluate(poweronMediaPatchInstall, payload);

View file

@ -144,74 +144,57 @@ export class ChatProcedure {
}
/**
* Check if the chat panel is currently visible by probing for known
* UI elements (chat input, message list, or aria-pressed toggle).
* Is the meeting chat panel currently open?
*
* IMPORTANT what we DO NOT accept as proof of an open chat panel:
* * a generic ``[data-tid="ckeditor"]`` / ``[role=textbox]`` somewhere in
* the page. In the anonymous / compact in-meeting layout Teams renders
* a separate compose box (e.g. for reactions / inline comments) whose
* parent has NO data-tid at all and which does NOT post into the
* meeting chat. If we treated that as "panel open" the bot would skip
* the toggle and silently lose every chat send.
* * a "Besprechungschat" / "Meeting chat" heading. Teams keeps the side-
* pane heading mounted even when the pane is ``vdi-occlusion`` / h=0.
* Teams' calling layout (both anonymous light-meetings and the
* authenticated meeting view as of 2026-05) hosts a single right-side
* pane container `[data-tid="calling-right-side-panel"]` that can
* display Chat, People, Info, Captions etc. — only one at a time.
* Detection is therefore two simple checks:
*
* What we DO accept:
* * ``#chat-button[aria-pressed="true"]`` explicit toggle state.
* * a known **chat-side-panel-scoped** input (selectors anchored under
* ``chat-pane-compose-message-footer`` / ``message-pane-footer``).
* * a chat **message list container** with non-trivial height.
* 1. Does the side-pane container exist AND is it visually rendered
* (offsetWidth/Height > 0, has an offsetParent)? When the pane
* is closed it is unmounted or collapsed to zero size.
* 2. Does the visible pane contain a chat-specific child
* (`message-pane-layout`, `chat-pane-compose-message-footer`,
* `#chat-pane-list`, `[data-app-name="chats"]`, …)? This
* distinguishes Chat-mode from People/Info/Captions
* language-independently.
*
* Things we deliberately do NOT inspect (each one was empirically
* shown to be unreliable for this DOM):
* * `#chat-button` attributes — the button is byte-identical in
* both states and has no `aria-pressed`. Visual state comes from
* a CSS `:has()` selector on the side-pane.
* * `vdi-occlusion` class — a permanent structural marker on
* `calling-right-side-panel` and `message-pane-layout`, not a
* visibility flag.
* * "Besprechungschat" / "Meeting chat" headings or any other
* text — locale-dependent, and Teams keeps headings mounted even
* when the pane is hidden.
* * generic `[data-tid="ckeditor"]` anywhere in the page — the
* compact layout renders unrelated compose boxes (reactions,
* inline comments) outside the meeting chat panel.
*/
private async _isChatPanelOpen(): Promise<boolean> {
return this._page.evaluate(() => {
// 1. Chat button aria-pressed state. This is the ONLY safe short-
// circuit — Teams keeps the panel "open" semantically even when its
// layout pane is briefly collapsed (h=0, vdi-occlusion). If we were
// stricter here the periodic scan / send path would re-trigger
// _openChatPanel which CLICKS the button — and a click on an already-
// pressed button TOGGLES the panel CLOSED. So aria-pressed=true must
// short-circuit to true.
const chatBtn = document.querySelector('#chat-button, button[id="chat-button"]') as HTMLElement | null;
if (chatBtn?.getAttribute('aria-pressed') === 'true') return true;
// 2. Chat input / compose box visible — but ONLY accept selectors
// that are scoped to the actual chat side-pane (footer ancestors).
// Generic [contenteditable] / [role=textbox] matches would also hit
// the compact in-meeting compose box used by anonymous / pre-join
// overlays, which is NOT the meeting chat.
const inputSelectors = [
'[data-tid="ckeditor-replyConversation"]',
'[data-tid="ckeditor"]',
'[data-tid="chat-pane-compose-message-footer"] div[contenteditable="true"]',
'[data-tid="chat-pane-compose-message-footer"] div[role="textbox"]',
'[data-tid="message-pane-footer"] div[contenteditable="true"]',
'[data-tid="message-pane-footer"] div[role="textbox"]',
'div[role="textbox"][data-tid*="chat"]',
'div[role="textbox"][data-tid*="message"]',
// light-meetings: a visible "expand compose" button is itself a
// reliable signal that the meeting chat side-pane is open.
'[data-tid="newMessageCommands-expand-compose"]',
'[data-tid="simplified-compose-bottom-toolbar"]',
];
for (const sel of inputSelectors) {
const el = document.querySelector(sel) as HTMLElement | null;
if (el && el.offsetHeight > 0) return true;
}
// 3. Chat-specific containers (NOT [role="log"] which also matches captions)
const chatContainerSelectors = [
'[data-tid="message-pane-list"]',
'[data-tid="chat-pane-list"]',
'[data-tid="chat-pane"]',
'.ts-message-list-container',
];
for (const sel of chatContainerSelectors) {
const el = document.querySelector(sel) as HTMLElement | null;
if (el && el.offsetHeight > 50) return true;
}
return false;
const sidePanel = document.querySelector(
'[data-tid="calling-right-side-panel"]',
) as HTMLElement | null;
if (!sidePanel) return false;
const isVisible = sidePanel.offsetWidth > 0
&& sidePanel.offsetHeight > 0
&& sidePanel.offsetParent !== null;
if (!isVisible) return false;
const chatHallmark = sidePanel.querySelector(
'[data-tid="message-pane-layout"], '
+ '[data-tid="message-pane-body"], '
+ '[data-tid="chat-pane-compose-message-footer"], '
+ '[data-tid="message-pane-footer"], '
+ '#chat-pane-list, '
+ '[data-app-name="chats"]',
);
return chatHallmark !== null;
});
}
@ -222,13 +205,21 @@ export class ChatProcedure {
* loads from a chat thread). Clicking again would TOGGLE it closed —
* that's why we always check ``_isChatPanelOpen()`` first.
*
* The selector list below covers BOTH layouts:
* * authenticated full Teams meeting ``#chat-button`` etc.
* * anonymous / compact in-meeting toolbar (Teams Live / pre-join stage)
* where the toggle has no stable id and only carries
* ``data-tid``/``aria-label`` hints. We therefore include data-tid-
* based and broader role-based fallbacks so the bot does not silently
* fall back to typing into a non-chat compose box.
* Auth Full-Teams ships TWO buttons with "Chat" in the aria-label:
* 1) the real toggle (UUID id, `aria="Chat (Ctrl+Shift+2)"`,
* `aria-pressed="false"|"true"`)
* 2) a sibling `#chat-button` without `aria-pressed` that is NOT a
* toggle (likely the side-nav chat-app entry) — clicking it does
* nothing for the meeting chat panel.
* light-meetings (anon) ships only `#chat-button`, which IS the real
* toggle there (no `aria-pressed` because Teams' compact bar uses
* menu-button semantics).
*
* Strategy: collect every visible button/role=button whose
* id/data-tid/aria-label hints at "chat", PREFER ones with a real
* `aria-pressed` attribute, click the best unclicked one each round,
* and never click the same button twice (so a non-toggle does not
* lock us into a loop).
*/
private async _openChatPanel(): Promise<boolean> {
if (await this._isChatPanelOpen()) {
@ -236,27 +227,9 @@ export class ChatProcedure {
return true;
}
const chatButtonSelectors = [
'#chat-button',
'button[id="chat-button"]',
'button[data-tid="toggle-chat"]',
'button[data-tid*="chat" i]',
'button[data-tid*="conversation" i]',
'button[aria-label="Chat"]',
'button[aria-label*="Chat" i]',
'button[aria-label*="Unterhaltung" i]',
'button[aria-label*="Besprechungschat" i]',
'button[aria-label*="Meeting chat" i]',
'button[title*="Chat" i]',
'button[title*="Besprechungschat" i]',
// role-based fallbacks for the compact / anonymous toolbar
'[role="button"][aria-label*="Chat" i]',
'[role="button"][aria-label*="Besprechungschat" i]',
'[role="menuitem"][aria-label*="Chat" i]',
];
const maxAttempts = 12;
const pollIntervalMs = 2000;
const previouslyClicked: string[] = [];
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
if (await this._isChatPanelOpen()) {
@ -264,49 +237,81 @@ export class ChatProcedure {
return true;
}
let clicked = false;
const triedSelectors: string[] = [];
for (const selector of chatButtonSelectors) {
try {
const button = await this._page.$(selector);
if (!button) continue;
const isVisible = await button.isVisible().catch(() => false);
triedSelectors.push(`${selector}=${isVisible ? 'visible' : 'hidden'}`);
if (!isVisible) continue;
await button.click();
clicked = true;
this._logger.info(`Clicked chat button: ${selector} (attempt ${attempt}/${maxAttempts})`);
break;
} catch (err) {
triedSelectors.push(`${selector}=err:${String(err).substring(0, 40)}`);
}
}
const click = await this._page.evaluate((alreadyTried: string[]) => {
// Decides whether an element's id / data-tid / aria-label / title hints at
// the meeting-chat toggle. Runs inside the page context, so it must stay
// self-contained (no references to anything outside this callback).
const matchesChatHint = (el: Element): boolean => {
  // 'chat' already covers "Besprechungschat" / "Meeting chat". A bare
  // 'besprechung' ("meeting") hint is deliberately NOT used: it also matches
  // unrelated German meeting controls (e.g. "Besprechung beenden",
  // "Besprechungsinfo"), which would then be picked as click candidates.
  const hints = ['chat', 'unterhalt', 'conversation'];
  const haystacks = [
    el.id || '',
    el.getAttribute('data-tid') || '',
    el.getAttribute('aria-label') || '',
    el.getAttribute('title') || '',
  ].map((value) => value.toLowerCase());
  return haystacks.some((value) => hints.some((hint) => value.includes(hint)));
};
// An element counts as visible only when it has a non-zero box in both
// dimensions and is attached to a layout parent.
const isVisible = (el: HTMLElement): boolean => {
  if (!(el.offsetHeight > 0)) return false;
  if (!(el.offsetWidth > 0)) return false;
  return el.offsetParent !== null;
};
// Identity key used to remember which candidates were already clicked:
// "<id>|<data-tid>|<aria-label>", with missing parts rendered as empty.
const keyOf = (el: Element): string => {
  const parts = [el.id, el.getAttribute('data-tid'), el.getAttribute('aria-label')];
  return parts.map((part) => part || '').join('|');
};
// A "real" toggle carries an explicit aria-pressed state ("true" or "false");
// a missing attribute or any other value does not count.
const isToggle = (el: Element): boolean =>
  ['true', 'false'].includes(el.getAttribute('aria-pressed') ?? '');
if (clicked) {
await this._page.waitForTimeout(2500);
if (await this._isChatPanelOpen()) {
this._logger.info('Chat panel opened successfully');
return true;
}
this._logger.info('Chat button clicked but panel not detected yet, waiting before next attempt');
await this._page.waitForTimeout(pollIntervalMs);
} else {
// Log which selectors were tried — without this we can't tell whether
// the buttons are missing entirely or just hidden behind another layer.
const all = Array.from(
document.querySelectorAll('button, [role="button"], [role="menuitem"]'),
) as HTMLElement[];
const candidates = all
.filter((el) => matchesChatHint(el) && isVisible(el))
.filter((el) => !alreadyTried.includes(keyOf(el)));
if (candidates.length === 0) return { picked: null as null | { key: string; id: string; tid: string; aria: string; toggle: boolean } };
const toggles = candidates.filter(isToggle);
const pick = toggles[0] || candidates[0];
pick.scrollIntoView({ block: 'center' });
pick.click();
return {
picked: {
key: keyOf(pick),
id: pick.id || '',
tid: pick.getAttribute('data-tid') || '',
aria: pick.getAttribute('aria-label') || '',
toggle: isToggle(pick),
},
};
}, previouslyClicked);
if (!click.picked) {
this._logger.info(
`Chat button not found, retry ${attempt}/${maxAttempts}` +
(triedSelectors.length ? ` | tried: ${triedSelectors.join(', ')}` : ''),
`No (more) un-clicked chat button candidates, retry ${attempt}/${maxAttempts}`,
);
// On the very first miss dump the full button diagnostics so the
// next code change has a real selector hint to work from instead
// of guessing. After that we throttle to avoid log spam.
if (attempt === 1) {
await this._dumpChatButtonDiagnostics();
}
if (attempt < maxAttempts) {
await this._page.waitForTimeout(pollIntervalMs);
}
continue;
}
previouslyClicked.push(click.picked.key);
this._logger.info(
`Clicked chat button: id="${click.picked.id}" tid="${click.picked.tid}" `
+ `aria="${click.picked.aria}" toggle=${click.picked.toggle} `
+ `(attempt ${attempt}/${maxAttempts})`,
);
await this._page.waitForTimeout(2500);
if (await this._isChatPanelOpen()) {
this._logger.info('Chat panel opened successfully');
return true;
}
this._logger.info(
'Chat button clicked but panel not detected — will try a different candidate next round',
);
await this._page.waitForTimeout(pollIntervalMs);
}
this._logger.warn('Could not open chat panel after polling - chat will not work');

View file

@ -15,6 +15,21 @@ import { resolveLaunchUrl, getMeetingLaunchUrl } from './meetingUrlParser';
*/
const _CONDITION_WAIT_MS = 10000;
/**
 * Result of a structural state-detection probe.
 * `via` names the trigger that decided the result so the orchestrator can
 * log _why_ it transitioned the bot into the next state.
 *
 * Examples for `via`:
 * - `selector:[data-tid="lobby-screen"]` — direct attribute match
 * - `selector:button[id="hangup-button"]` — direct attribute match
 * - `inferred:join-gone+no-hangup+no-callbar` — structural inference
 */
export interface DetectionResult {
  /** True when the probed state was detected. */
  matched: boolean;
  /** Trigger that decided a positive result; the probes leave it unset when nothing matched. */
  via?: string;
}
export class JoinProcedure {
private _page: Page;
private _logger: Logger;
@ -383,12 +398,23 @@ export class JoinProcedure {
/**
* Check if the bot is currently in the lobby (waiting to be admitted).
* Teams shows various lobby messages depending on the meeting state:
* - "Someone will let you in shortly" (meeting active, waiting for admit)
* - "Someone will let you in when the meeting starts" (meeting not started yet)
* - "waiting for someone to let you in" (alternative wording)
*
* Detection is purely structural (no text matching) so it works in any
* Teams UI language:
*
* 1) Direct: a known lobby/waiting container `data-tid` / `data-cid` is
* visible. This is the strongest signal but the attributes drift
* between Teams releases.
* 2) Inferred: we have left pre-join (the `prejoin-join-button` is no
* longer visible) AND we are not yet in the meeting (no hangup button
* or call-control bar). After `_clickJoinNow()` succeeded, this state
* is unambiguously "lobby or in transition" — both meaning "wait for
* admission" from the orchestrator's point of view.
*
* Returns `{ matched, via }` where `via` names the trigger that decided
* the result so the orchestrator can log why it transitioned state.
*/
async isInMeetingLobby(options: { waitForSeconds?: number } = {}): Promise<boolean> {
async isInMeetingLobby(options: { waitForSeconds?: number } = {}): Promise<DetectionResult> {
const timeout = (options.waitForSeconds || 5) * 1000;
const lobbySelectors = [
@ -396,110 +422,101 @@ export class JoinProcedure {
'[data-tid="waiting-screen"]',
'[data-tid="lobby-waiting-screen"]',
'[data-tid="lobby-container"]',
'[data-tid="prejoin-lobby"]',
'[data-cid="lobby-screen"]',
'[data-cid="waiting-screen"]',
'[data-cid="ts-waiting-screen"]',
'#lobby-container',
'[id*="lobby"]',
];
try {
await this._page.waitForSelector(lobbySelectors.join(', '), {
timeout,
state: 'visible',
});
return true;
} catch {
// No structural lobby element found
}
// Fallback: check for the pre-join/lobby state via page structure —
// the lobby has no call-control bar but does have a waiting spinner or icon
try {
const hasLobbyStructure = await this._page.evaluate(() => {
const el = document.querySelector(
'[class*="lobby" i], [class*="waiting-room" i], [class*="waitingScreen" i]'
);
return !!el;
});
if (hasLobbyStructure) return true;
} catch {
// Page may not be ready
}
return false;
}
/**
* Check if the bot is currently in the meeting (admitted from lobby).
* Primary selector: button[id="hangup-button"] (confirmed by Recall.ai).
* Note: Teams uses `id` (not `data-tid`) for the hangup button since 2025 redesign.
*
* For authenticated joins, Teams v2 sometimes renders differently.
* Additional fallback: check the URL for meeting patterns and DOM for call UI.
*/
async isInMeeting(options: { waitForSeconds?: number } = {}): Promise<boolean> {
const timeout = (options.waitForSeconds || 5) * 1000;
const inMeetingSelectors = [
// Button IDs (Teams 2025+ redesign)
'button[id="hangup-button"]',
'button[id="microphone-button"]',
'button[id="callingButtons-showMoreBtn"]',
'button[id="video-button"]',
// data-tid attributes
'[data-tid="hangup-button"]',
'[data-tid="call-composite"]',
'[data-tid="callingButtons-showMoreBtn"]',
'[data-tid="call-controls"]',
'[data-tid="meeting-composite"]',
'div[data-tid="video-gallery"]',
'[data-tid="microphone-button"]',
'[data-tid="toggle-mute"]',
// data-cid attributes (light-meetings / anonymous join)
'[data-cid="ts-hangup-btn"]',
'[data-cid="calling-hangup-button"]',
'[data-cid="calling-unified-bar"]',
];
try {
await this._page.waitForSelector(inMeetingSelectors.join(', '), {
timeout,
state: 'visible',
});
return true;
} catch {
// Primary selector-based detection failed
}
// Fallback: structural DOM check for call control containers
try {
const inMeeting = await this._page.evaluate(() => {
const callBar = document.querySelector(
'[class*="calling-controls" i], [class*="call-controls" i], ' +
'[class*="controlBar" i], [class*="unified-bar" i]'
);
if (callBar) return true;
// Check for hangup/mic buttons by role+structure (language-independent)
const buttons = Array.from(document.querySelectorAll('button[id]'));
let callButtons = 0;
for (let i = 0; i < buttons.length; i++) {
const id = buttons[i].id.toLowerCase();
if (id.includes('hangup') || id.includes('microphone') ||
id.includes('video-button') || id.includes('mute')) {
callButtons++;
}
for (const selector of lobbySelectors) {
try {
const handle = await this._page.waitForSelector(selector, {
timeout: Math.max(50, Math.floor(timeout / lobbySelectors.length)),
state: 'visible',
});
if (handle) {
return { matched: true, via: `selector:${selector}` };
}
return callButtons >= 2;
} catch {
// Try next selector
}
}
try {
const inferred = await this._page.evaluate(() => {
// Visible = present, attached to a layout parent, and rendered with a
// non-empty bounding box.
const isVisible = (el: Element | null): boolean => {
  const node = el as HTMLElement | null;
  if (!node || node.offsetParent === null) {
    return false;
  }
  const { width, height } = node.getBoundingClientRect();
  return width > 0 && height > 0;
};
const joinBtn = document.querySelector(
'#prejoin-join-button, button[data-tid="prejoin-join-button"]',
);
const hangupBtn = document.querySelector(
'button[id="hangup-button"], [data-tid="hangup-button"], ' +
'[data-cid="ts-hangup-btn"], [data-cid="calling-hangup-button"]',
);
const callBar = document.querySelector(
'[data-tid="call-controls"], [data-tid="call-composite"], ' +
'[data-tid="meeting-composite"], [data-tid="video-gallery"], ' +
'[data-cid="calling-unified-bar"]',
);
return !isVisible(joinBtn) && !isVisible(hangupBtn) && !isVisible(callBar);
});
if (inMeeting) {
this._logger.info('Detected meeting via structural DOM analysis (fallback)');
return true;
if (inferred) {
return { matched: true, via: 'inferred:join-gone+no-hangup+no-callbar' };
}
} catch {
// Page may not be ready
}
return false;
return { matched: false };
}
/**
 * Check if the bot is currently in the meeting (admitted from lobby).
 *
 * The ONLY reliable indicator that we are admitted is the hangup button:
 * it does not exist in pre-join nor in the lobby. Other elements such as
 * `data-tid="toggle-mute"` or `id="microphone-button"` also appear in the
 * pre-join / lobby preview UI and would produce false positives that make
 * the orchestrator think the bot is in the meeting while it is still
 * waiting in the lobby.
 *
 * All hangup selectors are probed concurrently, each with the full time
 * budget. Probing them one after another with a slice of the budget would
 * miss a button that becomes visible after its own slice has expired, even
 * though it appeared well within `waitForSeconds`.
 *
 * @param options.waitForSeconds Maximum time to wait for a hangup button
 *   to become visible (default 5; a value of 0 also falls back to 5).
 * @returns `{ matched, via }` where `via` names the hangup selector that
 *   matched first, so the orchestrator can log the trigger that promoted
 *   the bot into the `in_meeting` state. `{ matched: false }` when no
 *   selector became visible in time.
 */
async isInMeeting(options: { waitForSeconds?: number } = {}): Promise<DetectionResult> {
  // Keep the original 50 ms floor so a tiny budget never degenerates to 0
  // (Playwright treats a timeout of 0 as "no timeout").
  const timeout = Math.max(50, (options.waitForSeconds || 5) * 1000);

  const hangupSelectors = [
    'button[id="hangup-button"]',
    '[data-tid="hangup-button"]',
    '[data-cid="ts-hangup-btn"]',
    '[data-cid="calling-hangup-button"]',
  ];

  // One probe per selector; a timeout (or a closed page) resolves to null
  // instead of rejecting so no probe can surface as an unhandled rejection.
  const probes = hangupSelectors.map((selector) =>
    this._page
      .waitForSelector(selector, { timeout, state: 'visible' })
      .then((handle) => (handle ? selector : null))
      .catch(() => null),
  );

  // Resolve with the first selector that matches, or with null once every
  // probe has given up.
  const matchedSelector = await new Promise<string | null>((resolve) => {
    let pending = probes.length;
    for (const probe of probes) {
      void probe.then((selector) => {
        if (selector !== null) {
          resolve(selector);
          return;
        }
        pending -= 1;
        if (pending === 0) {
          resolve(null);
        }
      });
    }
  });

  if (matchedSelector === null) {
    return { matched: false };
  }
  return { matched: true, via: `selector:${matchedSelector}` };
}
/**

View file

@ -8,11 +8,17 @@
/** Options passed to `poweronMediaPatchInstall` when the media patch is installed in a page/frame. */
export type MediaGetUserMediaPatchOptions = {
  /**
   * Whether the canvas-based avatar video is used.
   * NOTE(review): the code that reads this flag is outside the visible hunk — confirm exact effect.
   */
  useCanvasVideo: boolean;
  /** Text drawn in the center of the avatar canvas (e.g. the bot's display name). */
  displayLabel: string;
  /** Hex/CSS color of the static avatar background (default: light blue). */
  avatarBgColor?: string;
  /** Hex/CSS color of the centered display label (default: dark blue). */
  avatarTextColor?: string;
};
export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => {
'use strict';
const { useCanvasVideo, displayLabel } = opts;
const avatarBgColor = opts.avatarBgColor || '#a8d4f0';
const avatarTextColor = opts.avatarTextColor || '#1a3552';
const w: any = window as any;
if (!w.__gumChromium) {
@ -163,7 +169,11 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) =>
return;
}
const _fps = 15;
// Low fps: the avatar is intentionally STATIC. The interval still ticks so
// captureStream() in headless Chromium gets fresh frames (some Chromium
// builds pause the track if no new frames arrive), but each tick draws an
// identical image — no animation, no flicker.
const _fps = 2;
w.__startBotAvatarStream = () => {
if (
w.__botAvatarStreamStarted
@ -196,43 +206,20 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) =>
(document.body || document.documentElement).appendChild(canvas);
w.__botAvatarCanvas = canvas;
const c2d = canvas.getContext('2d');
let t = 0;
const draw = () => {
if (!c2d) {
return;
}
t += 0.04;
const wPx = canvas.width;
const hPx = canvas.height;
c2d.fillStyle = '#061525';
c2d.fillStyle = avatarBgColor;
c2d.fillRect(0, 0, wPx, hPx);
const g = c2d.createLinearGradient(0, 0, wPx, hPx);
g.addColorStop(0, '#1a4f8c');
g.addColorStop(0.5, '#0c305a');
g.addColorStop(1, '#132e6e');
c2d.fillStyle = g;
c2d.fillRect(0, 0, wPx, hPx);
c2d.strokeStyle = 'rgba(255, 200, 80, 0.95)';
c2d.lineWidth = 3;
c2d.strokeRect(6, 6, wPx - 12, hPx - 12);
c2d.fillStyle = 'rgba(255, 220, 120, 0.95)';
c2d.font = '600 13px system-ui, "Segoe UI", sans-serif';
c2d.textAlign = 'left';
c2d.textBaseline = 'top';
c2d.fillText('PORTA', 14, 10);
c2d.fillStyle = avatarTextColor;
c2d.font = 'bold 28px system-ui, "Segoe UI", sans-serif';
c2d.textAlign = 'center';
c2d.textBaseline = 'middle';
c2d.fillStyle = '#ffffff';
c2d.font = 'bold 28px system-ui, "Segoe UI", sans-serif';
const line = (w.__botAvatarDisplayLabel || displayLabel).toString().slice(0, 72);
c2d.fillText(line, wPx / 2, hPx / 2 - 6);
c2d.fillStyle = 'rgba(255,255,255,0.78)';
c2d.font = '14px system-ui, "Segoe UI", sans-serif';
c2d.fillText('poweron', wPx / 2, hPx / 2 + 26);
const pulse = 0.75 + 0.25 * Math.sin(t);
c2d.fillStyle = 'rgba(120, 200, 255, ' + 0.15 * pulse + ')';
c2d.fillRect(0, 0, wPx, 6);
c2d.fillRect(0, hPx - 6, wPx, 6);
c2d.fillText(line, wPx / 2, hPx / 2);
};
draw();
// Capture at fps for compositor-driven frames AND also push manual frames
@ -243,7 +230,9 @@ export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) =>
if (w.__botAvatarVideoTrack) {
w.__botAvatarVideoTrack.enabled = true;
try {
w.__botAvatarVideoTrack.contentHint = 'motion';
// 'detail' = static / low-motion content -> WebRTC uses lower
// bitrate + preserves text sharpness instead of motion smoothing.
w.__botAvatarVideoTrack.contentHint = 'detail';
} catch {
// ignore
}

View file

@ -173,7 +173,7 @@ export class BotOrchestrator {
}
try {
this._setState('launching');
this._setState('launching', 'trigger=start invoked');
// Connect to Gateway WebSocket first
await this._connectToGateway();
@ -203,7 +203,7 @@ export class BotOrchestrator {
// Launch browser
await this._launchBrowser();
this._setState('navigating');
this._setState('navigating', 'trigger=browser launched');
// STEP 1: Navigate to meeting URL and click "Continue on this browser"
await this._takeScreenshot('anon-step1-before-launcher', this._isDebugMode);
@ -223,19 +223,12 @@ export class BotOrchestrator {
await this._joinProcedure!.joinMeetingLobbyFlow();
await this._takeScreenshot('anon-step2-after-join', this._isDebugMode);
// Check if we're in lobby
const inLobby = await this._joinProcedure!.isInMeetingLobby({ waitForSeconds: 10 });
if (inLobby) {
this._setState('in_lobby');
this._logger.info('Bot is in lobby, waiting to be admitted...');
await this._takeScreenshot('anon-step3-in-lobby', this._isDebugMode);
}
// Wait to be admitted to the meeting (handles lobby state internally).
// Returns the trigger (matched selector) that promoted us into the meeting.
const admissionVia = await this._waitForMeetingAdmission();
// Wait to be admitted to the meeting
await this._waitForMeetingAdmission();
this._setState('in_meeting');
this._logger.info(`Bot joined the meeting as "${this._botName}"`);
this._setState('in_meeting', `trigger=${admissionVia}`);
this._logger.info(`Bot joined the meeting as "${this._botName}" (trigger=${admissionVia})`);
await this._takeScreenshot('anon-step4-in-meeting', this._isDebugMode);
// Start keepalive to prevent idle disconnect
@ -244,6 +237,15 @@ export class BotOrchestrator {
// Dismiss any post-join permission modals (e.g. "Manage windows on all displays")
await this._joinProcedure!.dismissBrowserPermissionModals();
if (config.botDisableMediaWrappers) {
this._logger.warn(
'BOT_DISABLE_MEDIA_WRAPPERS=true: skipping audio init, transcript capture and ' +
'join greeting. Bot will sit silently in the meeting until stopped.',
);
await this._takeScreenshot('anon-step5-ready', this._isDebugMode);
return;
}
// Initialize audio playback
await this._audioProcedure!.initialize();
if (config.botUseCanvasVideo) {
@ -271,7 +273,7 @@ export class BotOrchestrator {
*/
private async _attemptAuthJoin(): Promise<void> {
await this._launchBrowser(true);
this._setState('navigating');
this._setState('navigating', 'trigger=browser launched (auth)');
// STEP 1: Navigate to teams.microsoft.com to trigger authentication
this._logger.info('STEP 1: navigating to teams.microsoft.com');
@ -444,10 +446,12 @@ export class BotOrchestrator {
await this._takeScreenshot('step5-join-now-clicked', this._isDebugMode);
// STEP 6: Wait for meeting admission (hangup button = in meeting)
await this._waitForMeetingAdmission();
const admissionVia = await this._waitForMeetingAdmission();
this._setState('in_meeting');
this._logger.info(`STEP 6: bot joined the meeting (authenticated as ${this._options.botAccountEmail})`);
this._setState('in_meeting', `trigger=${admissionVia}`);
this._logger.info(
`STEP 6: bot joined the meeting (authenticated as ${this._options.botAccountEmail}, trigger=${admissionVia})`,
);
await this._takeScreenshot('step6-in-meeting', this._isDebugMode);
this._startKeepAlive();
@ -940,7 +944,7 @@ export class BotOrchestrator {
this._stopKeepAlive();
try {
this._setState('leaving');
this._setState('leaving', 'trigger=stop() invoked');
// Stop audio capture
if (this._audioCaptureProcedure) {
@ -977,7 +981,7 @@ export class BotOrchestrator {
this._gatewayWs = null;
}
this._setState('disconnected');
this._setState('disconnected', 'trigger=shutdown completed');
}
}
@ -1038,7 +1042,21 @@ export class BotOrchestrator {
private async _launchBrowser(authMode: boolean = false): Promise<void> {
this._logger.info(`Launching browser (authMode=${authMode})...`);
const args = authMode
// When BOT_ANON_USE_AUTH_BROWSER_SETUP is on, the anon path uses the
// exact same minimal flag set as the auth path — bisecting whether
// the anon-only Chromium flags trigger Teams' light-meetings preheating
// crash (`rejectMediaDescriptionsUpdateAsync`).
const useAuthSetupForAnon = !authMode && config.botAnonUseAuthBrowserSetup;
if (useAuthSetupForAnon) {
this._logger.warn(
'BOT_ANON_USE_AUTH_BROWSER_SETUP=true: anon will use the auth Chromium args ' +
'(no --disable-web-security, no --disable-blink-features=AutomationControlled) ' +
'AND skip the navigator stealth init.',
);
}
const useMinimalArgs = authMode || useAuthSetupForAnon;
const args = useMinimalArgs
? [
'--no-sandbox',
'--disable-dev-shm-usage',
@ -1058,9 +1076,15 @@ export class BotOrchestrator {
'--disable-blink-features=AutomationControlled',
];
const channel = config.botBrowserChannel.trim();
if (channel) {
this._logger.info(`Browser channel override: '${channel}' (using locally installed browser)`);
}
this._browser = await chromium.launch({
headless: authMode ? false : config.botHeadless,
headless: useMinimalArgs ? false : config.botHeadless,
args,
...(channel ? { channel } : {}),
});
this._context = await this._browser.newContext({
@ -1080,6 +1104,8 @@ export class BotOrchestrator {
// Stealth: Override browser properties that reveal automation.
// Teams checks these to detect headless/automated browsers and
// blocks the /v2/ authenticated experience, falling back to light-meetings.
// SKIPPED when BOT_ANON_USE_AUTH_BROWSER_SETUP is on (debug isolation).
if (!useAuthSetupForAnon) {
await this._page.addInitScript(() => {
// 1. Remove navigator.webdriver flag (primary detection signal)
Object.defineProperty(navigator, 'webdriver', { get: () => false });
@ -1112,6 +1138,7 @@ export class BotOrchestrator {
// @ts-ignore
if (!window.chrome.runtime) { window.chrome.runtime = {}; }
});
}
// Initialize procedures
this._joinProcedure = new JoinProcedure(this._page, this._logger, this._botName);
@ -1135,6 +1162,8 @@ export class BotOrchestrator {
this._audioProcedure = new AudioProcedure(this._page, this._logger, {
useCanvasVideo: config.botUseCanvasVideo,
displayLabel: this._botName,
avatarBgColor: config.botAvatarBgColor,
avatarTextColor: config.botAvatarTextColor,
});
this._teamsActions = new TeamsActionsService(this._page, this._logger);
this._chatProcedure = new ChatProcedure(
@ -1151,14 +1180,27 @@ export class BotOrchestrator {
},
);
// Inject audio getUserMedia override BEFORE any navigation
// This ensures Teams gets our controlled audio stream when it calls getUserMedia
await this._audioProcedure.injectAudioOverride();
// DEBUG TOGGLE: skip both wrappers when isolating the Teams anonymous
// `rejectMediaDescriptionsUpdateAsync` crash during lobby preheating.
if (config.botDisableMediaWrappers) {
this._logger.warn(
'BOT_DISABLE_MEDIA_WRAPPERS=true: skipping getUserMedia override AND ' +
'RTCPeerConnection wrapper. Audio capture (transcript) and TTS playback ' +
'will be unavailable for this session.',
);
} else {
// Inject audio getUserMedia override BEFORE any navigation
// This ensures Teams gets our controlled audio stream when it calls getUserMedia
await this._audioProcedure.injectAudioOverride();
// Aggressive hybrid mode: always capture meeting audio as transcript source.
await this._audioCaptureProcedure!.injectCaptureOverride();
// Aggressive hybrid mode: always capture meeting audio as transcript source.
await this._audioCaptureProcedure!.injectCaptureOverride();
}
this._page.on('framenavigated', () => {
if (config.botDisableMediaWrappers) {
return;
}
if (!config.botUseCanvasVideo || !this._audioProcedure) {
return;
}
@ -1180,21 +1222,21 @@ export class BotOrchestrator {
this._page.on('close', () => {
if (!this._isShuttingDown) {
this._logger.warn('Page closed unexpectedly');
this._setState('disconnected');
this._setState('disconnected', 'trigger=page close event');
}
});
// Handle browser renderer crash (Chromium process segfault)
this._page.on('crash', () => {
this._logger.error('BROWSER CRASH: Chromium renderer process crashed!');
this._setState('error', 'Browser crashed');
this._setState('error', 'trigger=Chromium renderer crashed');
});
// Handle browser disconnection (entire browser process dies)
this._browser.on('disconnected', () => {
if (!this._isShuttingDown) {
this._logger.error('BROWSER DISCONNECTED: Browser process died unexpectedly');
this._setState('error', 'Browser process died');
this._setState('error', 'trigger=browser process died');
}
});
@ -1231,43 +1273,50 @@ export class BotOrchestrator {
* Bails out immediately if the page is closed (crash/disconnect) so we
* don't report a misleading "in_lobby" state for the next 2 minutes.
*/
private async _waitForMeetingAdmission(): Promise<void> {
private async _waitForMeetingAdmission(): Promise<string> {
const startTime = Date.now();
const timeout = config.timeouts.lobbyWait;
let loggedLobby = false;
let wasInLobby = false;
let lastLobbyVia: string | undefined;
while (Date.now() - startTime < timeout) {
if (!this._page || this._page.isClosed()) {
throw new Error('Page closed while waiting for meeting admission');
}
const inMeeting = await this._joinProcedure!.isInMeeting({ waitForSeconds: 5 });
if (inMeeting) {
if (wasInLobby) {
this._logger.info('Admitted from lobby into meeting');
}
return;
}
const inLobby = await this._joinProcedure!.isInMeetingLobby({ waitForSeconds: 2 });
if (inLobby) {
// Lobby check FIRST: while we are in the lobby, we must never let
// isInMeeting() decide our state — even though it is now strict
// (hangup-button only), checking lobby first keeps the order of
// truth obvious and prevents us from running post-join logic the
// instant Teams flips a single lobby attribute.
const lobby = await this._joinProcedure!.isInMeetingLobby({ waitForSeconds: 2 });
if (lobby.matched) {
wasInLobby = true;
lastLobbyVia = lobby.via;
if (!loggedLobby) {
loggedLobby = true;
this._setState('in_lobby');
this._logger.info('Bot is in lobby, waiting for admission...');
this._setState('in_lobby', `trigger=${lobby.via}`);
this._logger.info(`Bot is in lobby, waiting for admission (trigger=${lobby.via})`);
await this._takeScreenshot('in-lobby');
}
continue;
}
const meeting = await this._joinProcedure!.isInMeeting({ waitForSeconds: 3 });
if (meeting.matched) {
if (wasInLobby) {
this._logger.info(`Admitted from lobby into meeting (trigger=${meeting.via})`);
} else {
this._logger.info(`Joined meeting directly without lobby (trigger=${meeting.via})`);
}
return meeting.via || 'unknown';
}
if (wasInLobby) {
// Lobby disappeared but isInMeeting not yet true — Teams is
// transitioning (WebRTC renegotiation, UI rendering). Keep
// polling; the meeting controls will appear shortly.
this._logger.info('Lobby gone, waiting for meeting UI to render...');
this._logger.info(
`Lobby gone (last lobby trigger=${lastLobbyVia}), waiting for meeting UI to render...`,
);
await this._takeScreenshot('lobby-transition');
}
}

View file

@ -15,15 +15,57 @@ export const config = {
botName: process.env.BOT_NAME || 'PowerOn AI',
botHeadless: process.env.BOT_HEADLESS !== 'false',
/**
* Replace Chromium's fake test-pattern video with a canvas stream (gradient + label).
* Default OFF: in tests with the poweron tenant the Teams SFU rejects all
* outbound video m-lines (port=0/inactive) regardless of which track we send,
* so enabling video just costs CPU + adds a misleading "camera on" indicator
* for other participants without ever transmitting frames. Set
* BOT_USE_CANVAS_VIDEO=true if a future tenant policy permits IP video and
* you want to push the canvas stream.
* Replace Chromium's fake test-pattern video (the green surface with the
* spinning Teams placeholder) with a STATIC single-color canvas stream
* showing the bot's display name.
* Default OFF: when no video track is sent at all, Teams renders its own
* "no video" placeholder for the participant. Setting BOT_USE_CANVAS_VIDEO
* =true makes the bot push a quiet, static avatar surface so other
* participants see a calm uniform tile instead of the loading spinner.
*/
botUseCanvasVideo: process.env.BOT_USE_CANVAS_VIDEO === 'true',
/**
* Background + text color of the static avatar canvas (only relevant if
* BOT_USE_CANVAS_VIDEO=true). Any CSS color value is accepted.
*/
botAvatarBgColor: process.env.BOT_AVATAR_BG_COLOR || '#a8d4f0',
botAvatarTextColor: process.env.BOT_AVATAR_TEXT_COLOR || '#1a3552',
/**
* DEBUG ONLY: skip both media wrappers (RTCPeerConnection wrapper from
* AudioCaptureProcedure AND the getUserMedia override from AudioProcedure).
* Use this to test whether the wrappers are the cause of Teams' anonymous
* `rejectMediaDescriptionsUpdateAsync` crash during the lobby preheating.
*
* With wrappers disabled the bot CANNOT capture meeting audio or play
* TTS — it can only sit in the meeting silently and observe captions /
* the chat panel. This is purely an isolation test toggle.
*
* Set BOT_DISABLE_MEDIA_WRAPPERS=true to enable the bypass.
*/
botDisableMediaWrappers: process.env.BOT_DISABLE_MEDIA_WRAPPERS === 'true',
/**
* DEBUG ONLY: when true, the anonymous join uses the same minimal
* Chromium args as the authenticated join AND skips the navigator
* stealth init script. Goal: find out whether the special anon-only
* flags (`--disable-web-security`, `--disable-features=IsolateOrigins,
* site-per-process`, `--disable-blink-features=AutomationControlled`)
* or the stealth properties are what triggers Teams' light-meetings
* preheated-PC crash (`rejectMediaDescriptionsUpdateAsync`).
*
* Set BOT_ANON_USE_AUTH_BROWSER_SETUP=true to enable.
*/
botAnonUseAuthBrowserSetup: process.env.BOT_ANON_USE_AUTH_BROWSER_SETUP === 'true',
/**
* Playwright browser channel. Empty = Playwright's bundled Chromium.
* Set to 'chrome' or 'msedge' to use the locally installed real
* browser instead. The real Chrome ships with the canonical
* Sec-CH-UA / Sec-CH-UA-Platform client hints and Canvas/WebGL
* fingerprints; many automation-detection heuristics (including the
* one Teams uses to force anonymous bots into a lobby + preheated-PC
* code path that crashes with `rejectMediaDescriptionsUpdateAsync`)
* accept real Chrome but reject Playwright's bundled Chromium.
*/
botBrowserChannel: process.env.BOT_BROWSER_CHANNEL || '',
// Logging
logLevel: process.env.LOG_LEVEL || 'info',