import { Browser, BrowserContext, Page, ElementHandle, chromium } from 'playwright'; import { Logger } from 'winston'; import { v4 as uuidv4 } from 'uuid'; import path from 'path'; import fs from 'fs'; import WebSocket from 'ws'; import { config } from '../config'; import { createSessionLogger } from '../utils/logger'; import { BotSession, BotState, TranscriptEntry, StatusMessage, TranscriptMessage, PlayAudioMessage, ChatMessage, SendChatMessage, AudioChunkMessage } from '../types'; import { JoinProcedure } from './joinProcedure'; import { CaptionsProcedure } from './captionsProcedure'; import { AudioProcedure } from './audioProcedure'; import { AudioCaptureProcedure } from './audioCaptureProcedure'; import { ChatProcedure, ChatMessageEntry } from './chatProcedure'; import { AuthProcedure } from './authProcedure'; import { TeamsActionsService } from './teamsActionsService'; import { isValidMeetingUrl, getMeetingLaunchUrl, resolveLaunchUrl } from './meetingUrlParser'; // Camera / fake video injection is disabled for now to focus on stability. // The Y4M fake video file was causing browser crashes when audio started flowing. 
/**
 * Hooks supplied by the creator of a {@link BotOrchestrator}.
 * The constructor stores them as-is.
 */
export interface OrchestratorCallbacks {
  /** Receives a bot state and an optional human-readable message. */
  onStateChange: (state: BotState, message?: string) => void;
  /** Receives a single transcript entry. */
  onTranscript: (entry: TranscriptEntry) => void;
  /** Receives an error. */
  onError: (error: Error) => void;
}

/**
 * Per-session settings for a {@link BotOrchestrator}.
 *
 * When both `botAccountEmail` and `botAccountPassword` are set, `start()`
 * takes the authenticated join path; otherwise it joins anonymously.
 */
export interface OrchestratorOptions {
  gatewayWsUrl: string;
  instanceId: string;
  language?: string;
  botAccountEmail?: string;
  botAccountPassword?: string;
  transferMode?: string;
  /** Copied (coerced to boolean) into the orchestrator's debug-mode flag. */
  debugMode?: boolean;
}

/**
 * Orchestrates the entire bot lifecycle:
 * - Connects to Gateway via WebSocket
 * - Launches browser
 * - Joins meeting
 * - Enables captions
 * - Sends transcripts to Gateway
 * - Handles audio playback from Gateway
 * - Leaves meeting
 */
export class BotOrchestrator {
  // --- Session identity and collaborators (assigned in the constructor) ---
  private _sessionId: string;
  private _meetingUrl: string;
  private _botName: string;
  private _logger: Logger;
  private _callbacks: OrchestratorCallbacks;
  private _options: OrchestratorOptions;

  // --- Browser handles; null until assigned elsewhere in the class ---
  private _browser: Browser | null = null;
  private _context: BrowserContext | null = null;
  private _page: Page | null = null;

  // --- Gateway transport ---
  private _gatewayWs: WebSocket | null = null;
  private _useHttpFallback = false;
  private _httpBaseUrl = '';

  // --- Feature procedures; null until assigned elsewhere in the class ---
  private _joinProcedure: JoinProcedure | null = null;
  private _captionsProcedure: CaptionsProcedure | null = null;
  private _audioProcedure: AudioProcedure | null = null;
  private _audioCaptureProcedure: AudioCaptureProcedure | null = null;
  private _chatProcedure: ChatProcedure | null = null;
  private _teamsActions: TeamsActionsService | null = null;

  // --- Lifecycle flags ---
  private _state: BotState = 'idle';
  private _isShuttingDown = false;
  private _isDebugMode = false;
  private _keepAliveInterval: NodeJS.Timeout | null = null;

  /**
   * @param sessionId - Identifier of this bot session; also used to create the session logger.
   * @param meetingUrl - Meeting URL the bot should join.
   * @param botName - Display name; a falsy value falls back to `config.botName`.
   * @param callbacks - Caller-supplied hooks, stored unchanged.
   * @param options - Session options, stored unchanged.
   */
  constructor(
    sessionId: string,
    meetingUrl: string,
    botName: string,
    callbacks: OrchestratorCallbacks,
    options: OrchestratorOptions
  ) {
    this._sessionId = sessionId;
    this._meetingUrl = meetingUrl;
    // `||` on purpose: an empty name is treated like a missing one.
    this._botName = botName || config.botName;
    this._callbacks = callbacks;
    this._options = options;
    this._isDebugMode = Boolean(options.debugMode);
    this._logger = createSessionLogger(sessionId);
  }
/**
   * Poll the current page for a DOM element matching any of the given selectors.
   *
   * Selectors are tried in list order on every pass, so earlier entries take
   * priority when several would match. Passes are spaced 500ms apart (the last
   * wait is shortened so the overall timeout is not overshot).
   *
   * @param selectors - A single selector or a prioritized list of selectors.
   * @param timeoutMs - Maximum time to keep polling, in milliseconds.
   * @param label - Name used in log lines; defaults to the first 60 characters
   *   of the comma-joined selector list.
   * @returns The first element handle found, or `null` if the timeout elapses.
   * @throws Error if no page is available while polling.
   */
  private async _pollForElement(
    selectors: string | string[],
    timeoutMs: number = 15000,
    label?: string,
  ): Promise<ElementHandle | null> {
    const pollIntervalMs = 500;
    const selectorList = Array.isArray(selectors) ? selectors : [selectors];
    const combined = selectorList.join(', ');
    const tag = label || combined.substring(0, 60);
    const deadline = Date.now() + timeoutMs;

    while (Date.now() < deadline) {
      // Re-read the page on every pass and fail loudly if it is gone, instead
      // of letting a null dereference surface as an opaque TypeError.
      const page = this._page;
      if (!page) {
        throw new Error(`[poll] Cannot poll for "${tag}": no browser page is available`);
      }

      for (const selector of selectorList) {
        try {
          const el = await page.$(selector);
          if (el) {
            this._logger.info(`[poll] Found "${tag}" via: ${selector}`);
            return el;
          }
        } catch {
          /* page navigated or element detached — retry */
        }
      }

      // Never sleep past the deadline.
      const remainingMs = deadline - Date.now();
      if (remainingMs <= 0) {
        break;
      }
      await page.waitForTimeout(Math.min(pollIntervalMs, remainingMs));
    }

    this._logger.warn(`[poll] "${tag}" not found within ${timeoutMs}ms`);
    return null;
  }

  /** Identifier of this bot session, as passed to the constructor. */
  get sessionId(): string {
    return this._sessionId;
  }

  /** Current lifecycle state of the bot. */
  get state(): BotState {
    return this._state;
  }

  /**
   * Start the bot - connect to Gateway, launch browser, join meeting, enable captions.
   * Chooses between anonymous join and authenticated join based on credentials.
*/
  async start(): Promise<void> {
    // Rejects up front (before any state change) when the URL is not a valid meeting URL.
    if (!isValidMeetingUrl(this._meetingUrl)) {
      throw new Error(`Invalid meeting URL: ${this._meetingUrl}`);
    }

    try {
      this._setState('launching');

      // Connect to Gateway WebSocket first
      await this._connectToGateway();

      // Choose join method based on credentials
      const hasCredentials = !!(this._options.botAccountEmail && this._options.botAccountPassword);
      if (hasCredentials) {
        this._logger.info(`Authenticated join as: ${this._options.botAccountEmail}`);
        await this._attemptAuthJoin();
      } else {
        this._logger.info('Anonymous join with bot name: ' + this._botName);
        await this._attemptJoin();
      }
    } catch (error) {
      this._logger.error('Error starting bot:', error);
      // Anything can be thrown; only Error instances carry a usable message.
      this._setState('error', error instanceof Error ? error.message : String(error));
      // The screenshot is diagnostic only: a failure here must not replace
      // the original error that is rethrown to the caller.
      try {
        await this._takeScreenshot('error');
      } catch (screenshotError) {
        this._logger.warn(`Could not capture error screenshot: ${screenshotError}`);
      }
      throw error;
    }
  }

  /**
   * Join a meeting as anonymous guest with the configured bot name.
   *
   * Assumes `_launchBrowser()` populates `_joinProcedure` and `_audioProcedure`
   * (both are dereferenced with `!` afterwards) — confirm against `_launchBrowser`.
   */
  private async _attemptJoin(): Promise<void> {
    // Launch browser
    await this._launchBrowser();
    this._setState('navigating');

    // STEP 1: Navigate to meeting URL and click "Continue on this browser"
    await this._joinProcedure!.startMeetingLauncherFlow(this._meetingUrl);

    // Ensure microphone is ON (required for voice playback)
    await this._ensureMicOn();

    // STEP 2: Enter bot name and click "Join now"
    await this._joinProcedure!.joinMeetingLobbyFlow();

    // Check if we're in lobby
    const inLobby = await this._joinProcedure!.isInMeetingLobby({ waitForSeconds: 10 });
    if (inLobby) {
      this._setState('in_lobby');
      this._logger.info('Bot is in lobby, waiting to be admitted...');
    }

    // Wait to be admitted to the meeting
    await this._waitForMeetingAdmission();
    this._setState('in_meeting');
    this._logger.info(`Bot joined the meeting as "${this._botName}"`);

    await this._finalizeJoin({ dismissPermissionModals: true });
  }

  /**
   * Join a meeting as authenticated user (System Bot or User Account).
   * Flow: teams.microsoft.com → MS Login → Navigate to meeting URL → Pre-Join → Join now
   *
   * Every UI step uses _pollForElement (500ms interval) for both stability and performance:
   * no fixed waits, the flow proceeds as soon as each element appears.
   *
   * @throws Error if credentials are missing, Microsoft authentication fails,
   *   or the "Join now" button never appears on the pre-join screen.
   */
  private async _attemptAuthJoin(): Promise<void> {
    // start() only routes here when both credentials are set; guard anyway so
    // a direct call cannot pass `undefined` into the login flow.
    const { botAccountEmail, botAccountPassword } = this._options;
    if (!botAccountEmail || !botAccountPassword) {
      throw new Error('Authenticated join requires botAccountEmail and botAccountPassword');
    }

    // NOTE(review): the `true` argument presumably selects an authenticated
    // browser profile — confirm against _launchBrowser.
    await this._launchBrowser(true);
    this._setState('navigating');

    // STEP 1: Navigate to teams.microsoft.com to trigger authentication
    this._logger.info('STEP 1: navigating to teams.microsoft.com');
    await this._page!.goto('https://teams.microsoft.com', {
      waitUntil: 'domcontentloaded',
      timeout: 30000,
    });

    const emailInput = await this._pollForElement(
      ['input[name="loginfmt"]', 'input[type="email"]'],
      30000,
      'MS login email input',
    );
    if (!emailInput) {
      // Not fatal: the flow still hands over to AuthProcedure below.
      this._logger.warn(`No login page found, current URL: ${this._page!.url().substring(0, 150)}`);
      await this._takeScreenshot('step1-no-login-page', this._isDebugMode);
    }

    // STEP 2: Microsoft Authentication
    this._logger.info(`STEP 2: authenticating as ${botAccountEmail}`);
    const authProcedure = new AuthProcedure(this._page!, this._logger);
    const authSuccess = await authProcedure.authenticateWithMicrosoft(
      botAccountEmail,
      botAccountPassword,
      true,
    );
    if (!authSuccess) {
      await this._takeScreenshot('step2-auth-failed', this._isDebugMode);
      throw new Error('Microsoft authentication failed');
    }
    this._logger.info('STEP 2: authentication successful');
    await this._takeScreenshot('step2-auth-done', this._isDebugMode);

    // STEP 3: Wait for Teams to load after auth
    this._logger.info('STEP 3: waiting for Teams to load after auth...');
    try {
      await this._page!.waitForURL(
        (url) =>
          url.hostname.includes('teams.microsoft.com') ||
          url.hostname.includes('teams.cloud.microsoft'),
        { timeout: 30000 },
      );
    } catch {
      // Best effort: continue with whatever URL we ended up on.
      this._logger.warn(`Unexpected URL after auth: ${this._page!.url().substring(0, 150)}`);
    }
    await this._takeScreenshot('step3-teams-loaded', this._isDebugMode);

    // STEP 4: Navigate to the meeting URL with proper launch params.
    // CRITICAL: The suppress params (msLaunch, suppressPrompt, directDl) must
    // be on the LAUNCHER URL itself, NOT inside the encoded meeting URL parameter.
    // resolveLaunchUrl follows redirects first (meeting URL → launcher URL),
    // then adds the params to the RESOLVED launcher URL. getMeetingLaunchUrl
    // adds params to the raw meeting URL — they end up encoded inside the
    // launcher's url= parameter and have no effect on the launcher behavior.
    let launchUrl: string;
    try {
      launchUrl = await resolveLaunchUrl(this._meetingUrl);
    } catch (error) {
      this._logger.warn(`Could not resolve launch URL, using fallback: ${error}`);
      launchUrl = getMeetingLaunchUrl(this._meetingUrl);
    }

    // Remove anon=true since the user is authenticated
    try {
      const urlObj = new URL(launchUrl);
      urlObj.searchParams.delete('anon');
      launchUrl = urlObj.toString();
    } catch {
      /* keep as-is */
    }

    this._logger.info(`STEP 4: navigating to launch URL: ${launchUrl.substring(0, 120)}...`);
    await this._page!.goto(launchUrl, {
      waitUntil: 'domcontentloaded',
      timeout: 30000,
    });
    this._logger.info(`STEP 4: URL after navigation: ${this._page!.url().substring(0, 150)}`);
    await this._takeScreenshot('step4-meeting-url-loaded', this._isDebugMode);

    // STEP 4a: Poll for first actionable button (interstitial OR pre-join)
    const interstitialSelectors = [
      'button[data-tid="joinOnWeb"]',
      'button:has-text("Continue on this browser")',
      'button:has-text("In diesem Browser fortfahren")',
      'button:has-text("Weiter in diesem Browser")',
      'button:has-text("Join on the web instead")',
      'button:has-text("Use web app instead")',
      'button[data-tid="chat-join-button"]',
      'button[data-tid="join-call-button"]',
    ];
    const preJoinSelectors = [
      'button:has-text("Join now")',
      'button:has-text("Jetzt teilnehmen")',
      'button[data-tid="prejoin-join-button"]',
    ];

    const firstBtn = await this._pollForElement(
      [...interstitialSelectors, ...preJoinSelectors],
      30000,
      'interstitial or pre-join button',
    );

    if (firstBtn) {
      const btnText = (await firstBtn.textContent().catch(() => ''))?.trim() || '';
      const btnTid = (await firstBtn.getAttribute('data-tid').catch(() => '')) || '';
      const btnTextLower = btnText.toLowerCase();
      const isPreJoin =
        btnTid === 'prejoin-join-button' ||
        btnTextLower.includes('join now') ||
        btnTextLower.includes('jetzt teilnehmen');

      if (!isPreJoin) {
        await firstBtn.click();
        this._logger.info(`STEP 4a: clicked interstitial: "${btnText}" (data-tid="${btnTid}")`);
        await this._takeScreenshot('step4a-after-interstitial', this._isDebugMode);
      } else {
        // Do not click yet: STEP 5 locates and clicks the pre-join button itself.
        this._logger.info(`STEP 4a: pre-join button already visible: "${btnText}"`);
      }
    } else {
      await this._takeScreenshot('step4a-no-buttons-found', this._isDebugMode);
    }

    // STEP 5: Poll for "Join now" on the pre-join screen (mic is NOT touched)
    await this._takeScreenshot('step5-before-join-now', this._isDebugMode);

    const joinNowBtn = await this._pollForElement(preJoinSelectors, 30000, 'Join now button');
    if (!joinNowBtn) {
      await this._takeScreenshot('step5-no-join-now', this._isDebugMode);
      throw new Error('"Join now" button not found on pre-join screen');
    }

    await joinNowBtn.click();
    this._logger.info('STEP 5: clicked "Join now", waiting for meeting');
    await this._takeScreenshot('step5-join-now-clicked', this._isDebugMode);

    // STEP 6: Wait for meeting admission (hangup button = in meeting)
    await this._waitForMeetingAdmission();
    this._setState('in_meeting');
    this._logger.info(`STEP 6: bot joined the meeting (authenticated as ${botAccountEmail})`);
    await this._takeScreenshot('step6-in-meeting', this._isDebugMode);

    // NOTE(review): unlike the anonymous flow, this path has never dismissed
    // post-join permission modals; kept as-is — confirm whether that is intended.
    await this._finalizeJoin({ dismissPermissionModals: false });
  }

  /**
   * Shared tail of both join flows, run once the bot state is 'in_meeting':
   * keepalive, optional permission-modal dismissal, audio playback init,
   * transcript capture, chat monitoring and the join greeting — in that order.
   *
   * @param opts.dismissPermissionModals - Whether to dismiss post-join browser
   *   permission modals before initializing audio.
   */
  private async _finalizeJoin(opts: { dismissPermissionModals: boolean }): Promise<void> {
    // Start keepalive to prevent idle disconnect
    this._startKeepAlive();

    if (opts.dismissPermissionModals) {
      // Dismiss any post-join permission modals (e.g. "Manage windows on all displays")
      await this._joinProcedure!.dismissBrowserPermissionModals();
    }

    // Initialize audio playback
    await this._audioProcedure!.initialize();

    // Enable transcript capture (captions or audio based on transferMode)
    await this._enableTranscriptCapture();

    // Enable chat monitoring
    await this._enableChat();

    // Send greeting in meeting chat
    await this._sendJoinGreeting();
  }

  /**
   * Ensure the camera is turned on in the pre-join screen.
   * When camera is on, Teams shows the profile/background image.
   *
   * Teams pre-join uses a fui-Switch input (role="switch", data-tid="toggle-video"):
   *
   * - checked present = camera ON  (data-cid="toggle-video-true",  title="Turn camera off")
   * - checked absent  = camera OFF (data-cid="toggle-video-false", title="Turn camera on")
   *
   * Best effort: returns silently if the toggle is not found within 10s, and
   * any error is logged as a warning rather than thrown.
   */
  private async _ensureCameraOn(): Promise<void> {
    try {
      const cameraToggle = await this._pollForElement(
        [
          'input[data-tid="toggle-video"]',
          '[data-tid="toggle-video"]',
          // The `i` flag makes the match case-insensitive, so one "camera" entry is enough.
          'input[role="switch"][title*="camera" i]',
          'input[role="switch"][title*="Video" i]',
        ],
        10000,
        'camera toggle (pre-join)',
      );
      if (!cameraToggle) return;

      const state = await cameraToggle.evaluate((el: HTMLInputElement) => ({
        checked: el.checked,
        dataCid: el.getAttribute('data-cid') || '',
        title: el.getAttribute('title') || '',
      }));
      this._logger.info(`Camera state: checked=${state.checked}, data-cid="${state.dataCid}", title="${state.title}"`);

      if (!state.checked) {
        await cameraToggle.click();
        this._logger.info('Camera toggled ON');
      } else {
        this._logger.info('Camera already ON');
      }
    } catch (err) {
      this._logger.warn(`Could not toggle camera: ${err}`);
    }
  }

  /**
   * Verify camera is on after joining the meeting, and turn it on if not.
   *
   * In-meeting camera button (from Teams DOM):
   *