service-teams-browser-bot/src/bot/orchestrator.ts
2026-02-16 12:56:42 +01:00

798 lines
26 KiB
TypeScript

import { Browser, BrowserContext, Page, chromium } from 'playwright';
import { Logger } from 'winston';
import { v4 as uuidv4 } from 'uuid';
import path from 'path';
import fs from 'fs';
import WebSocket from 'ws';
import { config } from '../config';
import { createSessionLogger } from '../utils/logger';
import { BotSession, BotState, TranscriptEntry, StatusMessage, TranscriptMessage, PlayAudioMessage, ChatMessage, SendChatMessage } from '../types';
import { JoinProcedure } from './joinProcedure';
import { CaptionsProcedure } from './captionsProcedure';
import { AudioProcedure } from './audioProcedure';
import { ChatProcedure, ChatMessageEntry } from './chatProcedure';
import { isValidMeetingUrl } from './meetingUrlParser';
/** Hooks through which the orchestrator reports progress to the code that created it. */
export interface OrchestratorCallbacks {
/** Invoked on every state transition; `message` carries optional detail such as an error message. */
onStateChange: (state: BotState, message?: string) => void;
/** Invoked from the caption and chat procedure callbacks; chat entries are forwarded with `isFinal: true`. */
onTranscript: (entry: TranscriptEntry) => void;
/** Error hook. NOTE(review): not invoked anywhere in the visible orchestrator code — confirm whether callers rely on it. */
onError: (error: Error) => void;
}
/** Per-session configuration handed to the orchestrator at construction time. */
export interface OrchestratorOptions {
/** Full Gateway WebSocket URL for this session; the HTTP fallback base URL is derived from it. */
gatewayWsUrl: string;
/** NOTE(review): not read by the visible orchestrator code; presumably already embedded in `gatewayWsUrl` — confirm. */
instanceId: string;
/** Optional language value forwarded unchanged to the captions procedure. */
language?: string;
/** Bot account e-mail; an authenticated join is attempted only when both e-mail and password are set. */
botAccountEmail?: string;
/** Bot account password; see `botAccountEmail`. */
botAccountPassword?: string;
/** Optional virtual background image URL; applied on the pre-join screen, and only for an authenticated join attempt. */
backgroundImageUrl?: string;
}
/**
* Orchestrates the entire bot lifecycle:
* - Connects to Gateway via WebSocket (with an HTTP POST fallback for outgoing messages)
* - Launches browser
* - Joins meeting (authenticated first when credentials are configured, then anonymous)
* - Enables captions and chat monitoring
* - Sends transcripts and chat messages to Gateway
* - Handles audio playback and chat commands from Gateway
* - Leaves meeting
*/
export class BotOrchestrator {
// Identity of this session; used in Gateway messages, HTTP fallback URLs and screenshot file names.
private _sessionId: string;
private _meetingUrl: string;
// Display name of the bot; falls back to config.botName when the constructor receives an empty name.
private _botName: string;
private _logger: Logger;
private _callbacks: OrchestratorCallbacks;
private _options: OrchestratorOptions;
// Playwright handles; all three are null until _launchBrowser runs and again after the browser is closed.
private _browser: Browser | null = null;
private _context: BrowserContext | null = null;
private _page: Page | null = null;
// Gateway connection; null when the WebSocket could not be established or has been closed by stop().
private _gatewayWs: WebSocket | null = null;
// True once outgoing messages must be delivered via HTTP POST instead of the WebSocket.
private _useHttpFallback: boolean = false;
// Base URL for the HTTP fallback, derived from the Gateway WebSocket URL in _connectToGateway.
private _httpBaseUrl: string = '';
// Per-browser procedure objects; recreated on every _launchBrowser call.
private _joinProcedure: JoinProcedure | null = null;
private _captionsProcedure: CaptionsProcedure | null = null;
private _audioProcedure: AudioProcedure | null = null;
private _chatProcedure: ChatProcedure | null = null;
// Current lifecycle state; only changed through _setState so callbacks and the Gateway stay in sync.
private _state: BotState = 'idle';
// Set by stop(); makes stop() idempotent and suppresses the "page closed unexpectedly" handling.
private _isShuttingDown: boolean = false;
/**
 * Create an orchestrator bound to a single meeting session.
 *
 * Nothing is launched or connected here; call `start()` to begin.
 *
 * @param sessionId  Session identifier; also used to create the session logger.
 * @param meetingUrl Meeting URL to join (validated in `start()`).
 * @param botName    Display name of the bot; `config.botName` is used when empty.
 * @param callbacks  Hooks notified about state changes and transcripts.
 * @param options    Gateway URL, optional credentials and other session settings.
 */
constructor(
  sessionId: string,
  meetingUrl: string,
  botName: string,
  callbacks: OrchestratorCallbacks,
  options: OrchestratorOptions
) {
  this._logger = createSessionLogger(sessionId);
  this._options = options;
  this._callbacks = callbacks;
  this._botName = botName ? botName : config.botName;
  this._meetingUrl = meetingUrl;
  this._sessionId = sessionId;
}
/** Identifier of the session this orchestrator was created for. */
get sessionId(): string {
return this._sessionId;
}
/** Current lifecycle state, as last recorded by `_setState`. */
get state(): BotState {
return this._state;
}
/**
 * Start the bot: connect to the Gateway, launch the browser, authenticate
 * (when credentials are configured), join the meeting and enable captions/chat.
 *
 * When an authenticated attempt fails, the browser is torn down and a single
 * anonymous attempt is made before giving up.
 *
 * @throws Error when the meeting URL is invalid, or the error of the last
 *   failed join attempt (after the state has been set to `error`).
 */
async start(): Promise<void> {
  if (!isValidMeetingUrl(this._meetingUrl)) {
    throw new Error(`Invalid meeting URL: ${this._meetingUrl}`);
  }

  const { botAccountEmail, botAccountPassword } = this._options;
  const hasCredentials = Boolean(botAccountEmail && botAccountPassword);

  try {
    this._setState('launching');
    // The Gateway connection comes first so that later messages have a channel.
    await this._connectToGateway();
    // First attempt: authenticated when credentials exist, anonymous otherwise.
    await this._attemptJoin(hasCredentials);
  } catch (error) {
    if (!hasCredentials) {
      // The anonymous attempt was the only option; report and give up.
      this._logger.error('Error starting bot:', error);
      this._setState('error', (error as Error).message);
      await this._takeScreenshot('error');
      throw error;
    }

    // The authenticated attempt failed: retry once as an anonymous guest.
    this._logger.warn(`Authenticated join failed: ${(error as Error).message}. Retrying as anonymous guest...`);
    try {
      await this._cleanup();
      await this._attemptJoin(false);
    } catch (retryError) {
      this._logger.error('Anonymous fallback also failed:', retryError);
      this._setState('error', (retryError as Error).message);
      await this._takeScreenshot('error-fallback');
      throw retryError;
    }
  }
}
/**
 * Attempt to join the meeting once, either authenticated or as an anonymous guest.
 *
 * Sequence: launch a fresh browser, open the meeting launcher, optionally sign
 * in with the bot account, optionally set the virtual background, join, wait
 * for admission, then initialise audio, captions and chat.
 *
 * @param authenticate When true, the Microsoft sign-in is attempted from the
 *   pre-join page. If the "Sign in" link is missing or authentication reports
 *   failure, the attempt continues without a signed-in account.
 * @throws Error when the browser page is unavailable or any join/admission step fails.
 */
private async _attemptJoin(authenticate: boolean): Promise<void> {
  await this._launchBrowser();
  const page = this._page;
  if (!page) {
    throw new Error('Browser page is not available after launch');
  }

  // _launchBrowser derives the auth flag from the options; replace the join
  // procedure so that it reflects the mode of this particular attempt.
  const joinProcedure = new JoinProcedure(page, this._logger, this._botName, authenticate);
  this._joinProcedure = joinProcedure;
  this._setState('navigating');

  // STEP 1: Navigate to the meeting URL and click "Continue on this browser".
  // This is the same for both authenticated and anonymous joins.
  await joinProcedure.startMeetingLauncherFlow(this._meetingUrl);

  // STEP 2: For authenticated joins, click "Sign in" on the pre-join page
  // instead of entering a name, then run the Microsoft login flow.
  if (authenticate) {
    await this._signInFromPreJoinPage(page);
  }

  // Set virtual background if configured (must be done on the pre-join screen, before "Join now").
  if (this._options.backgroundImageUrl && authenticate) {
    try {
      const { BackgroundProcedure } = await import('./backgroundProcedure');
      const bgProcedure = new BackgroundProcedure(page, this._logger);
      await bgProcedure.setBackgroundFromUrl(this._options.backgroundImageUrl);
    } catch (error) {
      this._logger.warn(`Background image setup failed (non-fatal): ${error}`);
    }
  }

  // Join the meeting.
  await joinProcedure.joinMeetingLobbyFlow();

  // Report the lobby state if that is where the join landed.
  const inLobby = await joinProcedure.isInMeetingLobby({ waitForSeconds: 10 });
  if (inLobby) {
    this._setState('in_lobby');
    this._logger.info('Bot is in lobby, waiting to be admitted...');
  }

  // Wait to be admitted to the meeting.
  await this._waitForMeetingAdmission();
  this._setState('in_meeting');
  this._logger.info(`Bot joined the meeting! (authenticated: ${authenticate})`);

  // Dismiss any post-join permission modals (e.g. "Manage windows on all displays").
  await joinProcedure.dismissBrowserPermissionModals();
  // Initialize audio.
  await this._audioProcedure!.initialize();
  // Enable and subscribe to captions.
  await this._enableCaptions();
  // Enable chat monitoring.
  await this._enableChat();
}

/**
 * Run the authenticated part of the pre-join flow: click "Sign in" and perform
 * the Microsoft login with the configured bot account. Failures are logged and
 * leave the page as it is, so the caller continues with the normal join.
 */
private async _signInFromPreJoinPage(page: Page): Promise<void> {
  this._logger.info('Authenticated join: waiting for pre-join page to load, then clicking "Sign in"...');

  const signInClicked = await this._clickSignInLink(page);
  if (!signInClicked) {
    this._logger.warn('Could not find "Sign in" link - continuing as anonymous');
    return;
  }

  // Wait for the Microsoft login page to load.
  await page.waitForTimeout(3000);

  // Perform Microsoft login (email, password, stay signed in).
  const { AuthProcedure } = await import('./authProcedure');
  const authProcedure = new AuthProcedure(page, this._logger);
  const authSuccess = await authProcedure.authenticateWithMicrosoft(
    this._options.botAccountEmail!,
    this._options.botAccountPassword!
  );
  if (!authSuccess) {
    this._logger.warn('Authentication via "Sign in" failed - continuing as anonymous');
    return;
  }

  this._logger.info('Authentication via "Sign in" link succeeded');
  // After auth, Teams redirects back to the authenticated pre-join page
  // within Teams v2 (/v2/) -- wait for it to load.
  await page.waitForTimeout(5000);
  const postAuthUrl = page.url();
  this._logger.info(`Post-auth URL: ${postAuthUrl.substring(0, 80)}`);

  // Verify we're on the authenticated pre-join page.
  const pageText = await page.evaluate(() => document.body?.innerText?.substring(0, 500) || '');
  if (pageText.includes('Join now')) {
    this._logger.info('On authenticated pre-join page with "Join now" button');
  } else {
    this._logger.warn(`Post-auth page content: ${pageText.substring(0, 200)}`);
  }
}

/**
 * Click the "Sign in" / "Anmelden" link on the pre-join page.
 *
 * Waits up to 20 seconds for a matching visible element, then falls back to
 * scanning the DOM for an element with that exact text.
 *
 * @returns true when a sign-in element was clicked, false otherwise.
 */
private async _clickSignInLink(page: Page): Promise<boolean> {
  const signInSelector = 'a:has-text("Sign in"), button:has-text("Sign in"), a:has-text("Anmelden"), button:has-text("Anmelden")';

  try {
    this._logger.info('Waiting for "Sign in" link to appear on pre-join page...');
    // waitForSelector already resolves to the matching element, so it is
    // clicked directly instead of being looked up a second time.
    const signInLink = await page.waitForSelector(signInSelector, { timeout: 20000, state: 'visible' });
    if (signInLink) {
      await signInLink.click();
      this._logger.info('Clicked "Sign in" link on pre-join page');
      return true;
    }
  } catch {
    this._logger.info('"Sign in" not found via waitForSelector, trying DOM scan...');
  }

  // Fallback: the page may have loaded without the selector matching exactly.
  const clickedViaDom = await page.evaluate(() => {
    const candidates = Array.from(document.querySelectorAll<HTMLElement>('a, button, span[role="link"]'));
    const match = candidates.find((el) => {
      const text = el.innerText?.trim() || '';
      return text === 'Sign in' || text === 'Anmelden';
    });
    if (!match) {
      return false;
    }
    match.click();
    return true;
  });
  if (clickedViaDom) {
    this._logger.info('Clicked "Sign in" via DOM evaluation fallback');
    return true;
  }

  this._logger.warn('Could not find "Sign in" link on pre-join page');
  // Log the start of the page content for debugging.
  const pageText = await page.evaluate(() => document.body?.innerText?.substring(0, 500) || '');
  this._logger.warn(`Pre-join page content: ${pageText.substring(0, 300)}`);
  return false;
}
/**
 * Tear down the browser so that a join can be retried, without performing a
 * full shutdown (the Gateway connection and the shutdown flag are untouched).
 *
 * All errors are swallowed on purpose: this is best-effort cleanup between
 * attempts. Browser handles and procedures are always reset to null.
 */
private async _cleanup(): Promise<void> {
  const page = this._page;
  const context = this._context;
  const browser = this._browser;
  try {
    // The 'close' listener registered in _launchBrowser treats a page close
    // as unexpected whenever the bot is not shutting down. Detach it first so
    // this deliberate close does not emit a spurious 'disconnected' state
    // (and 'left' status) in the middle of a retry.
    page?.removeAllListeners('close');
    if (page) await page.close().catch(() => {});
    if (context) await context.close().catch(() => {});
    if (browser) await browser.close().catch(() => {});
  } catch {
    // Ignore cleanup errors
  } finally {
    this._page = null;
    this._context = null;
    this._browser = null;
    this._joinProcedure = null;
    this._captionsProcedure = null;
    this._audioProcedure = null;
    this._chatProcedure = null;
  }
}
/**
 * Open the Gateway WebSocket for this session.
 *
 * The returned promise resolves when the socket opens, when it errors, or
 * after a 10 second timeout; in the latter two cases the HTTP fallback is
 * enabled instead of failing the start-up.
 */
private async _connectToGateway(): Promise<void> {
  // gatewayWsUrl is the full WebSocket URL provided by the Gateway.
  // It already includes instanceId and sessionId.
  const wsUrl = this._options.gatewayWsUrl;
  this._logger.info(`Connecting to Gateway: ${wsUrl}`);

  // Derive the HTTP base URL used by the fallback: swap the scheme, then
  // strip the WebSocket path suffix.
  const httpUrl = wsUrl.replace('wss://', 'https://').replace('ws://', 'http://');
  this._httpBaseUrl = httpUrl.replace(/\/bot\/ws\/.*$/, '');

  return new Promise<void>((resolve) => {
    const socket = new WebSocket(wsUrl);
    this._gatewayWs = socket;

    const wsTimeout = setTimeout(() => {
      if (this._gatewayWs?.readyState === WebSocket.OPEN) {
        return;
      }
      this._logger.warn('WebSocket connection timeout - switching to HTTP fallback');
      this._useHttpFallback = true;
      this._gatewayWs?.close();
      this._gatewayWs = null;
      // Continue with HTTP fallback instead of failing.
      resolve();
    }, 10000);

    socket.on('open', () => {
      clearTimeout(wsTimeout);
      this._logger.info('Connected to Gateway via WebSocket');
      this._useHttpFallback = false;
      resolve();
    });

    socket.on('message', (data) => {
      this._handleGatewayMessage(data.toString());
    });

    socket.on('close', (code, reason) => {
      this._logger.warn(`Gateway WebSocket closed: ${code} - ${reason}`);
      if (this._isShuttingDown || this._useHttpFallback) {
        return;
      }
      this._logger.info('Switching to HTTP fallback for transcript delivery');
      this._useHttpFallback = true;
    });

    socket.on('error', (error) => {
      clearTimeout(wsTimeout);
      this._logger.error('Gateway WebSocket error:', error);
      this._logger.info('Switching to HTTP fallback for transcript delivery');
      this._useHttpFallback = true;
      this._gatewayWs = null;
      // Continue with HTTP fallback.
      resolve();
    });
  });
}
/**
 * Handle one incoming Gateway message.
 *
 * Supported types: `playAudio`, `sendChatMessage`, `stopAudio` and `pong`.
 * Commands that start asynchronous work are fire-and-forget, but their
 * rejections are caught and logged so they can never surface as unhandled
 * promise rejections.
 *
 * @param data Raw message text, expected to be a JSON object with a `type` field.
 */
private _handleGatewayMessage(data: string): void {
  try {
    const message = JSON.parse(data);
    // Optional chaining: a JSON `null` payload is reported as an unknown type.
    switch (message?.type) {
      case 'playAudio': {
        const audioMsg = message as PlayAudioMessage;
        void this.playAudio(audioMsg.audio.data, audioMsg.audio.format).catch((error) => {
          this._logger.error('Error playing audio from Gateway:', error);
        });
        break;
      }
      case 'sendChatMessage': {
        const chatMsg = message as SendChatMessage;
        void this.sendChatMessageToMeeting(chatMsg.text).catch((error) => {
          this._logger.error('Error sending chat message from Gateway:', error);
        });
        break;
      }
      case 'stopAudio':
        this._logger.info('Stop audio command received from Gateway');
        if (this._audioProcedure) {
          this._audioProcedure.stopAllAudio();
        }
        break;
      case 'pong':
        // Heartbeat response
        break;
      default:
        this._logger.debug('Unknown Gateway message type:', message?.type);
    }
  } catch (error) {
    // Covers invalid JSON as well as malformed payloads (e.g. missing `audio`).
    this._logger.error('Error parsing Gateway message:', error);
  }
}
/**
 * Deliver a message to the Gateway.
 *
 * An open WebSocket is preferred. If it is missing, not open, or the send
 * throws, the message goes through the HTTP fallback when that is enabled;
 * otherwise the message is dropped with a warning.
 */
private _sendToGateway(message: object): void {
  const socket = this._gatewayWs;
  if (socket?.readyState === WebSocket.OPEN) {
    try {
      socket.send(JSON.stringify(message));
      return;
    } catch (error) {
      this._logger.error('WebSocket send error, falling back to HTTP:', error);
      this._useHttpFallback = true;
    }
  }

  if (!this._useHttpFallback) {
    this._logger.warn('Cannot send to Gateway - no WebSocket and no HTTP fallback');
    return;
  }

  // HTTP fallback; _sendViaHttp logs its own request failures.
  void this._sendViaHttp(message);
}
/**
 * POST a message to the Gateway over HTTP (fallback when the WebSocket is unavailable).
 *
 * Only `transcript` and `status` messages have an HTTP endpoint; every other
 * type is skipped with a debug log. Request failures are logged, never thrown.
 */
private async _sendViaHttp(message: any): Promise<void> {
  const msgType = message.type;

  let url: string;
  switch (msgType) {
    case 'transcript':
      url = `${this._httpBaseUrl}/bot/transcript/${this._sessionId}`;
      break;
    case 'status':
      url = `${this._httpBaseUrl}/bot/status/${this._sessionId}`;
      break;
    default:
      this._logger.debug(`HTTP fallback: unsupported message type ${msgType}`);
      return;
  }

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(message),
    });
    if (response.ok) {
      return;
    }
    this._logger.warn(`HTTP fallback response: ${response.status} ${response.statusText}`);
  } catch (error) {
    this._logger.error(`HTTP fallback error for ${msgType}:`, error);
  }
}
/**
 * Wrap a caption in a `transcript` message, stamped with the current time,
 * and hand it to the Gateway sender.
 */
private _sendTranscript(speaker: string, text: string, isFinal: boolean): void {
  const timestamp = new Date().toISOString();
  const payload: TranscriptMessage = {
    type: 'transcript',
    sessionId: this._sessionId,
    transcript: { speaker, text, timestamp, isFinal },
  };
  this._sendToGateway(payload);
}
/**
 * Report a status change for this session to the Gateway.
 *
 * @param status  Gateway-level status value.
 * @param message Optional human-readable detail.
 */
private _sendStatus(status: StatusMessage['status'], message?: string): void {
  const payload: StatusMessage = {
    type: 'status',
    sessionId: this._sessionId,
    status,
    message,
  };
  this._sendToGateway(payload);
}
/**
 * Stop the bot: unsubscribe from captions and chat, clean up audio, leave the
 * meeting, close the browser and disconnect from the Gateway.
 *
 * Idempotent: calls made after the first one return immediately. Each teardown
 * step is isolated, so a failure in one (e.g. caption unsubscribe) does not
 * prevent the remaining steps, in particular leaving the meeting.
 */
async stop(): Promise<void> {
  if (this._isShuttingDown) {
    return;
  }
  this._isShuttingDown = true;
  this._logger.info('Stopping bot...');

  // Must be captured before _setState('leaving') overwrites the state;
  // checking afterwards would never see 'error'.
  const wasInErrorState = this._state === 'error';

  // Runs one teardown step and logs, rather than propagates, its failure.
  const runStep = async (label: string, step: () => unknown): Promise<void> => {
    try {
      await step();
    } catch (error) {
      this._logger.error(`Error during shutdown (${label}):`, error);
    }
  };

  try {
    this._setState('leaving');

    // Unsubscribe from captions and chat
    const captions = this._captionsProcedure;
    if (captions) {
      await runStep('captions unsubscribe', () => captions.unsubscribe());
    }
    const chat = this._chatProcedure;
    if (chat) {
      await runStep('chat unsubscribe', () => chat.unsubscribe());
    }

    // Clean up audio
    const audio = this._audioProcedure;
    if (audio) {
      await runStep('audio cleanup', () => audio.cleanup());
    }

    // Leave the meeting, unless the bot had already failed before stopping.
    const join = this._joinProcedure;
    if (join && !wasInErrorState) {
      await runStep('leave meeting', () => join.leaveMeetingFlow());
    }
  } catch (error) {
    this._logger.error('Error during shutdown:', error);
  } finally {
    // Close browser
    await this._closeBrowser();
    // Close Gateway connection
    if (this._gatewayWs) {
      this._gatewayWs.close(1000, 'Bot stopping');
      this._gatewayWs = null;
    }
    this._setState('disconnected');
  }
}
/**
 * Play an audio clip into the meeting.
 *
 * The request is ignored (with a log entry) while the bot is shutting down or
 * is not currently in the meeting.
 *
 * @param audioData Audio payload as passed on by the Gateway.
 * @param format    Encoding of `audioData`.
 */
async playAudio(audioData: string, format: 'mp3' | 'wav' | 'pcm'): Promise<void> {
  if (this._isShuttingDown) {
    this._logger.debug('Ignoring playAudio - bot is shutting down');
    return;
  }

  const audio = this._audioProcedure;
  const inMeeting = this._state === 'in_meeting';
  if (!inMeeting || !audio) {
    this._logger.warn('Cannot play audio - not in meeting');
    return;
  }

  await audio.playAudio(audioData, format);
}
/**
 * Launch Chromium, open a context and page, and create the join, captions,
 * audio and chat procedures bound to that page.
 *
 * The audio override is injected before any navigation happens, and page
 * error/close listeners are attached last.
 */
private async _launchBrowser(): Promise<void> {
  this._logger.info('Launching browser...');

  const launchArgs = [
    '--use-fake-ui-for-media-stream', // Auto-accept media permissions
    '--use-fake-device-for-media-stream', // Provide fake camera/mic so Teams sees devices
    '--disable-web-security',
    '--disable-features=IsolateOrigins,site-per-process',
    '--autoplay-policy=no-user-gesture-required',
  ];
  const browser = await chromium.launch({
    headless: config.botHeadless,
    args: launchArgs,
  });
  this._browser = browser;

  const context = await browser.newContext({
    permissions: ['microphone', 'camera'],
    viewport: { width: 1280, height: 720 },
    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
  });
  this._context = context;

  const page = await context.newPage();
  this._page = page;

  // Procedures bound to the new page.
  const { botAccountEmail, botAccountPassword } = this._options;
  const isAuthenticated = Boolean(botAccountEmail && botAccountPassword);
  this._joinProcedure = new JoinProcedure(page, this._logger, this._botName, isAuthenticated);

  this._captionsProcedure = new CaptionsProcedure(
    page,
    this._logger,
    (entry) => {
      // Captions go to the Gateway first, then to the local callback.
      this._sendTranscript(entry.speaker, entry.text, entry.isFinal);
      this._callbacks.onTranscript(entry);
    },
    this._options.language
  );

  const audioProcedure = new AudioProcedure(page, this._logger);
  this._audioProcedure = audioProcedure;

  this._chatProcedure = new ChatProcedure(
    page,
    this._logger,
    (entry: ChatMessageEntry) => {
      // Chat messages go to the Gateway as chat events and to the local
      // callback as already-final transcript entries.
      this._sendChatMessage(entry.speaker, entry.text);
      this._callbacks.onTranscript({
        speaker: entry.speaker,
        text: entry.text,
        timestamp: entry.timestamp,
        isFinal: true,
      });
    }
  );

  // Inject the getUserMedia override BEFORE any navigation, so that Teams
  // receives the controlled audio stream when it calls getUserMedia.
  await audioProcedure.injectAudioOverride();

  // Surface script errors raised inside the page.
  page.on('pageerror', (error) => {
    this._logger.error('Page error:', error);
  });

  // A page that closes outside of a shutdown means the bot lost the meeting.
  page.on('close', () => {
    if (this._isShuttingDown) {
      return;
    }
    this._logger.warn('Page closed unexpectedly');
    this._setState('disconnected');
  });

  this._logger.info('Browser launched');
}
/**
 * Close the page, the context and the browser, then drop the references.
 *
 * Each handle is closed independently: a failure while closing the page or
 * the context is logged and must not prevent the browser itself from being
 * closed, otherwise the browser process would be leaked.
 */
private async _closeBrowser(): Promise<void> {
  const handles: Array<[string, { close(): Promise<void> } | null]> = [
    ['page', this._page],
    ['context', this._context],
    ['browser', this._browser],
  ];

  for (const [label, handle] of handles) {
    if (!handle) {
      continue;
    }
    try {
      await handle.close();
    } catch (error) {
      this._logger.error(`Error closing browser (${label}):`, error);
    }
  }

  this._page = null;
  this._context = null;
  this._browser = null;
  this._logger.info('Browser closed');
}
/**
 * Poll until the bot is inside the meeting.
 *
 * Returns as soon as the meeting is detected. Keeps polling while the lobby is
 * detected. When neither signal is seen for several consecutive cycles, a
 * screenshot and a page-content snippet are logged and the wait is aborted.
 *
 * @throws Error when no lobby/meeting signal is seen for 5 consecutive cycles,
 *   or when `config.timeouts.lobbyWait` elapses.
 */
private async _waitForMeetingAdmission(): Promise<void> {
  // Several signal-free cycles are tolerated before giving up.
  const maxNoSignal = 5;
  const lobbyTimeout = config.timeouts.lobbyWait;
  const startedAt = Date.now();
  let noSignalStreak = 0;

  while (Date.now() - startedAt < lobbyTimeout) {
    // Admitted already?
    if (await this._joinProcedure!.isInMeeting({ waitForSeconds: 5 })) {
      return;
    }

    // Still parked in the lobby?
    if (await this._joinProcedure!.isInMeetingLobby({ waitForSeconds: 2 })) {
      noSignalStreak = 0;
      this._logger.info('Still waiting in lobby...');
      continue;
    }

    // Neither in meeting nor in lobby — this can happen legitimately:
    // - Authenticated users skip the lobby, but the meeting UI takes seconds to load
    // - The page is transitioning between states
    // Only give up after several consecutive cycles with no signal.
    noSignalStreak += 1;
    const currentUrl = this._page?.url() || 'unknown';
    this._logger.info(`No lobby/meeting signal detected (attempt ${noSignalStreak}/${maxNoSignal}), URL: ${currentUrl}`);
    if (noSignalStreak < maxNoSignal) {
      continue;
    }

    // Collect a screenshot and the page content for debugging before giving up.
    await this._takeScreenshot('no-meeting-signal');
    try {
      const bodySnippet = await this._page?.evaluate(() =>
        document.body?.innerText?.substring(0, 500) || '(empty)'
      );
      this._logger.warn(`Page content before giving up: ${bodySnippet}`);
    } catch {
      /* ignore */
    }
    throw new Error('Bot was removed from lobby or meeting ended');
  }

  throw new Error('Timeout waiting to be admitted from lobby');
}
/**
 * Turn on live captions and subscribe to them.
 *
 * Failures are logged as warnings only; the bot keeps running without captions.
 */
private async _enableCaptions(): Promise<void> {
  try {
    const captions = this._captionsProcedure!;
    await captions.enableCaptionsFlow();
    await captions.subscribeToCaptions();
    this._logger.info('Captions enabled and subscribed');
  } catch (error) {
    // Not fatal: continue without captions.
    this._logger.warn('Could not enable captions:', error);
  }
}
/**
 * Turn on chat monitoring and subscribe to incoming chat messages.
 *
 * Failures are logged as warnings only; the bot keeps running without chat.
 */
private async _enableChat(): Promise<void> {
  try {
    const chat = this._chatProcedure!;
    await chat.enableChatMonitoring();
    await chat.subscribeToChatMessages();
    this._logger.info('Chat monitoring enabled and subscribed');
  } catch (error) {
    // Not fatal: continue without chat.
    this._logger.warn('Could not enable chat monitoring:', error);
  }
}
/**
 * Wrap a meeting chat message in a `chatMessage` event, stamped with the
 * current time, and hand it to the Gateway sender.
 */
private _sendChatMessage(speaker: string, text: string): void {
  const timestamp = new Date().toISOString();
  const payload: ChatMessage = {
    type: 'chatMessage',
    sessionId: this._sessionId,
    chat: { speaker, text, timestamp },
  };
  this._sendToGateway(payload);
}
/**
 * Post a text message into the meeting chat.
 *
 * The request is ignored with a warning unless the bot is in the meeting, is
 * not shutting down, and has a chat procedure available.
 */
async sendChatMessageToMeeting(text: string): Promise<void> {
  const chat = this._chatProcedure;
  const canSend = !this._isShuttingDown && this._state === 'in_meeting' && chat !== null;
  if (!canSend || !chat) {
    this._logger.warn('Cannot send chat message - not in meeting');
    return;
  }
  await chat.sendChatMessage(text);
}
/**
 * Record a new bot state, log it, notify the local callback and report the
 * corresponding status to the Gateway (in that order).
 *
 * @param state   New lifecycle state.
 * @param message Optional detail, e.g. the error message for the `error` state.
 */
private _setState(state: BotState, message?: string): void {
  this._state = state;

  const detail = message ? ` - ${message}` : '';
  this._logger.info(`State changed: ${state}${detail}`);

  this._callbacks.onStateChange(state, message);

  // Translate the internal state into the coarser status the Gateway understands.
  const gatewayStatusByState: Record<BotState, StatusMessage['status']> = {
    idle: 'connecting',
    launching: 'connecting',
    navigating: 'connecting',
    in_lobby: 'in_lobby',
    in_meeting: 'joined',
    leaving: 'left',
    error: 'error',
    disconnected: 'left',
  };
  const gatewayStatus = gatewayStatusByState[state];
  this._sendStatus(gatewayStatus, message);
}
/**
 * Take a full-page screenshot for debugging.
 *
 * The image is written to `config.screenshotDir` and additionally logged as
 * base64 so it can be recovered from the Azure logs. Does nothing when
 * screenshots are disabled or no page exists. Failures are logged, never thrown.
 *
 * @param name Short label used in the file name and in the log markers.
 */
private async _takeScreenshot(name: string): Promise<void> {
  if (!config.screenshotOnError || !this._page) {
    return;
  }
  try {
    // Save to file. A recursive mkdir succeeds when the directory already
    // exists, so no separate existence check is needed. The async fs API keeps
    // the event loop free while the file is written.
    const screenshotDir = config.screenshotDir;
    await fs.promises.mkdir(screenshotDir, { recursive: true });
    const filename = `${this._sessionId}-${name}-${Date.now()}.png`;
    const filepath = path.join(screenshotDir, filename);
    const buffer = await this._page.screenshot({ fullPage: true });
    await fs.promises.writeFile(filepath, buffer);
    this._logger.info(`Screenshot saved: ${filepath}`);

    // Also log the complete image as base64 for the Azure logs, split into
    // chunks to stay below log line limits.
    const base64 = buffer.toString('base64');
    this._logger.info(`SCREENSHOT_BASE64_START:${name}`);
    const chunkSize = 50000;
    for (let i = 0; i < base64.length; i += chunkSize) {
      this._logger.info(`SCREENSHOT_CHUNK:${base64.substring(i, i + chunkSize)}`);
    }
    this._logger.info(`SCREENSHOT_BASE64_END:${name}`);
  } catch (error) {
    this._logger.error('Error taking screenshot:', error);
  }
}
}