feat: authenticated join flow, audio capture, camera activation, transferMode support
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
35dd781d90
commit
04abaf9402
6 changed files with 505 additions and 38 deletions
183
src/bot/audioCaptureProcedure.ts
Normal file
183
src/bot/audioCaptureProcedure.ts
Normal file
|
|
@ -0,0 +1,183 @@
|
||||||
|
import { Page } from 'playwright';
|
||||||
|
import { Logger } from 'winston';
|
||||||
|
|
||||||
|
/**
 * Captures incoming meeting audio by intercepting WebRTC RTCPeerConnection.
 *
 * How it works:
 * 1. Before page navigation, wraps window.RTCPeerConnection via addInitScript
 * 2. When Teams establishes WebRTC connections, the wrapper intercepts incoming audio tracks
 * 3. The first incoming audio track is captured via AudioContext + ScriptProcessorNode
 * 4. Audio chunks (PCM16, 16kHz mono, base64) are buffered in a page-global array
 * 5. The Node.js side polls that array and forwards every chunk to the supplied callback
 */
export class AudioCaptureProcedure {
  /** Sample rate (Hz) requested from the in-page AudioContext and reported with every chunk. */
  private static readonly SAMPLE_RATE = 16000;

  /** Maximum number of chunks kept in the page buffer; further chunks are dropped until it is drained. */
  private static readonly MAX_BUFFERED_CHUNKS = 30;

  private _page: Page;
  private _logger: Logger;
  private _onAudioChunk: (base64Data: string, sampleRate: number) => void;
  private _pollIntervalMs: number;
  private _isCapturing: boolean = false;
  private _pollInterval: ReturnType<typeof setInterval> | null = null;
  private _pollInFlight: boolean = false;
  private _injected: boolean = false;

  /**
   * @param page - Playwright page the meeting runs in.
   * @param logger - Logger used for lifecycle messages.
   * @param onAudioChunk - Called once per captured chunk with base64 PCM16 data and its sample rate.
   * @param pollIntervalMs - How often the page buffer is drained (defaults to 500 ms).
   */
  constructor(
    page: Page,
    logger: Logger,
    onAudioChunk: (base64Data: string, sampleRate: number) => void,
    pollIntervalMs: number = 500,
  ) {
    this._page = page;
    this._logger = logger;
    this._onAudioChunk = onAudioChunk;
    this._pollIntervalMs = pollIntervalMs;
  }

  /**
   * Inject the RTCPeerConnection wrapper BEFORE any page navigation.
   * Must be called before navigating to Teams. Calling it again is a no-op.
   */
  async injectCaptureOverride(): Promise<void> {
    if (this._injected) return;

    this._logger.info('[AudioCapture] Injecting RTCPeerConnection wrapper...');

    // The function below is serialized and executed inside the page, so it must not
    // reference anything from this module; all settings travel through `cfg`.
    await this._page.addInitScript(
      (cfg: { sampleRate: number; maxBufferedChunks: number }) => {
        const w = window as any;

        // Audio chunk buffer — Node.js polls this periodically
        w.__audioCaptureChunks = [] as string[];
        w.__audioCaptureActive = false;

        const OrigRTC = window.RTCPeerConnection;
        if (!OrigRTC) return; // nothing to wrap in this context

        // Converts Float32 samples in [-1, 1] to little-endian PCM16 and base64-encodes them.
        const encodePcm16Base64 = (samples: Float32Array): string => {
          const pcm16 = new Int16Array(samples.length);
          for (let i = 0; i < samples.length; i++) {
            const s = Math.max(-1, Math.min(1, samples[i]));
            pcm16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
          }
          const bytes = new Uint8Array(pcm16.buffer);
          const parts: string[] = [];
          const step = 0x2000; // keep each fromCharCode call well below argument-count limits
          for (let i = 0; i < bytes.length; i += step) {
            parts.push(
              String.fromCharCode.apply(null, bytes.subarray(i, i + step) as unknown as number[]),
            );
          }
          return btoa(parts.join(''));
        };

        // @ts-ignore — wrapping constructor
        window.RTCPeerConnection = function (this: RTCPeerConnection, ...args: any[]) {
          const pc = new (OrigRTC as any)(...args);

          pc.addEventListener('track', (event: RTCTrackEvent) => {
            if (event.track.kind !== 'audio') return;
            // Only one capture pipeline at a time: the first audio track wins.
            if (w.__audioCaptureActive) return;
            w.__audioCaptureActive = true;

            let ctx: AudioContext | null = null;
            try {
              const AudioCtx = window.AudioContext || w.webkitAudioContext;
              const audioCtx: AudioContext = new AudioCtx({ sampleRate: cfg.sampleRate });
              ctx = audioCtx;

              if (audioCtx.state === 'suspended') {
                // A context created without a user gesture may start suspended under the
                // browser autoplay policy; resume is best-effort.
                void audioCtx.resume().catch(() => undefined);
              }

              const stream = new MediaStream([event.track]);
              const source = audioCtx.createMediaStreamSource(stream);

              // ScriptProcessor for raw PCM access (deprecated but widely supported)
              const processor = audioCtx.createScriptProcessor(4096, 1, 1);
              let pending: Float32Array[] = [];
              let pendingSamples = 0;

              processor.onaudioprocess = (e: AudioProcessingEvent) => {
                const input = e.inputBuffer.getChannelData(0);
                pending.push(new Float32Array(input)); // keep our own copy of the samples
                pendingSamples += input.length;

                // Emit once at least one second of audio has been collected.
                if (pendingSamples < cfg.sampleRate) return;

                const merged = new Float32Array(pendingSamples);
                let offset = 0;
                for (let i = 0; i < pending.length; i++) {
                  merged.set(pending[i], offset);
                  offset += pending[i].length;
                }
                pending = [];
                pendingSamples = 0;

                // Push to buffer for Node.js to poll; when the buffer is full the
                // new chunk is dropped so memory stays bounded.
                const chunks = w.__audioCaptureChunks as string[];
                if (chunks.length < cfg.maxBufferedChunks) {
                  chunks.push(encodePcm16Base64(merged));
                }
              };

              source.connect(processor);
              processor.connect(audioCtx.destination);

              // Store references for cleanup
              w.__audioCaptureCtx = audioCtx;
              w.__audioCaptureProcessor = processor;

              console.log('[AudioCapture] WebRTC audio track intercepted, capturing at 16kHz mono');
            } catch (err) {
              // Release the guard so a later audio track can retry instead of
              // leaving capture permanently disabled.
              w.__audioCaptureActive = false;
              if (ctx) {
                void ctx.close().catch(() => undefined);
              }
              console.error('[AudioCapture] Failed to set up audio capture:', err);
            }
          });

          return pc;
        } as any;

        // Copy static properties
        window.RTCPeerConnection.prototype = OrigRTC.prototype;
        Object.setPrototypeOf(window.RTCPeerConnection, OrigRTC);
      },
      {
        sampleRate: AudioCaptureProcedure.SAMPLE_RATE,
        maxBufferedChunks: AudioCaptureProcedure.MAX_BUFFERED_CHUNKS,
      },
    );

    this._injected = true;
    this._logger.info('[AudioCapture] RTCPeerConnection wrapper injected');
  }

  /**
   * Start polling for captured audio chunks and forwarding them to the callback.
   * Calling it while already capturing is a no-op.
   */
  async startCapture(): Promise<void> {
    if (this._isCapturing) return;

    if (!this._injected) {
      this._logger.warn(
        '[AudioCapture] startCapture() called before injectCaptureOverride(); no audio will be captured',
      );
    }

    this._isCapturing = true;
    this._logger.info('[AudioCapture] Starting audio chunk polling...');

    this._pollInterval = setInterval(() => {
      void this._pollOnce();
    }, this._pollIntervalMs);
  }

  /**
   * Stop capturing audio: stops polling and tears down the in-page audio pipeline.
   */
  async stopCapture(): Promise<void> {
    this._isCapturing = false;

    if (this._pollInterval) {
      clearInterval(this._pollInterval);
      this._pollInterval = null;
    }

    try {
      // No async/await inside the page function: it is serialized as-is.
      await this._page.evaluate(() => {
        const w = window as any;
        w.__audioCaptureActive = false;

        const processor = w.__audioCaptureProcessor;
        const ctx = w.__audioCaptureCtx as AudioContext | null | undefined;
        w.__audioCaptureProcessor = null;
        w.__audioCaptureCtx = null;

        if (processor) {
          processor.onaudioprocess = null; // stop producing chunks immediately
        }
        if (ctx && ctx.state !== 'closed') {
          return ctx.close();
        }
        return undefined;
      });
    } catch {
      // Page might already be closed
    }

    this._logger.info('[AudioCapture] Audio capture stopped');
  }

  /**
   * Drain the page buffer once and hand every chunk to the callback.
   * Skips the tick if the previous drain is still running so chunks stay in order.
   */
  private async _pollOnce(): Promise<void> {
    if (this._pollInFlight) return;
    this._pollInFlight = true;

    try {
      const chunks = await this._page.evaluate(() => {
        const buf = (window as any).__audioCaptureChunks;
        return Array.isArray(buf) ? (buf.splice(0, buf.length) as string[]) : [];
      });

      for (const chunk of chunks) {
        this._onAudioChunk(chunk, AudioCaptureProcedure.SAMPLE_RATE);
      }
    } catch (err) {
      // Page might be navigating or closed; keep polling but leave a trace.
      this._logger.debug('[AudioCapture] Poll skipped:', err);
    } finally {
      this._pollInFlight = false;
    }
  }
}
|
||||||
|
|
@ -7,11 +7,13 @@ import WebSocket from 'ws';
|
||||||
|
|
||||||
import { config } from '../config';
|
import { config } from '../config';
|
||||||
import { createSessionLogger } from '../utils/logger';
|
import { createSessionLogger } from '../utils/logger';
|
||||||
import { BotSession, BotState, TranscriptEntry, StatusMessage, TranscriptMessage, PlayAudioMessage, ChatMessage, SendChatMessage } from '../types';
|
import { BotSession, BotState, TranscriptEntry, StatusMessage, TranscriptMessage, PlayAudioMessage, ChatMessage, SendChatMessage, AudioChunkMessage } from '../types';
|
||||||
import { JoinProcedure } from './joinProcedure';
|
import { JoinProcedure } from './joinProcedure';
|
||||||
import { CaptionsProcedure } from './captionsProcedure';
|
import { CaptionsProcedure } from './captionsProcedure';
|
||||||
import { AudioProcedure } from './audioProcedure';
|
import { AudioProcedure } from './audioProcedure';
|
||||||
|
import { AudioCaptureProcedure } from './audioCaptureProcedure';
|
||||||
import { ChatProcedure, ChatMessageEntry } from './chatProcedure';
|
import { ChatProcedure, ChatMessageEntry } from './chatProcedure';
|
||||||
|
import { AuthProcedure } from './authProcedure';
|
||||||
import { isValidMeetingUrl } from './meetingUrlParser';
|
import { isValidMeetingUrl } from './meetingUrlParser';
|
||||||
|
|
||||||
export interface OrchestratorCallbacks {
|
export interface OrchestratorCallbacks {
|
||||||
|
|
@ -26,7 +28,7 @@ export interface OrchestratorOptions {
|
||||||
language?: string;
|
language?: string;
|
||||||
botAccountEmail?: string;
|
botAccountEmail?: string;
|
||||||
botAccountPassword?: string;
|
botAccountPassword?: string;
|
||||||
backgroundImageUrl?: string;
|
transferMode?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -57,6 +59,7 @@ export class BotOrchestrator {
|
||||||
private _joinProcedure: JoinProcedure | null = null;
|
private _joinProcedure: JoinProcedure | null = null;
|
||||||
private _captionsProcedure: CaptionsProcedure | null = null;
|
private _captionsProcedure: CaptionsProcedure | null = null;
|
||||||
private _audioProcedure: AudioProcedure | null = null;
|
private _audioProcedure: AudioProcedure | null = null;
|
||||||
|
private _audioCaptureProcedure: AudioCaptureProcedure | null = null;
|
||||||
private _chatProcedure: ChatProcedure | null = null;
|
private _chatProcedure: ChatProcedure | null = null;
|
||||||
|
|
||||||
private _state: BotState = 'idle';
|
private _state: BotState = 'idle';
|
||||||
|
|
@ -87,10 +90,7 @@ export class BotOrchestrator {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start the bot - connect to Gateway, launch browser, join meeting, enable captions.
|
* Start the bot - connect to Gateway, launch browser, join meeting, enable captions.
|
||||||
*
|
* Chooses between anonymous join and authenticated join based on credentials.
|
||||||
* NOTE: Authentication is disabled. The bot always joins as an anonymous guest
|
|
||||||
* with the configured bot name (typically the system bot's display name, e.g. "Nyla Larsson").
|
|
||||||
* See Teamsbot-Auth-Join-Learnings.md for details on why and how to re-enable.
|
|
||||||
*/
|
*/
|
||||||
async start(): Promise<void> {
|
async start(): Promise<void> {
|
||||||
if (!isValidMeetingUrl(this._meetingUrl)) {
|
if (!isValidMeetingUrl(this._meetingUrl)) {
|
||||||
|
|
@ -103,8 +103,15 @@ export class BotOrchestrator {
|
||||||
// Connect to Gateway WebSocket first
|
// Connect to Gateway WebSocket first
|
||||||
await this._connectToGateway();
|
await this._connectToGateway();
|
||||||
|
|
||||||
// Join meeting as anonymous guest with configured bot name
|
// Choose join method based on credentials
|
||||||
|
const hasCredentials = !!(this._options.botAccountEmail && this._options.botAccountPassword);
|
||||||
|
if (hasCredentials) {
|
||||||
|
this._logger.info(`Authenticated join as: ${this._options.botAccountEmail}`);
|
||||||
|
await this._attemptAuthJoin();
|
||||||
|
} else {
|
||||||
|
this._logger.info('Anonymous join with bot name: ' + this._botName);
|
||||||
await this._attemptJoin();
|
await this._attemptJoin();
|
||||||
|
}
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this._logger.error('Error starting bot:', error);
|
this._logger.error('Error starting bot:', error);
|
||||||
|
|
@ -116,11 +123,6 @@ export class BotOrchestrator {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Join a meeting as anonymous guest with the configured bot name.
|
* Join a meeting as anonymous guest with the configured bot name.
|
||||||
*
|
|
||||||
* NOTE: Authentication is disabled. See Teamsbot-Auth-Join-Learnings.md.
|
|
||||||
* The bot name (e.g. "Nyla Larsson") comes from the system bot's display name,
|
|
||||||
* configured in the Gateway. This provides a consistent identity without
|
|
||||||
* requiring Microsoft authentication.
|
|
||||||
*/
|
*/
|
||||||
private async _attemptJoin(): Promise<void> {
|
private async _attemptJoin(): Promise<void> {
|
||||||
// Launch browser
|
// Launch browser
|
||||||
|
|
@ -150,16 +152,197 @@ export class BotOrchestrator {
|
||||||
// Dismiss any post-join permission modals (e.g. "Manage windows on all displays")
|
// Dismiss any post-join permission modals (e.g. "Manage windows on all displays")
|
||||||
await this._joinProcedure!.dismissBrowserPermissionModals();
|
await this._joinProcedure!.dismissBrowserPermissionModals();
|
||||||
|
|
||||||
// Initialize audio
|
// Initialize audio playback
|
||||||
await this._audioProcedure!.initialize();
|
await this._audioProcedure!.initialize();
|
||||||
|
|
||||||
// Enable and subscribe to captions
|
// Enable transcript capture (captions or audio based on transferMode)
|
||||||
await this._enableCaptions();
|
await this._enableTranscriptCapture();
|
||||||
|
|
||||||
// Enable chat monitoring
|
// Enable chat monitoring
|
||||||
await this._enableChat();
|
await this._enableChat();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Join a meeting as authenticated user (System Bot or User Account).
 * Flow: teams.microsoft.com → MS Login → Teams Chat → Join → Pre-Join → Join now
 *
 * Intermediate waits (login redirect, login form, landing URL, button visibility) are
 * best-effort: a timeout is logged and the flow continues. Only a failed authentication
 * or a join button that cannot be clicked aborts the join.
 *
 * @throws Error if the page is missing after launch, if authentication fails, or if the
 *         "Join" / "Join now" button cannot be clicked.
 */
private async _attemptAuthJoin(): Promise<void> {
  // Launch browser in headful mode with minimal args (Chromium Minimal)
  await this._launchBrowser(true);

  const page = this._page;
  if (!page) {
    throw new Error('Browser page not available after launch');
  }

  // Clicks the first selector that becomes visible (5 s budget per selector).
  const clickFirstVisible = async (selectors: readonly string[]): Promise<boolean> => {
    for (const selector of selectors) {
      try {
        const btn = await page.waitForSelector(selector, { timeout: 5000, state: 'visible' });
        if (btn) {
          await btn.click();
          return true;
        }
      } catch { /* try next */ }
    }
    return false;
  };

  const chatJoinSelectors = [
    'button[data-tid="chat-join-button"]',
    'button[data-tid="join-call-button"]',
  ];
  const joinNowSelectors = [
    'button:has-text("Join now")',
    'button:has-text("Jetzt teilnehmen")',
    'button[data-tid="prejoin-join-button"]',
  ];

  this._setState('navigating');

  // STEP 1: Navigate to teams.microsoft.com
  this._logger.info('Auth join: navigating to teams.microsoft.com');
  await page.goto('https://teams.microsoft.com', {
    waitUntil: 'domcontentloaded',
    timeout: 30000,
  });

  // Wait for login redirect
  try {
    await page.waitForURL('**/login.microsoftonline.com/**', { timeout: 30000 });
    this._logger.info('Redirected to MS login page');
  } catch {
    this._logger.warn(`No login redirect, current URL: ${page.url().substring(0, 150)}`);
  }

  // Wait for login page to render
  try {
    await page.waitForSelector('input[name="loginfmt"], input[type="email"]', {
      timeout: 15000, state: 'visible',
    });
  } catch {
    this._logger.warn('Login page elements not found');
  }

  // STEP 2: Microsoft Authentication
  this._logger.info(`Authenticating as ${this._options.botAccountEmail}`);
  const authProcedure = new AuthProcedure(page, this._logger);
  const authSuccess = await authProcedure.authenticateWithMicrosoft(
    this._options.botAccountEmail!,
    this._options.botAccountPassword!,
    true,
  );

  if (!authSuccess) {
    throw new Error('Microsoft authentication failed');
  }
  this._logger.info('Authentication successful');

  // STEP 3: Wait for Teams chat page (landing page after auth).
  // Both hosts are accepted in a single wait so that landing on teams.cloud.microsoft
  // does not first have to sit through the full teams.microsoft.com timeout.
  try {
    await page.waitForURL(
      (url) => url.hostname === 'teams.microsoft.com' || url.hostname === 'teams.cloud.microsoft',
      { timeout: 40000 },
    );
  } catch {
    this._logger.warn(`Unexpected URL after auth: ${page.url().substring(0, 150)}`);
  }

  // Wait for "Join" button in chat header
  try {
    await page.waitForSelector(chatJoinSelectors.join(', '), { timeout: 30000, state: 'visible' });
    this._logger.info('Teams chat page loaded, "Join" button found');
  } catch {
    this._logger.warn('"Join" button not found in chat header');
    await this._takeScreenshot('auth-no-join-button');
  }

  // STEP 4: Click "Join" in chat header → Pre-Join screen
  this._logger.info('Clicking "Join" in chat header');
  if (!(await clickFirstVisible(chatJoinSelectors))) {
    throw new Error('"Join" button in chat header not found');
  }

  // STEP 5: Pre-Join screen → Click "Join now"
  this._logger.info('Waiting for pre-join screen');
  try {
    await page.waitForSelector(joinNowSelectors.join(', '), { timeout: 30000, state: 'visible' });
  } catch {
    this._logger.warn('"Join now" button not found');
    await this._takeScreenshot('auth-no-join-now');
  }

  // Activate camera toggle if it's off (so background image is visible)
  await this._ensureCameraOn();

  // Fixed pause before clicking "Join now" (kept from the original flow)
  await page.waitForTimeout(2000);

  if (!(await clickFirstVisible(joinNowSelectors))) {
    throw new Error('"Join now" button not found on pre-join screen');
  }

  this._logger.info('Clicked "Join now", waiting for meeting');

  // Wait for meeting admission (hangup button = in meeting)
  await this._waitForMeetingAdmission();

  this._setState('in_meeting');
  this._logger.info(`Bot joined the meeting (authenticated as ${this._options.botAccountEmail})`);

  // Initialize audio playback
  await this._audioProcedure!.initialize();

  // Enable transcript capture (captions or audio based on transferMode)
  await this._enableTranscriptCapture();
  await this._enableChat();
}
|
||||||
|
|
||||||
|
/**
 * Make sure the camera toggle on the pre-join screen is switched on.
 * When camera is on, Teams shows the profile/background image.
 *
 * Best-effort: a missing toggle or any error is logged as a warning and never thrown.
 */
private async _ensureCameraOn(): Promise<void> {
  try {
    // Locate the camera toggle by test id or by its accessible label
    const toggle = await this._page!.$('button[data-tid="toggle-video"], button[aria-label*="camera" i], button[aria-label*="Camera" i], button[aria-label*="Video" i]');
    if (!toggle) {
      this._logger.warn('Camera toggle button not found');
      return;
    }

    // Any one of these markers is treated as "camera currently off"
    const cameraIsOff = await toggle.evaluate((node) => {
      if (node.getAttribute('aria-pressed') === 'false') return true;
      if (node.getAttribute('aria-checked') === 'false') return true;
      if (node.classList.contains('is-off')) return true;
      return node.querySelector('.fui-Icon-regular') !== null;
    });

    if (!cameraIsOff) {
      this._logger.info('Camera already ON');
      return;
    }

    await toggle.click();
    this._logger.info('Camera toggled ON');
    await this._page!.waitForTimeout(1000);
  } catch (err) {
    this._logger.warn('Could not toggle camera:', err);
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Connect to the Gateway WebSocket for this session.
|
* Connect to the Gateway WebSocket for this session.
|
||||||
*/
|
*/
|
||||||
|
|
@ -288,6 +471,9 @@ export class BotOrchestrator {
|
||||||
url = `${this._httpBaseUrl}/bot/transcript/${this._sessionId}`;
|
url = `${this._httpBaseUrl}/bot/transcript/${this._sessionId}`;
|
||||||
} else if (msgType === 'status') {
|
} else if (msgType === 'status') {
|
||||||
url = `${this._httpBaseUrl}/bot/status/${this._sessionId}`;
|
url = `${this._httpBaseUrl}/bot/status/${this._sessionId}`;
|
||||||
|
} else if (msgType === 'audioChunk') {
|
||||||
|
// Audio chunks are too frequent for HTTP — only send via WebSocket
|
||||||
|
return;
|
||||||
} else {
|
} else {
|
||||||
this._logger.debug(`HTTP fallback: unsupported message type ${msgType}`);
|
this._logger.debug(`HTTP fallback: unsupported message type ${msgType}`);
|
||||||
return;
|
return;
|
||||||
|
|
@ -351,6 +537,11 @@ export class BotOrchestrator {
|
||||||
try {
|
try {
|
||||||
this._setState('leaving');
|
this._setState('leaving');
|
||||||
|
|
||||||
|
// Stop audio capture
|
||||||
|
if (this._audioCaptureProcedure) {
|
||||||
|
await this._audioCaptureProcedure.stopCapture();
|
||||||
|
}
|
||||||
|
|
||||||
// Unsubscribe from captions and chat
|
// Unsubscribe from captions and chat
|
||||||
if (this._captionsProcedure) {
|
if (this._captionsProcedure) {
|
||||||
await this._captionsProcedure.unsubscribe();
|
await this._captionsProcedure.unsubscribe();
|
||||||
|
|
@ -359,7 +550,7 @@ export class BotOrchestrator {
|
||||||
await this._chatProcedure.unsubscribe();
|
await this._chatProcedure.unsubscribe();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clean up audio
|
// Clean up audio playback
|
||||||
if (this._audioProcedure) {
|
if (this._audioProcedure) {
|
||||||
await this._audioProcedure.cleanup();
|
await this._audioProcedure.cleanup();
|
||||||
}
|
}
|
||||||
|
|
@ -403,20 +594,30 @@ export class BotOrchestrator {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Launch the browser and create a new page.
|
* Launch the browser and create a new page.
|
||||||
|
* @param authMode - If true, use headful + minimal args (Chromium Minimal, proven to work for auth)
|
||||||
*/
|
*/
|
||||||
private async _launchBrowser(): Promise<void> {
|
private async _launchBrowser(authMode: boolean = false): Promise<void> {
|
||||||
this._logger.info('Launching browser...');
|
this._logger.info(`Launching browser (authMode=${authMode})...`);
|
||||||
|
|
||||||
this._browser = await chromium.launch({
|
const args = authMode
|
||||||
headless: config.botHeadless,
|
? [
|
||||||
args: [
|
// Chromium Minimal: only --no-sandbox + fake media (proven to work for authenticated join)
|
||||||
'--use-fake-ui-for-media-stream', // Auto-accept media permissions
|
'--no-sandbox',
|
||||||
'--use-fake-device-for-media-stream', // Provide fake camera/mic so Teams sees devices
|
'--use-fake-ui-for-media-stream',
|
||||||
|
'--use-fake-device-for-media-stream',
|
||||||
|
]
|
||||||
|
: [
|
||||||
|
'--use-fake-ui-for-media-stream',
|
||||||
|
'--use-fake-device-for-media-stream',
|
||||||
'--disable-web-security',
|
'--disable-web-security',
|
||||||
'--disable-features=IsolateOrigins,site-per-process',
|
'--disable-features=IsolateOrigins,site-per-process',
|
||||||
'--autoplay-policy=no-user-gesture-required',
|
'--autoplay-policy=no-user-gesture-required',
|
||||||
'--disable-blink-features=AutomationControlled', // Prevent navigator.webdriver=true
|
'--disable-blink-features=AutomationControlled',
|
||||||
],
|
];
|
||||||
|
|
||||||
|
this._browser = await chromium.launch({
|
||||||
|
headless: authMode ? false : config.botHeadless,
|
||||||
|
args,
|
||||||
});
|
});
|
||||||
|
|
||||||
this._context = await this._browser.newContext({
|
this._context = await this._browser.newContext({
|
||||||
|
|
@ -463,8 +664,15 @@ export class BotOrchestrator {
|
||||||
if (!window.chrome.runtime) { window.chrome.runtime = {}; }
|
if (!window.chrome.runtime) { window.chrome.runtime = {}; }
|
||||||
});
|
});
|
||||||
|
|
||||||
// Initialize procedures (always anonymous join)
|
// Initialize procedures
|
||||||
this._joinProcedure = new JoinProcedure(this._page, this._logger, this._botName);
|
this._joinProcedure = new JoinProcedure(this._page, this._logger, this._botName);
|
||||||
|
this._audioCaptureProcedure = new AudioCaptureProcedure(
|
||||||
|
this._page,
|
||||||
|
this._logger,
|
||||||
|
(base64Data, sampleRate) => {
|
||||||
|
this._sendAudioChunk(base64Data, sampleRate);
|
||||||
|
},
|
||||||
|
);
|
||||||
this._captionsProcedure = new CaptionsProcedure(
|
this._captionsProcedure = new CaptionsProcedure(
|
||||||
this._page,
|
this._page,
|
||||||
this._logger,
|
this._logger,
|
||||||
|
|
@ -496,6 +704,12 @@ export class BotOrchestrator {
|
||||||
// This ensures Teams gets our controlled audio stream when it calls getUserMedia
|
// This ensures Teams gets our controlled audio stream when it calls getUserMedia
|
||||||
await this._audioProcedure.injectAudioOverride();
|
await this._audioProcedure.injectAudioOverride();
|
||||||
|
|
||||||
|
// Inject audio capture (WebRTC interception) if transfer mode requires it
|
||||||
|
const transferMode = this._getEffectiveTransferMode();
|
||||||
|
if (transferMode === 'audio') {
|
||||||
|
await this._audioCaptureProcedure!.injectCaptureOverride();
|
||||||
|
}
|
||||||
|
|
||||||
// Handle page errors
|
// Handle page errors
|
||||||
this._page.on('pageerror', (error) => {
|
this._page.on('pageerror', (error) => {
|
||||||
this._logger.error('Page error:', error);
|
this._logger.error('Page error:', error);
|
||||||
|
|
@ -584,6 +798,19 @@ export class BotOrchestrator {
|
||||||
throw new Error('Timeout waiting to be admitted from lobby');
|
throw new Error('Timeout waiting to be admitted from lobby');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Resolve the transfer mode actually used for this session.
 *
 * An explicit 'caption' or 'audio' setting is honoured as-is. Anything else
 * (including a missing value) is treated as auto:
 * anonymous → audio, authenticated → caption.
 */
private _getEffectiveTransferMode(): 'caption' | 'audio' {
  const configured = this._options.transferMode || 'auto';

  switch (configured) {
    case 'caption':
      return 'caption';
    case 'audio':
      return 'audio';
    default: {
      // auto: use audio for anonymous (Teams only provides English captions), caption for auth
      const { botAccountEmail, botAccountPassword } = this._options;
      const authenticated = !!(botAccountEmail && botAccountPassword);
      if (authenticated) {
        return 'caption';
      }
      return 'audio';
    }
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Enable captions and start scraping.
|
* Enable captions and start scraping.
|
||||||
*/
|
*/
|
||||||
|
|
@ -594,7 +821,36 @@ export class BotOrchestrator {
|
||||||
this._logger.info('Captions enabled and subscribed');
|
this._logger.info('Captions enabled and subscribed');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this._logger.warn('Could not enable captions:', error);
|
this._logger.warn('Could not enable captions:', error);
|
||||||
// Continue without captions - not a fatal error
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Start forwarding audio captured from meeting participants.
 * Failures are logged as warnings; they do not abort the join.
 */
private async _enableAudioCapture(): Promise<void> {
  const capture = this._audioCaptureProcedure;

  if (capture) {
    try {
      await capture.startCapture();
      this._logger.info('Audio capture started (PCM16 16kHz mono)');
    } catch (error) {
      this._logger.warn('Could not start audio capture:', error);
    }
    return;
  }

  this._logger.warn('Audio capture procedure not initialized');
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enable transcript capture (captions or audio) based on transfer mode.
|
||||||
|
*/
|
||||||
|
private async _enableTranscriptCapture(): Promise<void> {
|
||||||
|
const transferMode = this._getEffectiveTransferMode();
|
||||||
|
this._logger.info(`Transfer mode: ${transferMode} (configured: ${this._options.transferMode || 'auto'})`);
|
||||||
|
|
||||||
|
if (transferMode === 'caption') {
|
||||||
|
await this._enableCaptions();
|
||||||
|
} else {
|
||||||
|
await this._enableAudioCapture();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -628,6 +884,23 @@ export class BotOrchestrator {
|
||||||
this._sendToGateway(message);
|
this._sendToGateway(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Wrap one captured audio chunk in an AudioChunkMessage and hand it to the Gateway
 * for STT processing.
 *
 * @param base64Data - Base64-encoded PCM16 audio.
 * @param sampleRate - Sample rate of the audio in Hz.
 */
private _sendAudioChunk(base64Data: string, sampleRate: number): void {
  const audio: AudioChunkMessage['audio'] = {
    format: 'pcm16',
    sampleRate,
    data: base64Data,
    timestamp: new Date().toISOString(),
  };

  const outgoing: AudioChunkMessage = {
    type: 'audioChunk',
    sessionId: this._sessionId,
    audio,
  };

  this._sendToGateway(outgoing);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Send a text message to the meeting chat.
|
* Send a text message to the meeting chat.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -19,8 +19,8 @@ async function main(): Promise<void> {
|
||||||
|
|
||||||
// Start HTTP server
|
// Start HTTP server
|
||||||
httpServer = new HttpServer({
|
httpServer = new HttpServer({
|
||||||
onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl) => {
|
onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode) => {
|
||||||
await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl);
|
await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode);
|
||||||
},
|
},
|
||||||
onLeaveRequest: async (sessionId) => {
|
onLeaveRequest: async (sessionId) => {
|
||||||
await sessionManager.endSession(sessionId);
|
await sessionManager.endSession(sessionId);
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ import { config } from '../config';
|
||||||
import { runAuthTests, runSingleVariant, getVariantIds } from '../bot/authTestProcedure';
|
import { runAuthTests, runSingleVariant, getVariantIds } from '../bot/authTestProcedure';
|
||||||
|
|
||||||
export interface HttpServerCallbacks {
|
export interface HttpServerCallbacks {
|
||||||
onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string, language?: string, botAccountEmail?: string, botAccountPassword?: string, backgroundImageUrl?: string) => Promise<void>;
|
onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string, language?: string, botAccountEmail?: string, botAccountPassword?: string, transferMode?: string) => Promise<void>;
|
||||||
onLeaveRequest: (sessionId: string) => Promise<void>;
|
onLeaveRequest: (sessionId: string) => Promise<void>;
|
||||||
onStatusRequest: (sessionId: string) => { state: string; error?: string } | null;
|
onStatusRequest: (sessionId: string) => { state: string; error?: string } | null;
|
||||||
}
|
}
|
||||||
|
|
@ -78,14 +78,14 @@ export class HttpServer {
|
||||||
// Deploy a new bot
|
// Deploy a new bot
|
||||||
this._app.post('/api/bot', async (req: Request, res: Response) => {
|
this._app.post('/api/bot', async (req: Request, res: Response) => {
|
||||||
try {
|
try {
|
||||||
const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl } = req.body;
|
const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode } = req.body;
|
||||||
|
|
||||||
if (!sessionId || !meetingUrl) {
|
if (!sessionId || !meetingUrl) {
|
||||||
res.status(400).json({ error: 'Missing required fields: sessionId, meetingUrl' });
|
res.status(400).json({ error: 'Missing required fields: sessionId, meetingUrl' });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl);
|
await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode);
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
success: true,
|
success: true,
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ export class SessionManager {
|
||||||
language?: string,
|
language?: string,
|
||||||
botAccountEmail?: string,
|
botAccountEmail?: string,
|
||||||
botAccountPassword?: string,
|
botAccountPassword?: string,
|
||||||
backgroundImageUrl?: string
|
transferMode?: string,
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
if (this._sessions.has(sessionId)) {
|
if (this._sessions.has(sessionId)) {
|
||||||
logger.warn(`Session ${sessionId} already exists`);
|
logger.warn(`Session ${sessionId} already exists`);
|
||||||
|
|
@ -75,7 +75,7 @@ export class SessionManager {
|
||||||
language: language,
|
language: language,
|
||||||
botAccountEmail: botAccountEmail,
|
botAccountEmail: botAccountEmail,
|
||||||
botAccountPassword: botAccountPassword,
|
botAccountPassword: botAccountPassword,
|
||||||
backgroundImageUrl: backgroundImageUrl,
|
transferMode: transferMode,
|
||||||
};
|
};
|
||||||
|
|
||||||
const orchestrator = new BotOrchestrator(
|
const orchestrator = new BotOrchestrator(
|
||||||
|
|
|
||||||
|
|
@ -55,8 +55,19 @@ export interface SendChatMessage {
|
||||||
text: string;
|
text: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Bot → Gateway message carrying one chunk of captured meeting audio. */
export interface AudioChunkMessage {
  /** Discriminator for this message kind. */
  type: 'audioChunk';
  /** Session the audio belongs to. */
  sessionId: string;
  /** The audio payload and its metadata. */
  audio: {
    /** Encoding of `data`. */
    format: 'pcm16' | 'opus';
    /** Sample rate of the audio. */
    sampleRate: number;
    /** Audio bytes, base64 encoded. */
    data: string;
    /** Timestamp string attached by the sender. */
    timestamp: string;
  };
}
|
||||||
|
|
||||||
export type GatewayToBot = PlayAudioMessage | JoinMeetingMessage | LeaveMeetingMessage | SendChatMessage;
|
export type GatewayToBot = PlayAudioMessage | JoinMeetingMessage | LeaveMeetingMessage | SendChatMessage;
|
||||||
export type BotToGateway = TranscriptMessage | StatusMessage | ChatMessage;
|
export type BotToGateway = TranscriptMessage | StatusMessage | ChatMessage | AudioChunkMessage;
|
||||||
|
|
||||||
// Bot State
|
// Bot State
|
||||||
export type BotState =
|
export type BotState =
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue