feat: authenticated join flow, audio capture, camera activation, transferMode support
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
35dd781d90
commit
04abaf9402
6 changed files with 505 additions and 38 deletions
183
src/bot/audioCaptureProcedure.ts
Normal file
183
src/bot/audioCaptureProcedure.ts
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
import { Page } from 'playwright';
|
||||
import { Logger } from 'winston';
|
||||
|
||||
/**
 * Captures incoming meeting audio by intercepting WebRTC RTCPeerConnection.
 *
 * How it works:
 * 1. Before page navigation, wraps window.RTCPeerConnection via addInitScript.
 * 2. When the page creates a peer connection, the wrapper listens for incoming audio tracks.
 * 3. The first incoming audio track is routed through an AudioContext + ScriptProcessorNode.
 * 4. Audio is converted to PCM16 (16kHz mono), base64-encoded in chunks of roughly one second
 *    and appended to the page-global array `window.__audioCaptureChunks` (capped at 30 entries;
 *    further chunks are dropped while the cap is reached).
 * 5. The Node.js side polls that array and forwards every chunk to the `onAudioChunk` callback.
 */
export class AudioCaptureProcedure {
    /** Sample rate reported to the callback; must match the AudioContext rate used in the init script. */
    private static readonly SAMPLE_RATE_HZ = 16000;
    /** Interval at which the Node.js side drains the in-page chunk buffer. */
    private static readonly POLL_INTERVAL_MS = 500;

    private _page: Page;
    private _logger: Logger;
    private _onAudioChunk: (base64Data: string, sampleRate: number) => void;
    private _isCapturing: boolean = false;
    private _pollInterval: ReturnType<typeof setInterval> | null = null;
    private _injected: boolean = false;
    /** True while a poll's page.evaluate round trip is outstanding; prevents overlapping polls. */
    private _pollInFlight: boolean = false;

    /**
     * @param page - Page whose WebRTC audio should be captured.
     * @param logger - Logger used for progress and diagnostic messages.
     * @param onAudioChunk - Receives each base64-encoded PCM16 chunk together with its sample rate.
     */
    constructor(
        page: Page,
        logger: Logger,
        onAudioChunk: (base64Data: string, sampleRate: number) => void,
    ) {
        this._page = page;
        this._logger = logger;
        this._onAudioChunk = onAudioChunk;
    }

    /**
     * Inject the RTCPeerConnection wrapper BEFORE any page navigation.
     * Must be called before navigating to Teams. Calling it again is a no-op.
     */
    async injectCaptureOverride(): Promise<void> {
        if (this._injected) return;

        this._logger.info('[AudioCapture] Injecting RTCPeerConnection wrapper...');

        // The callback below runs inside the page, not in Node.js, so it must be
        // self-contained and cannot reference anything from the surrounding scope.
        await this._page.addInitScript(() => {
            const w = window as any;

            // Audio chunk buffer — Node.js polls this periodically
            w.__audioCaptureChunks = [] as string[];
            w.__audioCaptureActive = false;

            const OrigRTC = window.RTCPeerConnection;
            // Nothing to wrap in documents that do not expose WebRTC.
            if (typeof OrigRTC !== 'function') return;

            const WrappedRTC = function (...args: ConstructorParameters<typeof RTCPeerConnection>) {
                const pc = new OrigRTC(...args);

                pc.addEventListener('track', (event: RTCTrackEvent) => {
                    if (event.track.kind !== 'audio') return;
                    // Only one track is captured at a time; the flag is the claim on that slot.
                    if (w.__audioCaptureActive) return;
                    w.__audioCaptureActive = true;

                    let ctx: AudioContext | undefined;
                    try {
                        const AudioCtx = window.AudioContext || w.webkitAudioContext;
                        const audioCtx: AudioContext = new AudioCtx({ sampleRate: 16000 });
                        ctx = audioCtx;
                        const source = audioCtx.createMediaStreamSource(new MediaStream([event.track]));

                        // ScriptProcessor for raw PCM access (deprecated but widely supported)
                        const processor = audioCtx.createScriptProcessor(4096, 1, 1);
                        let chunkBuffer: Float32Array[] = [];
                        let samplesCollected = 0;
                        const samplesPerChunk = 16000; // 1 second of audio at 16kHz

                        processor.onaudioprocess = (e: AudioProcessingEvent) => {
                            const input = e.inputBuffer.getChannelData(0);
                            // Copy: the engine may reuse the underlying buffer after this callback.
                            chunkBuffer.push(new Float32Array(input));
                            samplesCollected += input.length;

                            if (samplesCollected < samplesPerChunk) return;

                            // Merge buffers into one Float32Array
                            const merged = new Float32Array(samplesCollected);
                            let offset = 0;
                            for (const buf of chunkBuffer) {
                                merged.set(buf, offset);
                                offset += buf.length;
                            }

                            // Convert Float32 [-1,1] to PCM16 Int16
                            const pcm16 = new Int16Array(merged.length);
                            for (let i = 0; i < merged.length; i++) {
                                const s = Math.max(-1, Math.min(1, merged[i]));
                                pcm16[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
                            }

                            // Convert to base64
                            const bytes = new Uint8Array(pcm16.buffer);
                            let binary = '';
                            for (let i = 0; i < bytes.length; i++) {
                                binary += String.fromCharCode(bytes[i]);
                            }
                            const base64 = btoa(binary);

                            // Push to buffer for Node.js to poll; when nobody drains the
                            // buffer it stops growing at 30 entries and new chunks are dropped.
                            const chunks = w.__audioCaptureChunks as string[];
                            if (chunks.length < 30) {
                                chunks.push(base64);
                            }

                            chunkBuffer = [];
                            samplesCollected = 0;
                        };

                        source.connect(processor);
                        processor.connect(audioCtx.destination);

                        // Store references for cleanup
                        w.__audioCaptureCtx = audioCtx;
                        w.__audioCaptureProcessor = processor;

                        // When the captured track ends, tear the graph down and release the
                        // claim so that a later audio track can be captured instead.
                        event.track.addEventListener('ended', () => {
                            processor.onaudioprocess = null;
                            processor.disconnect();
                            source.disconnect();
                            if (audioCtx.state !== 'closed') {
                                audioCtx.close().catch(() => undefined);
                            }
                            // Only reset the globals if they still belong to this capture.
                            if (w.__audioCaptureCtx === audioCtx) {
                                w.__audioCaptureCtx = undefined;
                                w.__audioCaptureProcessor = undefined;
                                w.__audioCaptureActive = false;
                            }
                        }, { once: true });

                        console.log('[AudioCapture] WebRTC audio track intercepted, capturing at 16kHz mono');
                    } catch (err) {
                        console.error('[AudioCapture] Failed to set up audio capture:', err);
                        // Release the claim, otherwise one failed setup blocks every later track.
                        w.__audioCaptureActive = false;
                        if (w.__audioCaptureCtx === ctx) {
                            w.__audioCaptureCtx = undefined;
                            w.__audioCaptureProcessor = undefined;
                        }
                        if (ctx && ctx.state !== 'closed') {
                            ctx.close().catch(() => undefined);
                        }
                    }
                });

                // Returning an object from a function invoked with `new` makes it the result.
                return pc;
            } as any;

            // Keep `instanceof` checks and static members working on the wrapper
            WrappedRTC.prototype = OrigRTC.prototype;
            Object.setPrototypeOf(WrappedRTC, OrigRTC);
            w.RTCPeerConnection = WrappedRTC;
        });

        this._injected = true;
        this._logger.info('[AudioCapture] RTCPeerConnection wrapper injected');
    }

    /**
     * Start polling for captured audio chunks and forwarding them to the callback.
     * Calling it while already capturing is a no-op.
     */
    async startCapture(): Promise<void> {
        if (this._isCapturing) return;
        this._isCapturing = true;

        this._logger.info('[AudioCapture] Starting audio chunk polling...');

        this._pollInterval = setInterval(() => {
            void this._pollOnce();
        }, AudioCaptureProcedure.POLL_INTERVAL_MS);
    }

    /**
     * Stop capturing audio: stops polling, tears down the in-page audio graph and,
     * if capture was running, forwards the chunks still waiting in the page buffer.
     */
    async stopCapture(): Promise<void> {
        const wasCapturing = this._isCapturing;
        this._isCapturing = false;

        if (this._pollInterval) {
            clearInterval(this._pollInterval);
            this._pollInterval = null;
        }

        try {
            const remaining = await this._page.evaluate(() => {
                const w = window as any;
                w.__audioCaptureActive = false;

                const processor = w.__audioCaptureProcessor as ScriptProcessorNode | undefined;
                if (processor) {
                    processor.onaudioprocess = null;
                    processor.disconnect();
                }

                const ctx = w.__audioCaptureCtx as AudioContext | undefined;
                // Closing an already closed context rejects, so check the state first.
                if (ctx && ctx.state !== 'closed') {
                    ctx.close().catch(() => undefined);
                }
                w.__audioCaptureCtx = undefined;
                w.__audioCaptureProcessor = undefined;

                // The buffer only exists if the init script ran in this document.
                const buf = w.__audioCaptureChunks;
                return Array.isArray(buf) ? (buf.splice(0, buf.length) as string[]) : [];
            });

            if (wasCapturing) {
                this._deliver(remaining);
            }
        } catch (err) {
            // Page might already be closed
            this._logger.debug(`[AudioCapture] Cleanup skipped: ${err instanceof Error ? err.message : String(err)}`);
        }

        this._logger.info('[AudioCapture] Audio capture stopped');
    }

    /** Drains the in-page buffer once; skipped while a previous poll is still outstanding. */
    private async _pollOnce(): Promise<void> {
        if (this._pollInFlight) return;
        this._pollInFlight = true;

        try {
            const chunks = await this._page.evaluate(() => {
                const buf = (window as any).__audioCaptureChunks;
                return Array.isArray(buf) ? (buf.splice(0, buf.length) as string[]) : [];
            });
            this._deliver(chunks);
        } catch (err) {
            // Page might be navigating or closed
            this._logger.debug(`[AudioCapture] Poll skipped: ${err instanceof Error ? err.message : String(err)}`);
        } finally {
            this._pollInFlight = false;
        }
    }

    /** Forwards chunks to the callback; a throwing callback does not discard the remaining chunks. */
    private _deliver(chunks: string[]): void {
        for (const chunk of chunks) {
            try {
                this._onAudioChunk(chunk, AudioCaptureProcedure.SAMPLE_RATE_HZ);
            } catch (err) {
                this._logger.warn(`[AudioCapture] onAudioChunk callback failed: ${err instanceof Error ? err.message : String(err)}`);
            }
        }
    }
}
|
||||
|
|
@ -7,11 +7,13 @@ import WebSocket from 'ws';
|
|||
|
||||
import { config } from '../config';
|
||||
import { createSessionLogger } from '../utils/logger';
|
||||
import { BotSession, BotState, TranscriptEntry, StatusMessage, TranscriptMessage, PlayAudioMessage, ChatMessage, SendChatMessage } from '../types';
|
||||
import { BotSession, BotState, TranscriptEntry, StatusMessage, TranscriptMessage, PlayAudioMessage, ChatMessage, SendChatMessage, AudioChunkMessage } from '../types';
|
||||
import { JoinProcedure } from './joinProcedure';
|
||||
import { CaptionsProcedure } from './captionsProcedure';
|
||||
import { AudioProcedure } from './audioProcedure';
|
||||
import { AudioCaptureProcedure } from './audioCaptureProcedure';
|
||||
import { ChatProcedure, ChatMessageEntry } from './chatProcedure';
|
||||
import { AuthProcedure } from './authProcedure';
|
||||
import { isValidMeetingUrl } from './meetingUrlParser';
|
||||
|
||||
export interface OrchestratorCallbacks {
|
||||
|
|
@ -26,7 +28,7 @@ export interface OrchestratorOptions {
|
|||
language?: string;
|
||||
botAccountEmail?: string;
|
||||
botAccountPassword?: string;
|
||||
backgroundImageUrl?: string;
|
||||
transferMode?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -57,6 +59,7 @@ export class BotOrchestrator {
|
|||
private _joinProcedure: JoinProcedure | null = null;
|
||||
private _captionsProcedure: CaptionsProcedure | null = null;
|
||||
private _audioProcedure: AudioProcedure | null = null;
|
||||
private _audioCaptureProcedure: AudioCaptureProcedure | null = null;
|
||||
private _chatProcedure: ChatProcedure | null = null;
|
||||
|
||||
private _state: BotState = 'idle';
|
||||
|
|
@ -87,10 +90,7 @@ export class BotOrchestrator {
|
|||
|
||||
/**
|
||||
* Start the bot - connect to Gateway, launch browser, join meeting, enable captions.
|
||||
*
|
||||
* NOTE: Authentication is disabled. The bot always joins as an anonymous guest
|
||||
* with the configured bot name (typically the system bot's display name, e.g. "Nyla Larsson").
|
||||
* See Teamsbot-Auth-Join-Learnings.md for details on why and how to re-enable.
|
||||
* Chooses between anonymous join and authenticated join based on credentials.
|
||||
*/
|
||||
async start(): Promise<void> {
|
||||
if (!isValidMeetingUrl(this._meetingUrl)) {
|
||||
|
|
@ -103,8 +103,15 @@ export class BotOrchestrator {
|
|||
// Connect to Gateway WebSocket first
|
||||
await this._connectToGateway();
|
||||
|
||||
// Join meeting as anonymous guest with configured bot name
|
||||
// Choose join method based on credentials
|
||||
const hasCredentials = !!(this._options.botAccountEmail && this._options.botAccountPassword);
|
||||
if (hasCredentials) {
|
||||
this._logger.info(`Authenticated join as: ${this._options.botAccountEmail}`);
|
||||
await this._attemptAuthJoin();
|
||||
} else {
|
||||
this._logger.info('Anonymous join with bot name: ' + this._botName);
|
||||
await this._attemptJoin();
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
this._logger.error('Error starting bot:', error);
|
||||
|
|
@ -116,11 +123,6 @@ export class BotOrchestrator {
|
|||
|
||||
/**
|
||||
* Join a meeting as anonymous guest with the configured bot name.
|
||||
*
|
||||
* NOTE: Authentication is disabled. See Teamsbot-Auth-Join-Learnings.md.
|
||||
* The bot name (e.g. "Nyla Larsson") comes from the system bot's display name,
|
||||
* configured in the Gateway. This provides a consistent identity without
|
||||
* requiring Microsoft authentication.
|
||||
*/
|
||||
private async _attemptJoin(): Promise<void> {
|
||||
// Launch browser
|
||||
|
|
@ -150,16 +152,197 @@ export class BotOrchestrator {
|
|||
// Dismiss any post-join permission modals (e.g. "Manage windows on all displays")
|
||||
await this._joinProcedure!.dismissBrowserPermissionModals();
|
||||
|
||||
// Initialize audio
|
||||
// Initialize audio playback
|
||||
await this._audioProcedure!.initialize();
|
||||
|
||||
// Enable and subscribe to captions
|
||||
await this._enableCaptions();
|
||||
// Enable transcript capture (captions or audio based on transferMode)
|
||||
await this._enableTranscriptCapture();
|
||||
|
||||
// Enable chat monitoring
|
||||
await this._enableChat();
|
||||
}
|
||||
|
||||
/**
|
||||
* Join a meeting as authenticated user (System Bot or User Account).
|
||||
* Flow: teams.microsoft.com → MS Login → Teams Chat → Join → Pre-Join → Join now
|
||||
*/
|
||||
private async _attemptAuthJoin(): Promise<void> {
    // Overall flow: launch browser -> teams.microsoft.com -> Microsoft login ->
    // Teams landing page -> "Join" in chat header -> pre-join screen -> "Join now" ->
    // wait for admission -> start audio playback, transcript capture and chat.

    // Launch browser in headful mode with minimal args (Chromium Minimal)
    await this._launchBrowser(true);
    const page = this._page!;

    // Clicks the first selector that becomes visible (5s budget each); false if none did.
    const clickFirstVisible = async (selectors: string[]): Promise<boolean> => {
        for (const selector of selectors) {
            try {
                const handle = await page.waitForSelector(selector, { timeout: 5000, state: 'visible' });
                if (handle) {
                    await handle.click();
                    return true;
                }
            } catch { /* try next */ }
        }
        return false;
    };

    this._setState('navigating');

    // STEP 1: Navigate to teams.microsoft.com
    this._logger.info('Auth join: navigating to teams.microsoft.com');
    await page.goto('https://teams.microsoft.com', {
        waitUntil: 'domcontentloaded',
        timeout: 30000,
    });

    // Wait for login redirect (a missing redirect is only logged, not fatal)
    try {
        await page.waitForURL('**/login.microsoftonline.com/**', { timeout: 30000 });
        this._logger.info('Redirected to MS login page');
    } catch {
        this._logger.warn(`No login redirect, current URL: ${page.url().substring(0, 150)}`);
    }

    // Wait for login page to render
    try {
        await page.waitForSelector('input[name="loginfmt"], input[type="email"]', {
            timeout: 15000, state: 'visible',
        });
    } catch {
        this._logger.warn('Login page elements not found');
    }

    // STEP 2: Microsoft Authentication
    this._logger.info(`Authenticating as ${this._options.botAccountEmail}`);
    const authenticated = await new AuthProcedure(page, this._logger).authenticateWithMicrosoft(
        this._options.botAccountEmail!,
        this._options.botAccountPassword!,
        true,
    );
    if (!authenticated) {
        throw new Error('Microsoft authentication failed');
    }
    this._logger.info('Authentication successful');

    // STEP 3: Wait for Teams chat page (landing page after auth).
    // teams.microsoft.com is tried first; teams.cloud.microsoft is also accepted.
    const landingTargets: Array<[string, number]> = [
        ['**/teams.microsoft.com/**', 30000],
        ['**/teams.cloud.microsoft/**', 10000],
    ];
    let landed = false;
    for (const [pattern, timeout] of landingTargets) {
        try {
            await page.waitForURL(pattern, { timeout });
            landed = true;
            break;
        } catch { /* try the next accepted host */ }
    }
    if (!landed) {
        this._logger.warn(`Unexpected URL after auth: ${page.url().substring(0, 150)}`);
    }

    // Wait for "Join" button in chat header
    try {
        await page.waitForSelector(
            'button[data-tid="chat-join-button"], button[data-tid="join-call-button"]',
            { timeout: 30000, state: 'visible' },
        );
        this._logger.info('Teams chat page loaded, "Join" button found');
    } catch {
        this._logger.warn('"Join" button not found in chat header');
        await this._takeScreenshot('auth-no-join-button');
    }

    // STEP 4: Click "Join" in chat header → Pre-Join screen
    this._logger.info('Clicking "Join" in chat header');
    const chatJoinClicked = await clickFirstVisible([
        'button[data-tid="chat-join-button"]',
        'button[data-tid="join-call-button"]',
    ]);
    if (!chatJoinClicked) {
        throw new Error('"Join" button in chat header not found');
    }

    // STEP 5: Pre-Join screen → Click "Join now"
    this._logger.info('Waiting for pre-join screen');
    try {
        await page.waitForSelector(
            'button:has-text("Join now"), button:has-text("Jetzt teilnehmen"), button[data-tid="prejoin-join-button"]',
            { timeout: 30000, state: 'visible' },
        );
    } catch {
        this._logger.warn('"Join now" button not found');
        await this._takeScreenshot('auth-no-join-now');
    }

    // Activate camera toggle if it's off (so background image is visible)
    await this._ensureCameraOn();

    await page.waitForTimeout(2000);

    const joinNowClicked = await clickFirstVisible([
        'button:has-text("Join now")',
        'button:has-text("Jetzt teilnehmen")',
        'button[data-tid="prejoin-join-button"]',
    ]);
    if (!joinNowClicked) {
        throw new Error('"Join now" button not found on pre-join screen');
    }

    this._logger.info('Clicked "Join now", waiting for meeting');

    // Wait for meeting admission (hangup button = in meeting)
    await this._waitForMeetingAdmission();

    this._setState('in_meeting');
    this._logger.info(`Bot joined the meeting (authenticated as ${this._options.botAccountEmail})`);

    // Initialize audio playback
    await this._audioProcedure!.initialize();

    // Enable transcript capture (captions or audio based on transferMode)
    await this._enableTranscriptCapture();
    await this._enableChat();
}
|
||||
|
||||
/**
|
||||
* Ensure the camera is turned on in the pre-join screen.
|
||||
* When camera is on, Teams shows the profile/background image.
|
||||
*/
|
||||
private async _ensureCameraOn(): Promise<void> {
    // Best effort: every failure in here is logged as a warning and never propagated.
    try {
        // Look for camera toggle button
        const toggle = await this._page!.$('button[data-tid="toggle-video"], button[aria-label*="camera" i], button[aria-label*="Camera" i], button[aria-label*="Video" i]');
        if (!toggle) {
            this._logger.warn('Camera toggle button not found');
            return;
        }

        // Heuristics for "camera is currently off"; the first one that matches wins.
        // NOTE(review): the `.fui-Icon-regular` check presumably relies on the "off"
        // state rendering a regular-style icon — confirm against the actual Teams markup.
        const cameraIsOff = await toggle.evaluate((el) => {
            if (el.getAttribute('aria-pressed') === 'false') return true;
            if (el.getAttribute('aria-checked') === 'false') return true;
            if (el.classList.contains('is-off')) return true;
            return el.querySelector('.fui-Icon-regular') !== null;
        });

        if (!cameraIsOff) {
            this._logger.info('Camera already ON');
            return;
        }

        await toggle.click();
        this._logger.info('Camera toggled ON');
        // Short pause after the click before the caller continues.
        await this._page!.waitForTimeout(1000);
    } catch (err) {
        this._logger.warn('Could not toggle camera:', err);
    }
}
|
||||
|
||||
/**
|
||||
* Connect to the Gateway WebSocket for this session.
|
||||
*/
|
||||
|
|
@ -288,6 +471,9 @@ export class BotOrchestrator {
|
|||
url = `${this._httpBaseUrl}/bot/transcript/${this._sessionId}`;
|
||||
} else if (msgType === 'status') {
|
||||
url = `${this._httpBaseUrl}/bot/status/${this._sessionId}`;
|
||||
} else if (msgType === 'audioChunk') {
|
||||
// Audio chunks are too frequent for HTTP — only send via WebSocket
|
||||
return;
|
||||
} else {
|
||||
this._logger.debug(`HTTP fallback: unsupported message type ${msgType}`);
|
||||
return;
|
||||
|
|
@ -351,6 +537,11 @@ export class BotOrchestrator {
|
|||
try {
|
||||
this._setState('leaving');
|
||||
|
||||
// Stop audio capture
|
||||
if (this._audioCaptureProcedure) {
|
||||
await this._audioCaptureProcedure.stopCapture();
|
||||
}
|
||||
|
||||
// Unsubscribe from captions and chat
|
||||
if (this._captionsProcedure) {
|
||||
await this._captionsProcedure.unsubscribe();
|
||||
|
|
@ -359,7 +550,7 @@ export class BotOrchestrator {
|
|||
await this._chatProcedure.unsubscribe();
|
||||
}
|
||||
|
||||
// Clean up audio
|
||||
// Clean up audio playback
|
||||
if (this._audioProcedure) {
|
||||
await this._audioProcedure.cleanup();
|
||||
}
|
||||
|
|
@ -403,20 +594,30 @@ export class BotOrchestrator {
|
|||
|
||||
/**
|
||||
* Launch the browser and create a new page.
|
||||
* @param authMode - If true, use headful + minimal args (Chromium Minimal, proven to work for auth)
|
||||
*/
|
||||
private async _launchBrowser(): Promise<void> {
|
||||
this._logger.info('Launching browser...');
|
||||
private async _launchBrowser(authMode: boolean = false): Promise<void> {
|
||||
this._logger.info(`Launching browser (authMode=${authMode})...`);
|
||||
|
||||
this._browser = await chromium.launch({
|
||||
headless: config.botHeadless,
|
||||
args: [
|
||||
'--use-fake-ui-for-media-stream', // Auto-accept media permissions
|
||||
'--use-fake-device-for-media-stream', // Provide fake camera/mic so Teams sees devices
|
||||
const args = authMode
|
||||
? [
|
||||
// Chromium Minimal: only --no-sandbox + fake media (proven to work for authenticated join)
|
||||
'--no-sandbox',
|
||||
'--use-fake-ui-for-media-stream',
|
||||
'--use-fake-device-for-media-stream',
|
||||
]
|
||||
: [
|
||||
'--use-fake-ui-for-media-stream',
|
||||
'--use-fake-device-for-media-stream',
|
||||
'--disable-web-security',
|
||||
'--disable-features=IsolateOrigins,site-per-process',
|
||||
'--autoplay-policy=no-user-gesture-required',
|
||||
'--disable-blink-features=AutomationControlled', // Prevent navigator.webdriver=true
|
||||
],
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
];
|
||||
|
||||
this._browser = await chromium.launch({
|
||||
headless: authMode ? false : config.botHeadless,
|
||||
args,
|
||||
});
|
||||
|
||||
this._context = await this._browser.newContext({
|
||||
|
|
@ -463,8 +664,15 @@ export class BotOrchestrator {
|
|||
if (!window.chrome.runtime) { window.chrome.runtime = {}; }
|
||||
});
|
||||
|
||||
// Initialize procedures (always anonymous join)
|
||||
// Initialize procedures
|
||||
this._joinProcedure = new JoinProcedure(this._page, this._logger, this._botName);
|
||||
this._audioCaptureProcedure = new AudioCaptureProcedure(
|
||||
this._page,
|
||||
this._logger,
|
||||
(base64Data, sampleRate) => {
|
||||
this._sendAudioChunk(base64Data, sampleRate);
|
||||
},
|
||||
);
|
||||
this._captionsProcedure = new CaptionsProcedure(
|
||||
this._page,
|
||||
this._logger,
|
||||
|
|
@ -496,6 +704,12 @@ export class BotOrchestrator {
|
|||
// This ensures Teams gets our controlled audio stream when it calls getUserMedia
|
||||
await this._audioProcedure.injectAudioOverride();
|
||||
|
||||
// Inject audio capture (WebRTC interception) if transfer mode requires it
|
||||
const transferMode = this._getEffectiveTransferMode();
|
||||
if (transferMode === 'audio') {
|
||||
await this._audioCaptureProcedure!.injectCaptureOverride();
|
||||
}
|
||||
|
||||
// Handle page errors
|
||||
this._page.on('pageerror', (error) => {
|
||||
this._logger.error('Page error:', error);
|
||||
|
|
@ -584,6 +798,19 @@ export class BotOrchestrator {
|
|||
throw new Error('Timeout waiting to be admitted from lobby');
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine the effective transfer mode based on config and join mode.
|
||||
* auto: anonymous → audio, authenticated → caption
|
||||
*/
|
||||
private _getEffectiveTransferMode(): 'caption' | 'audio' {
    // A missing or empty setting behaves like 'auto'.
    const configured = this._options.transferMode || 'auto';

    switch (configured) {
        case 'caption':
            return 'caption';
        case 'audio':
            return 'audio';
        default: {
            // auto (and any unrecognised value): use audio for anonymous
            // (Teams only provides English captions), caption for auth
            const authenticated = Boolean(this._options.botAccountEmail && this._options.botAccountPassword);
            return authenticated ? 'caption' : 'audio';
        }
    }
}
|
||||
|
||||
/**
|
||||
* Enable captions and start scraping.
|
||||
*/
|
||||
|
|
@ -594,7 +821,36 @@ export class BotOrchestrator {
|
|||
this._logger.info('Captions enabled and subscribed');
|
||||
} catch (error) {
|
||||
this._logger.warn('Could not enable captions:', error);
|
||||
// Continue without captions - not a fatal error
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable audio capture from meeting participants.
|
||||
*/
|
||||
private async _enableAudioCapture(): Promise<void> {
    const capture = this._audioCaptureProcedure;
    if (!capture) {
        this._logger.warn('Audio capture procedure not initialized');
        return;
    }

    // A failure to start is logged as a warning and not rethrown.
    try {
        await capture.startCapture();
        this._logger.info('Audio capture started (PCM16 16kHz mono)');
    } catch (error) {
        this._logger.warn('Could not start audio capture:', error);
    }
}
|
||||
|
||||
/**
|
||||
* Enable transcript capture (captions or audio) based on transfer mode.
|
||||
*/
|
||||
private async _enableTranscriptCapture(): Promise<void> {
    const effectiveMode = this._getEffectiveTransferMode();
    this._logger.info(`Transfer mode: ${effectiveMode} (configured: ${this._options.transferMode || 'auto'})`);

    // Exactly one source is enabled: audio capture for anything but 'caption'.
    if (effectiveMode !== 'caption') {
        await this._enableAudioCapture();
        return;
    }

    await this._enableCaptions();
}
|
||||
|
||||
|
|
@ -628,6 +884,23 @@ export class BotOrchestrator {
|
|||
this._sendToGateway(message);
|
||||
}
|
||||
|
||||
/**
|
||||
* Send an audio chunk to the Gateway for STT processing.
|
||||
*/
|
||||
private _sendAudioChunk(base64Data: string, sampleRate: number): void {
    // The timestamp is taken when the chunk is forwarded, not when it was recorded.
    const timestamp = new Date().toISOString();

    const message: AudioChunkMessage = {
        type: 'audioChunk',
        sessionId: this._sessionId,
        audio: {
            format: 'pcm16',
            sampleRate,
            data: base64Data,
            timestamp,
        },
    };

    this._sendToGateway(message);
}
|
||||
|
||||
/**
|
||||
* Send a text message to the meeting chat.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -19,8 +19,8 @@ async function main(): Promise<void> {
|
|||
|
||||
// Start HTTP server
|
||||
httpServer = new HttpServer({
|
||||
onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl) => {
|
||||
await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl);
|
||||
onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode) => {
|
||||
await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode);
|
||||
},
|
||||
onLeaveRequest: async (sessionId) => {
|
||||
await sessionManager.endSession(sessionId);
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import { config } from '../config';
|
|||
import { runAuthTests, runSingleVariant, getVariantIds } from '../bot/authTestProcedure';
|
||||
|
||||
export interface HttpServerCallbacks {
|
||||
onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string, language?: string, botAccountEmail?: string, botAccountPassword?: string, backgroundImageUrl?: string) => Promise<void>;
|
||||
onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string, language?: string, botAccountEmail?: string, botAccountPassword?: string, transferMode?: string) => Promise<void>;
|
||||
onLeaveRequest: (sessionId: string) => Promise<void>;
|
||||
onStatusRequest: (sessionId: string) => { state: string; error?: string } | null;
|
||||
}
|
||||
|
|
@ -78,14 +78,14 @@ export class HttpServer {
|
|||
// Deploy a new bot
|
||||
this._app.post('/api/bot', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl } = req.body;
|
||||
const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode } = req.body;
|
||||
|
||||
if (!sessionId || !meetingUrl) {
|
||||
res.status(400).json({ error: 'Missing required fields: sessionId, meetingUrl' });
|
||||
return;
|
||||
}
|
||||
|
||||
await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl);
|
||||
await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ export class SessionManager {
|
|||
language?: string,
|
||||
botAccountEmail?: string,
|
||||
botAccountPassword?: string,
|
||||
backgroundImageUrl?: string
|
||||
transferMode?: string,
|
||||
): Promise<void> {
|
||||
if (this._sessions.has(sessionId)) {
|
||||
logger.warn(`Session ${sessionId} already exists`);
|
||||
|
|
@ -75,7 +75,7 @@ export class SessionManager {
|
|||
language: language,
|
||||
botAccountEmail: botAccountEmail,
|
||||
botAccountPassword: botAccountPassword,
|
||||
backgroundImageUrl: backgroundImageUrl,
|
||||
transferMode: transferMode,
|
||||
};
|
||||
|
||||
const orchestrator = new BotOrchestrator(
|
||||
|
|
|
|||
|
|
@ -55,8 +55,19 @@ export interface SendChatMessage {
|
|||
text: string;
|
||||
}
|
||||
|
||||
/** Bot-to-gateway message carrying one chunk of captured meeting audio. */
export interface AudioChunkMessage {
    /** Discriminator identifying this message type. */
    type: 'audioChunk';
    /** Session the audio belongs to. */
    sessionId: string;
    /** The audio payload and its encoding metadata. */
    audio: {
        /** Encoding of `data` before base64 encoding. */
        format: 'pcm16' | 'opus';
        /** Sample rate of the audio. */
        sampleRate: number;
        /** The audio bytes, base64 encoded. */
        data: string;
        /** Timestamp string set by the sender. */
        timestamp: string;
    };
}
|
||||
|
||||
export type GatewayToBot = PlayAudioMessage | JoinMeetingMessage | LeaveMeetingMessage | SendChatMessage;
|
||||
export type BotToGateway = TranscriptMessage | StatusMessage | ChatMessage;
|
||||
export type BotToGateway = TranscriptMessage | StatusMessage | ChatMessage | AudioChunkMessage;
|
||||
|
||||
// Bot State
|
||||
export type BotState =
|
||||
|
|
|
|||
Loading…
Reference in a new issue