feat: authenticated join flow, audio capture, camera activation, transferMode support

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
ValueOn AG 2026-02-17 18:43:30 +01:00
parent 35dd781d90
commit 04abaf9402
6 changed files with 505 additions and 38 deletions

View file

@ -0,0 +1,183 @@
import { Page } from 'playwright';
import { Logger } from 'winston';
/**
* Captures incoming meeting audio by intercepting WebRTC RTCPeerConnection.
*
* How it works:
* 1. Before page navigation, wraps window.RTCPeerConnection via addInitScript
* 2. When Teams establishes WebRTC connections, the wrapper intercepts incoming audio tracks
* 3. Incoming audio tracks are captured via AudioContext + ScriptProcessorNode
* 4. Audio chunks (PCM16, 16kHz mono) are buffered and exposed via a global callback
* 5. The Node.js side polls for chunks and sends them to the Gateway
*/
export class AudioCaptureProcedure {
  private _page: Page;
  private _logger: Logger;
  private _onAudioChunk: (base64Data: string, sampleRate: number) => void;
  /** Delay between two polls of the in-page chunk buffer, in milliseconds. */
  private _pollIntervalMs: number;
  private _isCapturing: boolean = false;
  private _pollInterval: ReturnType<typeof setInterval> | null = null;
  /** True while a poll's page.evaluate round-trip is still outstanding. */
  private _pollInFlight: boolean = false;
  private _injected: boolean = false;

  /**
   * @param page - Playwright page the meeting is (or will be) loaded in.
   * @param logger - Logger used for lifecycle and diagnostic messages.
   * @param onAudioChunk - Called once per captured chunk with the base64 PCM16 payload
   *   and the sample rate it is reported at.
   * @param pollIntervalMs - How often the in-page buffer is drained. Defaults to 500 ms;
   *   non-finite or non-positive values fall back to the default.
   */
  constructor(
    page: Page,
    logger: Logger,
    onAudioChunk: (base64Data: string, sampleRate: number) => void,
    pollIntervalMs: number = 500,
  ) {
    this._page = page;
    this._logger = logger;
    this._onAudioChunk = onAudioChunk;
    this._pollIntervalMs = Number.isFinite(pollIntervalMs) && pollIntervalMs > 0 ? pollIntervalMs : 500;
  }

  /**
   * Inject the RTCPeerConnection wrapper BEFORE any page navigation.
   * Must be called before navigating to Teams. Repeated calls after a
   * successful injection are no-ops.
   */
  async injectCaptureOverride(): Promise<void> {
    if (this._injected) return;
    this._logger.info('[AudioCapture] Injecting RTCPeerConnection wrapper...');

    // NOTE: this function is serialized and executed inside the page, so it must be
    // fully self-contained (no references to Node-side variables).
    await this._page.addInitScript(() => {
      const w = window as any;
      // Audio chunk buffer — Node.js polls this periodically
      w.__audioCaptureChunks = [] as string[];
      w.__audioCaptureActive = false;

      const OrigRTC = window.RTCPeerConnection;
      // Nothing to wrap in contexts without WebRTC; leave the page untouched.
      if (!OrigRTC) return;

      // Convert the collected Float32 blocks ([-1, 1]) to little-endian PCM16 and base64-encode them.
      const encodePcm16Base64 = (blocks: Float32Array[], totalSamples: number): string => {
        const pcm16 = new Int16Array(totalSamples);
        let offset = 0;
        for (const block of blocks) {
          for (let i = 0; i < block.length; i++) {
            const s = Math.max(-1, Math.min(1, block[i]));
            pcm16[offset + i] = s < 0 ? s * 0x8000 : s * 0x7fff;
          }
          offset += block.length;
        }
        const bytes = new Uint8Array(pcm16.buffer);
        let binary = '';
        for (let i = 0; i < bytes.length; i++) {
          binary += String.fromCharCode(bytes[i]);
        }
        return btoa(binary);
      };

      const Wrapped: any = function (...args: ConstructorParameters<typeof RTCPeerConnection>) {
        const pc = new OrigRTC(...args);
        pc.addEventListener('track', (event: RTCTrackEvent) => {
          if (event.track.kind !== 'audio') return;
          // Only one audio track is captured at a time.
          if (w.__audioCaptureActive) return;
          w.__audioCaptureActive = true;
          try {
            const AudioCtx = window.AudioContext || w.webkitAudioContext;
            const ctx = new AudioCtx({ sampleRate: 16000 });
            const stream = new MediaStream([event.track]);
            const source = ctx.createMediaStreamSource(stream);
            // ScriptProcessor for raw PCM access (deprecated but widely supported)
            const processor = ctx.createScriptProcessor(4096, 1, 1);
            let chunkBuffer: Float32Array[] = [];
            let samplesCollected = 0;
            // ~1 second of audio at 16kHz; the Node side reports this same rate with every chunk.
            const samplesPerChunk = 16000;

            processor.onaudioprocess = (e: AudioProcessingEvent) => {
              const input = e.inputBuffer.getChannelData(0);
              // Copy: the engine may reuse the underlying buffer between callbacks.
              chunkBuffer.push(new Float32Array(input));
              samplesCollected += input.length;
              if (samplesCollected < samplesPerChunk) return;

              const base64 = encodePcm16Base64(chunkBuffer, samplesCollected);
              // Push to buffer for Node.js to poll. The buffer is capped at 30 chunks;
              // when nobody drains it, new chunks are discarded rather than growing memory.
              const chunks = w.__audioCaptureChunks as string[];
              if (chunks.length < 30) {
                chunks.push(base64);
              }
              chunkBuffer = [];
              samplesCollected = 0;
            };

            source.connect(processor);
            // A ScriptProcessorNode only fires while it is connected to a destination.
            processor.connect(ctx.destination);
            // Store references for cleanup
            w.__audioCaptureCtx = ctx;
            w.__audioCaptureProcessor = processor;
            console.log('[AudioCapture] WebRTC audio track intercepted, capturing at 16kHz mono');
          } catch (err) {
            // Release the flag so a later audio track gets another chance.
            w.__audioCaptureActive = false;
            console.error('[AudioCapture] Failed to set up audio capture:', err);
          }
        });
        // Returning an object from a constructor makes `new` yield the real connection.
        return pc;
      };
      // Keep instanceof checks and static members (e.g. generateCertificate) working.
      Wrapped.prototype = OrigRTC.prototype;
      Object.setPrototypeOf(Wrapped, OrigRTC);
      w.RTCPeerConnection = Wrapped;
    });

    this._injected = true;
    this._logger.info('[AudioCapture] RTCPeerConnection wrapper injected');
  }

  /**
   * Start polling for captured audio chunks and forwarding them to the callback.
   * Calling it while already capturing is a no-op.
   */
  async startCapture(): Promise<void> {
    if (this._isCapturing) return;
    this._isCapturing = true;
    this._logger.info('[AudioCapture] Starting audio chunk polling...');
    this._pollInterval = setInterval(() => {
      void this._pollOnce();
    }, this._pollIntervalMs);
  }

  /**
   * Stop capturing audio: stops polling and tears down the in-page audio graph.
   * Chunks still buffered in the page, or returned by a poll that was in flight
   * when this was called, are discarded.
   */
  async stopCapture(): Promise<void> {
    this._isCapturing = false;
    if (this._pollInterval) {
      clearInterval(this._pollInterval);
      this._pollInterval = null;
    }
    try {
      await this._page.evaluate(() => {
        const w = window as any;
        w.__audioCaptureActive = false;
        const processor = w.__audioCaptureProcessor as ScriptProcessorNode | undefined;
        if (processor) {
          processor.onaudioprocess = null;
          processor.disconnect();
        }
        const ctx = w.__audioCaptureCtx as AudioContext | undefined;
        // Drop the references so a second stop does not touch a closed context.
        w.__audioCaptureProcessor = undefined;
        w.__audioCaptureCtx = undefined;
        if (ctx && ctx.state !== 'closed') {
          ctx.close().catch(() => undefined);
        }
      });
    } catch {
      // Page might already be closed
    }
    this._logger.info('[AudioCapture] Audio capture stopped');
  }

  /**
   * Drain the in-page chunk buffer once and forward every chunk to the callback.
   * Skips the tick when the previous poll has not finished, so chunks are always
   * delivered in capture order. Never rejects.
   */
  private async _pollOnce(): Promise<void> {
    if (this._pollInFlight || !this._isCapturing) return;
    this._pollInFlight = true;
    try {
      const chunks: string[] = await this._page.evaluate(() => {
        const buf = (window as any).__audioCaptureChunks as string[] | undefined;
        // The buffer is missing when the wrapper was never injected into this document.
        return Array.isArray(buf) ? buf.splice(0, buf.length) : [];
      });
      // stopCapture() may have run while the evaluate call was in flight.
      if (!this._isCapturing) return;
      for (const chunk of chunks) {
        try {
          this._onAudioChunk(chunk, 16000);
        } catch (error) {
          // One failing delivery must not drop the remaining chunks of this batch.
          this._logger.warn('[AudioCapture] Audio chunk callback failed:', error);
        }
      }
    } catch (error) {
      // Page might be navigating or closed
      this._logger.debug('[AudioCapture] Polling audio chunks failed:', error);
    } finally {
      this._pollInFlight = false;
    }
  }
}

View file

@ -7,11 +7,13 @@ import WebSocket from 'ws';
import { config } from '../config'; import { config } from '../config';
import { createSessionLogger } from '../utils/logger'; import { createSessionLogger } from '../utils/logger';
import { BotSession, BotState, TranscriptEntry, StatusMessage, TranscriptMessage, PlayAudioMessage, ChatMessage, SendChatMessage } from '../types'; import { BotSession, BotState, TranscriptEntry, StatusMessage, TranscriptMessage, PlayAudioMessage, ChatMessage, SendChatMessage, AudioChunkMessage } from '../types';
import { JoinProcedure } from './joinProcedure'; import { JoinProcedure } from './joinProcedure';
import { CaptionsProcedure } from './captionsProcedure'; import { CaptionsProcedure } from './captionsProcedure';
import { AudioProcedure } from './audioProcedure'; import { AudioProcedure } from './audioProcedure';
import { AudioCaptureProcedure } from './audioCaptureProcedure';
import { ChatProcedure, ChatMessageEntry } from './chatProcedure'; import { ChatProcedure, ChatMessageEntry } from './chatProcedure';
import { AuthProcedure } from './authProcedure';
import { isValidMeetingUrl } from './meetingUrlParser'; import { isValidMeetingUrl } from './meetingUrlParser';
export interface OrchestratorCallbacks { export interface OrchestratorCallbacks {
@ -26,7 +28,7 @@ export interface OrchestratorOptions {
language?: string; language?: string;
botAccountEmail?: string; botAccountEmail?: string;
botAccountPassword?: string; botAccountPassword?: string;
backgroundImageUrl?: string; transferMode?: string;
} }
/** /**
@ -57,6 +59,7 @@ export class BotOrchestrator {
private _joinProcedure: JoinProcedure | null = null; private _joinProcedure: JoinProcedure | null = null;
private _captionsProcedure: CaptionsProcedure | null = null; private _captionsProcedure: CaptionsProcedure | null = null;
private _audioProcedure: AudioProcedure | null = null; private _audioProcedure: AudioProcedure | null = null;
private _audioCaptureProcedure: AudioCaptureProcedure | null = null;
private _chatProcedure: ChatProcedure | null = null; private _chatProcedure: ChatProcedure | null = null;
private _state: BotState = 'idle'; private _state: BotState = 'idle';
@ -87,10 +90,7 @@ export class BotOrchestrator {
/** /**
* Start the bot - connect to Gateway, launch browser, join meeting, enable captions. * Start the bot - connect to Gateway, launch browser, join meeting, enable captions.
* * Chooses between anonymous join and authenticated join based on credentials.
* NOTE: Authentication is disabled. The bot always joins as an anonymous guest
* with the configured bot name (typically the system bot's display name, e.g. "Nyla Larsson").
* See Teamsbot-Auth-Join-Learnings.md for details on why and how to re-enable.
*/ */
async start(): Promise<void> { async start(): Promise<void> {
if (!isValidMeetingUrl(this._meetingUrl)) { if (!isValidMeetingUrl(this._meetingUrl)) {
@ -103,8 +103,15 @@ export class BotOrchestrator {
// Connect to Gateway WebSocket first // Connect to Gateway WebSocket first
await this._connectToGateway(); await this._connectToGateway();
// Join meeting as anonymous guest with configured bot name // Choose join method based on credentials
const hasCredentials = !!(this._options.botAccountEmail && this._options.botAccountPassword);
if (hasCredentials) {
this._logger.info(`Authenticated join as: ${this._options.botAccountEmail}`);
await this._attemptAuthJoin();
} else {
this._logger.info('Anonymous join with bot name: ' + this._botName);
await this._attemptJoin(); await this._attemptJoin();
}
} catch (error) { } catch (error) {
this._logger.error('Error starting bot:', error); this._logger.error('Error starting bot:', error);
@ -116,11 +123,6 @@ export class BotOrchestrator {
/** /**
* Join a meeting as anonymous guest with the configured bot name. * Join a meeting as anonymous guest with the configured bot name.
*
* NOTE: Authentication is disabled. See Teamsbot-Auth-Join-Learnings.md.
* The bot name (e.g. "Nyla Larsson") comes from the system bot's display name,
* configured in the Gateway. This provides a consistent identity without
* requiring Microsoft authentication.
*/ */
private async _attemptJoin(): Promise<void> { private async _attemptJoin(): Promise<void> {
// Launch browser // Launch browser
@ -150,16 +152,197 @@ export class BotOrchestrator {
// Dismiss any post-join permission modals (e.g. "Manage windows on all displays") // Dismiss any post-join permission modals (e.g. "Manage windows on all displays")
await this._joinProcedure!.dismissBrowserPermissionModals(); await this._joinProcedure!.dismissBrowserPermissionModals();
// Initialize audio // Initialize audio playback
await this._audioProcedure!.initialize(); await this._audioProcedure!.initialize();
// Enable and subscribe to captions // Enable transcript capture (captions or audio based on transferMode)
await this._enableCaptions(); await this._enableTranscriptCapture();
// Enable chat monitoring // Enable chat monitoring
await this._enableChat(); await this._enableChat();
} }
/**
* Join a meeting as authenticated user (System Bot or User Account).
* Flow: teams.microsoft.com → MS Login → Teams Chat → Join → Pre-Join → Join now
*/
private async _attemptAuthJoin(): Promise<void> {
  const chatJoinSelectors = [
    'button[data-tid="chat-join-button"]',
    'button[data-tid="join-call-button"]',
  ];
  const joinNowSelectors = [
    'button:has-text("Join now")',
    'button:has-text("Jetzt teilnehmen")',
    'button[data-tid="prejoin-join-button"]',
  ];

  // Launch browser in headful mode with minimal args (Chromium Minimal)
  await this._launchBrowser(true);
  this._setState('navigating');

  // STEP 1: Navigate to teams.microsoft.com
  this._logger.info('Auth join: navigating to teams.microsoft.com');
  await this._page!.goto('https://teams.microsoft.com', {
    waitUntil: 'domcontentloaded',
    timeout: 30000,
  });

  // Wait for login redirect (best effort: a missing redirect is only logged)
  try {
    await this._page!.waitForURL('**/login.microsoftonline.com/**', { timeout: 30000 });
    this._logger.info('Redirected to MS login page');
  } catch {
    this._logger.warn(`No login redirect, current URL: ${this._page!.url().substring(0, 150)}`);
  }

  // Wait for login page to render
  try {
    await this._page!.waitForSelector('input[name="loginfmt"], input[type="email"]', {
      timeout: 15000, state: 'visible',
    });
  } catch {
    this._logger.warn('Login page elements not found');
  }

  // STEP 2: Microsoft Authentication
  this._logger.info(`Authenticating as ${this._options.botAccountEmail}`);
  const authProcedure = new AuthProcedure(this._page!, this._logger);
  const authSuccess = await authProcedure.authenticateWithMicrosoft(
    this._options.botAccountEmail!,
    this._options.botAccountPassword!,
    true,
  );
  if (!authSuccess) {
    throw new Error('Microsoft authentication failed');
  }
  this._logger.info('Authentication successful');

  // STEP 3: Wait for Teams chat page (landing page after auth).
  // Both Teams hosts are accepted in ONE wait, so landing on teams.cloud.microsoft
  // no longer has to sit out a full timeout for teams.microsoft.com first.
  try {
    await this._page!.waitForURL(
      (url: URL) => url.hostname === 'teams.microsoft.com' || url.hostname === 'teams.cloud.microsoft',
      { timeout: 40000 },
    );
  } catch {
    this._logger.warn(`Unexpected URL after auth: ${this._page!.url().substring(0, 150)}`);
  }

  // Wait for "Join" button in chat header
  try {
    await this._page!.waitForSelector(chatJoinSelectors.join(', '), { timeout: 30000, state: 'visible' });
    this._logger.info('Teams chat page loaded, "Join" button found');
  } catch {
    this._logger.warn('"Join" button not found in chat header');
    await this._takeScreenshot('auth-no-join-button');
  }

  // STEP 4: Click "Join" in chat header → Pre-Join screen
  this._logger.info('Clicking "Join" in chat header');
  if (!(await this._clickFirstVisible(chatJoinSelectors))) {
    throw new Error('"Join" button in chat header not found');
  }

  // STEP 5: Pre-Join screen → Click "Join now"
  this._logger.info('Waiting for pre-join screen');
  try {
    await this._page!.waitForSelector(joinNowSelectors.join(', '), { timeout: 30000, state: 'visible' });
  } catch {
    this._logger.warn('"Join now" button not found');
    await this._takeScreenshot('auth-no-join-now');
  }

  // Activate camera toggle if it's off (so background image is visible)
  await this._ensureCameraOn();
  await this._page!.waitForTimeout(2000);

  if (!(await this._clickFirstVisible(joinNowSelectors))) {
    throw new Error('"Join now" button not found on pre-join screen');
  }
  this._logger.info('Clicked "Join now", waiting for meeting');

  // Wait for meeting admission (hangup button = in meeting)
  await this._waitForMeetingAdmission();
  this._setState('in_meeting');
  this._logger.info(`Bot joined the meeting (authenticated as ${this._options.botAccountEmail})`);

  // Initialize audio playback
  await this._audioProcedure!.initialize();
  // Enable transcript capture (captions or audio based on transferMode)
  await this._enableTranscriptCapture();
  await this._enableChat();
}

/**
 * Click the first of the given selectors that becomes visible.
 * Selectors are tried in order; a selector that times out or whose click fails is skipped.
 * @param selectors - Candidate selectors, most preferred first.
 * @param timeoutMs - How long to wait for each individual selector.
 * @returns true when one of the selectors was clicked, false when none could be.
 */
private async _clickFirstVisible(selectors: readonly string[], timeoutMs: number = 5000): Promise<boolean> {
  for (const selector of selectors) {
    try {
      const btn = await this._page!.waitForSelector(selector, { timeout: timeoutMs, state: 'visible' });
      if (btn) {
        await btn.click();
        return true;
      }
    } catch { /* try next */ }
  }
  return false;
}
/**
* Ensure the camera is turned on in the pre-join screen.
* When camera is on, Teams shows the profile/background image.
*/
private async _ensureCameraOn(): Promise<void> {
  try {
    // Locate the camera toggle; the first matching button is used.
    const toggle = await this._page!.$('button[data-tid="toggle-video"], button[aria-label*="camera" i], button[aria-label*="Camera" i], button[aria-label*="Video" i]');
    if (!toggle) {
      this._logger.warn('Camera toggle button not found');
      return;
    }

    // Heuristics for "camera is off": ARIA state, an "is-off" class, or a regular-style icon.
    const cameraIsOff = await toggle.evaluate((node) => {
      if (node.getAttribute('aria-pressed') === 'false') return true;
      if (node.getAttribute('aria-checked') === 'false') return true;
      if (node.classList.contains('is-off')) return true;
      return node.querySelector('.fui-Icon-regular') !== null;
    });

    if (!cameraIsOff) {
      this._logger.info('Camera already ON');
      return;
    }

    await toggle.click();
    this._logger.info('Camera toggled ON');
    // Short pause after the click before the caller continues.
    await this._page!.waitForTimeout(1000);
  } catch (err) {
    // Best effort: a camera problem must not abort the join.
    this._logger.warn('Could not toggle camera:', err);
  }
}
/** /**
* Connect to the Gateway WebSocket for this session. * Connect to the Gateway WebSocket for this session.
*/ */
@ -288,6 +471,9 @@ export class BotOrchestrator {
url = `${this._httpBaseUrl}/bot/transcript/${this._sessionId}`; url = `${this._httpBaseUrl}/bot/transcript/${this._sessionId}`;
} else if (msgType === 'status') { } else if (msgType === 'status') {
url = `${this._httpBaseUrl}/bot/status/${this._sessionId}`; url = `${this._httpBaseUrl}/bot/status/${this._sessionId}`;
} else if (msgType === 'audioChunk') {
// Audio chunks are too frequent for HTTP — only send via WebSocket
return;
} else { } else {
this._logger.debug(`HTTP fallback: unsupported message type ${msgType}`); this._logger.debug(`HTTP fallback: unsupported message type ${msgType}`);
return; return;
@ -351,6 +537,11 @@ export class BotOrchestrator {
try { try {
this._setState('leaving'); this._setState('leaving');
// Stop audio capture
if (this._audioCaptureProcedure) {
await this._audioCaptureProcedure.stopCapture();
}
// Unsubscribe from captions and chat // Unsubscribe from captions and chat
if (this._captionsProcedure) { if (this._captionsProcedure) {
await this._captionsProcedure.unsubscribe(); await this._captionsProcedure.unsubscribe();
@ -359,7 +550,7 @@ export class BotOrchestrator {
await this._chatProcedure.unsubscribe(); await this._chatProcedure.unsubscribe();
} }
// Clean up audio // Clean up audio playback
if (this._audioProcedure) { if (this._audioProcedure) {
await this._audioProcedure.cleanup(); await this._audioProcedure.cleanup();
} }
@ -403,20 +594,30 @@ export class BotOrchestrator {
/** /**
* Launch the browser and create a new page. * Launch the browser and create a new page.
* @param authMode - If true, use headful + minimal args (Chromium Minimal, proven to work for auth)
*/ */
private async _launchBrowser(): Promise<void> { private async _launchBrowser(authMode: boolean = false): Promise<void> {
this._logger.info('Launching browser...'); this._logger.info(`Launching browser (authMode=${authMode})...`);
this._browser = await chromium.launch({ const args = authMode
headless: config.botHeadless, ? [
args: [ // Chromium Minimal: only --no-sandbox + fake media (proven to work for authenticated join)
'--use-fake-ui-for-media-stream', // Auto-accept media permissions '--no-sandbox',
'--use-fake-device-for-media-stream', // Provide fake camera/mic so Teams sees devices '--use-fake-ui-for-media-stream',
'--use-fake-device-for-media-stream',
]
: [
'--use-fake-ui-for-media-stream',
'--use-fake-device-for-media-stream',
'--disable-web-security', '--disable-web-security',
'--disable-features=IsolateOrigins,site-per-process', '--disable-features=IsolateOrigins,site-per-process',
'--autoplay-policy=no-user-gesture-required', '--autoplay-policy=no-user-gesture-required',
'--disable-blink-features=AutomationControlled', // Prevent navigator.webdriver=true '--disable-blink-features=AutomationControlled',
], ];
this._browser = await chromium.launch({
headless: authMode ? false : config.botHeadless,
args,
}); });
this._context = await this._browser.newContext({ this._context = await this._browser.newContext({
@ -463,8 +664,15 @@ export class BotOrchestrator {
if (!window.chrome.runtime) { window.chrome.runtime = {}; } if (!window.chrome.runtime) { window.chrome.runtime = {}; }
}); });
// Initialize procedures (always anonymous join) // Initialize procedures
this._joinProcedure = new JoinProcedure(this._page, this._logger, this._botName); this._joinProcedure = new JoinProcedure(this._page, this._logger, this._botName);
this._audioCaptureProcedure = new AudioCaptureProcedure(
this._page,
this._logger,
(base64Data, sampleRate) => {
this._sendAudioChunk(base64Data, sampleRate);
},
);
this._captionsProcedure = new CaptionsProcedure( this._captionsProcedure = new CaptionsProcedure(
this._page, this._page,
this._logger, this._logger,
@ -496,6 +704,12 @@ export class BotOrchestrator {
// This ensures Teams gets our controlled audio stream when it calls getUserMedia // This ensures Teams gets our controlled audio stream when it calls getUserMedia
await this._audioProcedure.injectAudioOverride(); await this._audioProcedure.injectAudioOverride();
// Inject audio capture (WebRTC interception) if transfer mode requires it
const transferMode = this._getEffectiveTransferMode();
if (transferMode === 'audio') {
await this._audioCaptureProcedure!.injectCaptureOverride();
}
// Handle page errors // Handle page errors
this._page.on('pageerror', (error) => { this._page.on('pageerror', (error) => {
this._logger.error('Page error:', error); this._logger.error('Page error:', error);
@ -584,6 +798,19 @@ export class BotOrchestrator {
throw new Error('Timeout waiting to be admitted from lobby'); throw new Error('Timeout waiting to be admitted from lobby');
} }
/**
* Determine the effective transfer mode based on config and join mode.
* auto: anonymous → audio, authenticated → caption
*/
private _getEffectiveTransferMode(): 'caption' | 'audio' {
  // An unset/empty transferMode is treated as 'auto'.
  switch (this._options.transferMode || 'auto') {
    case 'caption':
      return 'caption';
    case 'audio':
      return 'audio';
    default: {
      // auto (and any unrecognized value): audio for anonymous joins
      // (Teams only provides English captions), captions for authenticated joins.
      const { botAccountEmail, botAccountPassword } = this._options;
      return botAccountEmail && botAccountPassword ? 'caption' : 'audio';
    }
  }
}
/** /**
* Enable captions and start scraping. * Enable captions and start scraping.
*/ */
@ -594,7 +821,36 @@ export class BotOrchestrator {
this._logger.info('Captions enabled and subscribed'); this._logger.info('Captions enabled and subscribed');
} catch (error) { } catch (error) {
this._logger.warn('Could not enable captions:', error); this._logger.warn('Could not enable captions:', error);
// Continue without captions - not a fatal error }
}
/**
* Enable audio capture from meeting participants.
*/
private async _enableAudioCapture(): Promise<void> {
  const capture = this._audioCaptureProcedure;
  if (capture) {
    try {
      await capture.startCapture();
      this._logger.info('Audio capture started (PCM16 16kHz mono)');
    } catch (error) {
      // Failing to start capture is logged but does not abort the session.
      this._logger.warn('Could not start audio capture:', error);
    }
    return;
  }
  // No capture procedure has been created.
  this._logger.warn('Audio capture procedure not initialized');
}
/**
* Enable transcript capture (captions or audio) based on transfer mode.
*/
private async _enableTranscriptCapture(): Promise<void> {
const transferMode = this._getEffectiveTransferMode();
this._logger.info(`Transfer mode: ${transferMode} (configured: ${this._options.transferMode || 'auto'})`);
if (transferMode === 'caption') {
await this._enableCaptions();
} else {
await this._enableAudioCapture();
} }
} }
@ -628,6 +884,23 @@ export class BotOrchestrator {
this._sendToGateway(message); this._sendToGateway(message);
} }
/**
* Send an audio chunk to the Gateway for STT processing.
*/
private _sendAudioChunk(base64Data: string, sampleRate: number): void {
  // The timestamp is taken when the chunk is forwarded, not when it was recorded.
  const audio: AudioChunkMessage['audio'] = {
    format: 'pcm16',
    sampleRate,
    data: base64Data,
    timestamp: new Date().toISOString(),
  };
  const payload: AudioChunkMessage = {
    type: 'audioChunk',
    sessionId: this._sessionId,
    audio,
  };
  this._sendToGateway(payload);
}
/** /**
* Send a text message to the meeting chat. * Send a text message to the meeting chat.
*/ */

View file

@ -19,8 +19,8 @@ async function main(): Promise<void> {
// Start HTTP server // Start HTTP server
httpServer = new HttpServer({ httpServer = new HttpServer({
onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl) => { onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode) => {
await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl); await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode);
}, },
onLeaveRequest: async (sessionId) => { onLeaveRequest: async (sessionId) => {
await sessionManager.endSession(sessionId); await sessionManager.endSession(sessionId);

View file

@ -5,7 +5,7 @@ import { config } from '../config';
import { runAuthTests, runSingleVariant, getVariantIds } from '../bot/authTestProcedure'; import { runAuthTests, runSingleVariant, getVariantIds } from '../bot/authTestProcedure';
export interface HttpServerCallbacks { export interface HttpServerCallbacks {
onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string, language?: string, botAccountEmail?: string, botAccountPassword?: string, backgroundImageUrl?: string) => Promise<void>; onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string, language?: string, botAccountEmail?: string, botAccountPassword?: string, transferMode?: string) => Promise<void>;
onLeaveRequest: (sessionId: string) => Promise<void>; onLeaveRequest: (sessionId: string) => Promise<void>;
onStatusRequest: (sessionId: string) => { state: string; error?: string } | null; onStatusRequest: (sessionId: string) => { state: string; error?: string } | null;
} }
@ -78,14 +78,14 @@ export class HttpServer {
// Deploy a new bot // Deploy a new bot
this._app.post('/api/bot', async (req: Request, res: Response) => { this._app.post('/api/bot', async (req: Request, res: Response) => {
try { try {
const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl } = req.body; const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode } = req.body;
if (!sessionId || !meetingUrl) { if (!sessionId || !meetingUrl) {
res.status(400).json({ error: 'Missing required fields: sessionId, meetingUrl' }); res.status(400).json({ error: 'Missing required fields: sessionId, meetingUrl' });
return; return;
} }
await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl); await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode);
res.json({ res.json({
success: true, success: true,

View file

@ -41,7 +41,7 @@ export class SessionManager {
language?: string, language?: string,
botAccountEmail?: string, botAccountEmail?: string,
botAccountPassword?: string, botAccountPassword?: string,
backgroundImageUrl?: string transferMode?: string,
): Promise<void> { ): Promise<void> {
if (this._sessions.has(sessionId)) { if (this._sessions.has(sessionId)) {
logger.warn(`Session ${sessionId} already exists`); logger.warn(`Session ${sessionId} already exists`);
@ -75,7 +75,7 @@ export class SessionManager {
language: language, language: language,
botAccountEmail: botAccountEmail, botAccountEmail: botAccountEmail,
botAccountPassword: botAccountPassword, botAccountPassword: botAccountPassword,
backgroundImageUrl: backgroundImageUrl, transferMode: transferMode,
}; };
const orchestrator = new BotOrchestrator( const orchestrator = new BotOrchestrator(

View file

@ -55,8 +55,19 @@ export interface SendChatMessage {
text: string; text: string;
} }
/**
 * Bot → Gateway message carrying one chunk of captured meeting audio.
 * It is a member of the BotToGateway union.
 */
export interface AudioChunkMessage {
// Discriminator identifying this message kind.
type: 'audioChunk';
// Session the audio belongs to.
sessionId: string;
audio: {
// Encoding of `data`. The sender visible in this change always sets 'pcm16'.
format: 'pcm16' | 'opus';
// Sample rate of the audio in Hz.
sampleRate: number;
data: string; // base64 encoded
// The sender visible in this change fills this with an ISO 8601 string (Date#toISOString).
timestamp: string;
};
}
export type GatewayToBot = PlayAudioMessage | JoinMeetingMessage | LeaveMeetingMessage | SendChatMessage; export type GatewayToBot = PlayAudioMessage | JoinMeetingMessage | LeaveMeetingMessage | SendChatMessage;
export type BotToGateway = TranscriptMessage | StatusMessage | ChatMessage; export type BotToGateway = TranscriptMessage | StatusMessage | ChatMessage | AudioChunkMessage;
// Bot State // Bot State
export type BotState = export type BotState =