From 04abaf9402e9ad25279ce7363d6540dd07687d9c Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Tue, 17 Feb 2026 18:43:30 +0100
Subject: [PATCH] feat: authenticated join flow, audio capture, camera
activation, transferMode support
Co-authored-by: Cursor
---
src/bot/audioCaptureProcedure.ts | 183 +++++++++++++++++
src/bot/orchestrator.ts | 333 ++++++++++++++++++++++++++++---
src/index.ts | 4 +-
src/server/httpServer.ts | 6 +-
src/sessionManager.ts | 4 +-
src/types/index.ts | 13 +-
6 files changed, 505 insertions(+), 38 deletions(-)
create mode 100644 src/bot/audioCaptureProcedure.ts
diff --git a/src/bot/audioCaptureProcedure.ts b/src/bot/audioCaptureProcedure.ts
new file mode 100644
index 0000000..b81a57e
--- /dev/null
+++ b/src/bot/audioCaptureProcedure.ts
@@ -0,0 +1,183 @@
+import { Page } from 'playwright';
+import { Logger } from 'winston';
+
+/**
+ * Captures incoming meeting audio by intercepting WebRTC RTCPeerConnection.
+ *
+ * How it works:
+ * 1. Before page navigation, wraps window.RTCPeerConnection via addInitScript
+ * 2. When Teams establishes WebRTC connections, the wrapper intercepts incoming audio tracks
+ * 3. The first incoming audio track is captured via AudioContext + ScriptProcessorNode
+ * 4. Audio chunks (PCM16, 16kHz mono, base64) are buffered in window.__audioCaptureChunks
+ * 5. The Node.js side polls that buffer and passes each chunk to the onAudioChunk callback
+ */
+export class AudioCaptureProcedure {
+  private readonly _page: Page;
+  private readonly _logger: Logger;
+  private readonly _onAudioChunk: (base64Data: string, sampleRate: number) => void;
+  private _isCapturing: boolean = false;
+  private _pollInterval: ReturnType<typeof setInterval> | null = null;
+  private _injected: boolean = false;
+
+  constructor(
+    page: Page,
+    logger: Logger,
+    onAudioChunk: (base64Data: string, sampleRate: number) => void,
+  ) {
+    this._page = page;
+    this._logger = logger;
+    this._onAudioChunk = onAudioChunk;
+  }
+
+  /**
+   * Inject the RTCPeerConnection wrapper BEFORE any page navigation.
+   * Must be called before navigating to Teams.
+   */
+  async injectCaptureOverride(): Promise<void> {
+    if (this._injected) return;
+
+    this._logger.info('[AudioCapture] Injecting RTCPeerConnection wrapper...');
+
+    await this._page.addInitScript(() => {
+      // Audio chunk buffer — Node.js polls this periodically
+      (window as any).__audioCaptureChunks = [] as string[];
+      (window as any).__audioCaptureActive = false;
+
+      const OrigRTC = window.RTCPeerConnection;
+
+      // @ts-ignore — wrapping constructor
+      window.RTCPeerConnection = function (this: RTCPeerConnection, ...args: any[]) {
+        const pc = new OrigRTC(...args);
+
+        pc.addEventListener('track', (event: RTCTrackEvent) => {
+          if (event.track.kind !== 'audio') return;
+          if ((window as any).__audioCaptureActive) return;
+          (window as any).__audioCaptureActive = true;
+
+          try {
+            const AudioCtx = window.AudioContext || (window as any).webkitAudioContext;
+            const ctx = new AudioCtx({ sampleRate: 16000 });
+            const stream = new MediaStream([event.track]);
+            const source = ctx.createMediaStreamSource(stream);
+
+            // ScriptProcessor for raw PCM access (deprecated but widely supported)
+            const processor = ctx.createScriptProcessor(4096, 1, 1);
+            let chunkBuffer: Float32Array[] = [];
+            let samplesCollected = 0;
+            const samplesPerChunk = 16000; // 1 second of audio at 16kHz
+
+            processor.onaudioprocess = (e: AudioProcessingEvent) => {
+              const input = e.inputBuffer.getChannelData(0);
+              chunkBuffer.push(new Float32Array(input));
+              samplesCollected += input.length;
+
+              if (samplesCollected >= samplesPerChunk) {
+                // Merge buffers into one Float32Array
+                const merged = new Float32Array(samplesCollected);
+                let offset = 0;
+                for (const buf of chunkBuffer) {
+                  merged.set(buf, offset);
+                  offset += buf.length;
+                }
+
+                // Convert Float32 [-1,1] to PCM16 Int16
+                const pcm16 = new Int16Array(merged.length);
+                for (let i = 0; i < merged.length; i++) {
+                  const s = Math.max(-1, Math.min(1, merged[i]));
+                  pcm16[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
+                }
+
+                // Convert to base64
+                const bytes = new Uint8Array(pcm16.buffer);
+                let binary = '';
+                for (let i = 0; i < bytes.length; i++) binary += String.fromCharCode(bytes[i]);
+                const base64 = btoa(binary);
+
+                // Push to buffer for Node.js to poll; once 30 chunks are pending, new ones are dropped
+                const chunks = (window as any).__audioCaptureChunks as string[];
+                if (chunks.length < 30) chunks.push(base64);
+                chunkBuffer = [];
+                samplesCollected = 0;
+              }
+            };
+
+            source.connect(processor);
+            processor.connect(ctx.destination);
+
+            // Store references for cleanup
+            (window as any).__audioCaptureCtx = ctx;
+            (window as any).__audioCaptureProcessor = processor;
+            console.log('[AudioCapture] WebRTC audio track intercepted, capturing at 16kHz mono');
+          } catch (err) {
+            // Release the claim so a later audio track can retry instead of capture staying dead
+            (window as any).__audioCaptureActive = false;
+            console.error('[AudioCapture] Failed to set up audio capture:', err);
+          }
+        });
+
+        return pc;
+      } as any;
+
+      // Copy static properties
+      window.RTCPeerConnection.prototype = OrigRTC.prototype;
+      Object.setPrototypeOf(window.RTCPeerConnection, OrigRTC);
+    });
+
+    this._injected = true;
+    this._logger.info('[AudioCapture] RTCPeerConnection wrapper injected');
+  }
+
+  /**
+   * Start polling (every 500 ms) for captured audio chunks and forwarding them to the callback.
+   */
+  async startCapture(): Promise<void> {
+    if (this._isCapturing) return;
+    this._isCapturing = true;
+    this._logger.info('[AudioCapture] Starting audio chunk polling...');
+
+    let pollInFlight = false; // skip a tick while the previous evaluate() is still pending
+    this._pollInterval = setInterval(async () => {
+      if (pollInFlight) return;
+      pollInFlight = true;
+      try {
+        const chunks = await this._page.evaluate(() => {
+          // The buffer only exists once the init script has run in the current document
+          const buf = (window as any).__audioCaptureChunks as string[] | undefined;
+          return Array.isArray(buf) ? buf.splice(0, buf.length) : [];
+        });
+        for (const chunk of chunks) this._onAudioChunk(chunk, 16000);
+      } catch (err) {
+        // Page might be navigating or closed; the next tick retries
+        this._logger.debug(`[AudioCapture] Poll failed: ${err instanceof Error ? err.message : String(err)}`);
+      } finally {
+        pollInFlight = false;
+      }
+    }, 500);
+  }
+
+  /**
+   * Stop polling and close the in-page AudioContext (best-effort; safe to call repeatedly).
+   */
+  async stopCapture(): Promise<void> {
+    this._isCapturing = false;
+
+    if (this._pollInterval) {
+      clearInterval(this._pollInterval);
+      this._pollInterval = null;
+    }
+
+    try {
+      await this._page.evaluate(() => {
+        (window as any).__audioCaptureActive = false;
+        const ctx = (window as any).__audioCaptureCtx as AudioContext | undefined;
+        (window as any).__audioCaptureCtx = undefined;
+        // close() rejects on an already-closed context; swallow that inside the page
+        if (ctx && ctx.state !== 'closed') void ctx.close().catch(() => undefined);
+      });
+    } catch {
+      // Page might already be closed
+    }
+
+    this._logger.info('[AudioCapture] Audio capture stopped');
+  }
+}
diff --git a/src/bot/orchestrator.ts b/src/bot/orchestrator.ts
index 4f35e00..75de834 100644
--- a/src/bot/orchestrator.ts
+++ b/src/bot/orchestrator.ts
@@ -7,11 +7,13 @@ import WebSocket from 'ws';
import { config } from '../config';
import { createSessionLogger } from '../utils/logger';
-import { BotSession, BotState, TranscriptEntry, StatusMessage, TranscriptMessage, PlayAudioMessage, ChatMessage, SendChatMessage } from '../types';
+import { BotSession, BotState, TranscriptEntry, StatusMessage, TranscriptMessage, PlayAudioMessage, ChatMessage, SendChatMessage, AudioChunkMessage } from '../types';
import { JoinProcedure } from './joinProcedure';
import { CaptionsProcedure } from './captionsProcedure';
import { AudioProcedure } from './audioProcedure';
+import { AudioCaptureProcedure } from './audioCaptureProcedure';
import { ChatProcedure, ChatMessageEntry } from './chatProcedure';
+import { AuthProcedure } from './authProcedure';
import { isValidMeetingUrl } from './meetingUrlParser';
export interface OrchestratorCallbacks {
@@ -26,7 +28,7 @@ export interface OrchestratorOptions {
language?: string;
botAccountEmail?: string;
botAccountPassword?: string;
- backgroundImageUrl?: string;
+ transferMode?: string;
}
/**
@@ -57,6 +59,7 @@ export class BotOrchestrator {
private _joinProcedure: JoinProcedure | null = null;
private _captionsProcedure: CaptionsProcedure | null = null;
private _audioProcedure: AudioProcedure | null = null;
+ private _audioCaptureProcedure: AudioCaptureProcedure | null = null;
private _chatProcedure: ChatProcedure | null = null;
private _state: BotState = 'idle';
@@ -87,10 +90,7 @@ export class BotOrchestrator {
/**
* Start the bot - connect to Gateway, launch browser, join meeting, enable captions.
- *
- * NOTE: Authentication is disabled. The bot always joins as an anonymous guest
- * with the configured bot name (typically the system bot's display name, e.g. "Nyla Larsson").
- * See Teamsbot-Auth-Join-Learnings.md for details on why and how to re-enable.
+ * Chooses between anonymous join and authenticated join based on credentials.
*/
async start(): Promise {
if (!isValidMeetingUrl(this._meetingUrl)) {
@@ -103,8 +103,15 @@ export class BotOrchestrator {
// Connect to Gateway WebSocket first
await this._connectToGateway();
- // Join meeting as anonymous guest with configured bot name
- await this._attemptJoin();
+ // Choose join method based on credentials
+ const hasCredentials = !!(this._options.botAccountEmail && this._options.botAccountPassword);
+ if (hasCredentials) {
+ this._logger.info(`Authenticated join as: ${this._options.botAccountEmail}`);
+ await this._attemptAuthJoin();
+ } else {
+ this._logger.info('Anonymous join with bot name: ' + this._botName);
+ await this._attemptJoin();
+ }
} catch (error) {
this._logger.error('Error starting bot:', error);
@@ -116,11 +123,6 @@ export class BotOrchestrator {
/**
* Join a meeting as anonymous guest with the configured bot name.
- *
- * NOTE: Authentication is disabled. See Teamsbot-Auth-Join-Learnings.md.
- * The bot name (e.g. "Nyla Larsson") comes from the system bot's display name,
- * configured in the Gateway. This provides a consistent identity without
- * requiring Microsoft authentication.
*/
private async _attemptJoin(): Promise {
// Launch browser
@@ -150,16 +152,197 @@ export class BotOrchestrator {
// Dismiss any post-join permission modals (e.g. "Manage windows on all displays")
await this._joinProcedure!.dismissBrowserPermissionModals();
- // Initialize audio
+ // Initialize audio playback
await this._audioProcedure!.initialize();
- // Enable and subscribe to captions
- await this._enableCaptions();
+ // Enable transcript capture (captions or audio based on transferMode)
+ await this._enableTranscriptCapture();
// Enable chat monitoring
await this._enableChat();
}
+  /**
+   * Join a meeting as authenticated user (System Bot or User Account).
+   * Flow: teams.microsoft.com → MS Login → Teams Chat → Join → Pre-Join → Join now
+   *
+   * NOTE(review): this flow never opens this._meetingUrl; it clicks the "Join" button of
+   * whatever chat Teams shows after login — confirm that this is always the intended meeting.
+   *
+   * @throws Error if authentication fails or no "Join" / "Join now" button could be clicked; errors from browser launch, navigation and meeting admission also propagate.
+   */
+  private async _attemptAuthJoin(): Promise<void> {
+    // Launch browser in headful mode with minimal args (Chromium Minimal)
+    await this._launchBrowser(true);
+
+    this._setState('navigating');
+
+    // STEP 1: Navigate to teams.microsoft.com
+    this._logger.info('Auth join: navigating to teams.microsoft.com');
+    await this._page!.goto('https://teams.microsoft.com', { waitUntil: 'domcontentloaded', timeout: 30000 });
+
+    // Wait for login redirect (a missing redirect is only logged; the flow continues)
+    try {
+      await this._page!.waitForURL('**/login.microsoftonline.com/**', { timeout: 30000 });
+      this._logger.info('Redirected to MS login page');
+    } catch {
+      this._logger.warn(`No login redirect, current URL: ${this._page!.url().substring(0, 150)}`);
+    }
+
+    // Wait for login page to render
+    try {
+      await this._page!.waitForSelector('input[name="loginfmt"], input[type="email"]', { timeout: 15000, state: 'visible' });
+    } catch {
+      this._logger.warn('Login page elements not found');
+    }
+
+    // STEP 2: Microsoft Authentication
+    this._logger.info(`Authenticating as ${this._options.botAccountEmail}`);
+    const authProcedure = new AuthProcedure(this._page!, this._logger);
+    const authSuccess = await authProcedure.authenticateWithMicrosoft(
+      this._options.botAccountEmail!,
+      this._options.botAccountPassword!,
+      true,
+    );
+
+    if (!authSuccess) {
+      throw new Error('Microsoft authentication failed');
+    }
+    this._logger.info('Authentication successful');
+
+    // STEP 3: Wait for Teams chat page (landing page after auth)
+    try {
+      await this._page!.waitForURL('**/teams.microsoft.com/**', { timeout: 30000 });
+    } catch {
+      // Also accept teams.cloud.microsoft
+      try {
+        await this._page!.waitForURL('**/teams.cloud.microsoft/**', { timeout: 10000 });
+      } catch {
+        this._logger.warn(`Unexpected URL after auth: ${this._page!.url().substring(0, 150)}`);
+      }
+    }
+
+    // Wait for "Join" button in chat header (a timeout is only logged; STEP 4 decides whether to fail)
+    try {
+      await this._page!.waitForSelector(
+        'button[data-tid="chat-join-button"], button[data-tid="join-call-button"]',
+        { timeout: 30000, state: 'visible' },
+      );
+      this._logger.info('Teams chat page loaded, "Join" button found');
+    } catch {
+      this._logger.warn('"Join" button not found in chat header');
+      await this._takeScreenshot('auth-no-join-button');
+    }
+
+    // STEP 4: Click "Join" in chat header → Pre-Join screen
+    this._logger.info('Clicking "Join" in chat header');
+    const chatJoinSelectors = [
+      'button[data-tid="chat-join-button"]',
+      'button[data-tid="join-call-button"]',
+    ];
+
+    let chatJoinClicked = false;
+    for (const selector of chatJoinSelectors) {
+      try {
+        const btn = await this._page!.waitForSelector(selector, { timeout: 5000, state: 'visible' });
+        if (btn) {
+          await btn.click();
+          chatJoinClicked = true;
+          break;
+        }
+      } catch { /* try next */ }
+    }
+
+    if (!chatJoinClicked) {
+      throw new Error('"Join" button in chat header not found');
+    }
+
+    // STEP 5: Pre-Join screen → Click "Join now"
+    this._logger.info('Waiting for pre-join screen');
+    try {
+      await this._page!.waitForSelector(
+        'button:has-text("Join now"), button:has-text("Jetzt teilnehmen"), button[data-tid="prejoin-join-button"]',
+        { timeout: 30000, state: 'visible' },
+      );
+    } catch {
+      this._logger.warn('"Join now" button not found');
+      await this._takeScreenshot('auth-no-join-now');
+    }
+
+    // Activate camera toggle if it's off (so background image is visible)
+    await this._ensureCameraOn();
+
+    await this._page!.waitForTimeout(2000);
+
+    const joinNowSelectors = [
+      'button:has-text("Join now")',
+      'button:has-text("Jetzt teilnehmen")',
+      'button[data-tid="prejoin-join-button"]',
+    ];
+
+    let joinNowClicked = false;
+    for (const selector of joinNowSelectors) {
+      try {
+        const btn = await this._page!.waitForSelector(selector, { timeout: 5000, state: 'visible' });
+        if (btn) {
+          await btn.click();
+          joinNowClicked = true;
+          break;
+        }
+      } catch { /* try next */ }
+    }
+
+    if (!joinNowClicked) {
+      throw new Error('"Join now" button not found on pre-join screen');
+    }
+
+    this._logger.info('Clicked "Join now", waiting for meeting');
+
+    // Wait for meeting admission (hangup button = in meeting)
+    await this._waitForMeetingAdmission();
+
+    this._setState('in_meeting');
+    this._logger.info(`Bot joined the meeting (authenticated as ${this._options.botAccountEmail})`);
+
+    // Initialize audio playback
+    await this._audioProcedure!.initialize();
+
+    // Enable transcript capture (captions or audio based on transferMode)
+    await this._enableTranscriptCapture();
+    await this._enableChat();
+  }
+
+  /**
+   * Ensure the camera is turned on in the pre-join screen.
+   * When camera is on, Teams shows the profile/background image.
+   * Best-effort: every failure is logged as a warning and never thrown.
+   */
+  private async _ensureCameraOn(): Promise<void> {
+    try {
+      // Look for camera toggle button (the `i` flag makes the aria-label match case-insensitive)
+      const cameraBtn = await this._page!.$('button[data-tid="toggle-video"], button[aria-label*="camera" i], button[aria-label*="Video" i]');
+      if (!cameraBtn) {
+        this._logger.warn('Camera toggle button not found');
+        return;
+      }
+      // NOTE(review): treating `.fui-Icon-regular` as an "off" marker is a heuristic — confirm against the live Teams DOM.
+      const isOff = await cameraBtn.evaluate((el) =>
+        el.getAttribute('aria-pressed') === 'false' ||
+        el.getAttribute('aria-checked') === 'false' ||
+        el.classList.contains('is-off') ||
+        el.querySelector('.fui-Icon-regular') !== null);
+      if (!isOff) {
+        this._logger.info('Camera already ON');
+        return;
+      }
+      await cameraBtn.click();
+      this._logger.info('Camera toggled ON');
+      await this._page!.waitForTimeout(1000);
+    } catch (err) {
+      this._logger.warn('Could not toggle camera:', err);
+    }
+  }
+
/**
* Connect to the Gateway WebSocket for this session.
*/
@@ -288,6 +471,9 @@ export class BotOrchestrator {
url = `${this._httpBaseUrl}/bot/transcript/${this._sessionId}`;
} else if (msgType === 'status') {
url = `${this._httpBaseUrl}/bot/status/${this._sessionId}`;
+ } else if (msgType === 'audioChunk') {
+ // Audio chunks are too frequent for HTTP — only send via WebSocket
+ return;
} else {
this._logger.debug(`HTTP fallback: unsupported message type ${msgType}`);
return;
@@ -351,6 +537,11 @@ export class BotOrchestrator {
try {
this._setState('leaving');
+ // Stop audio capture
+ if (this._audioCaptureProcedure) {
+ await this._audioCaptureProcedure.stopCapture();
+ }
+
// Unsubscribe from captions and chat
if (this._captionsProcedure) {
await this._captionsProcedure.unsubscribe();
@@ -359,7 +550,7 @@ export class BotOrchestrator {
await this._chatProcedure.unsubscribe();
}
- // Clean up audio
+ // Clean up audio playback
if (this._audioProcedure) {
await this._audioProcedure.cleanup();
}
@@ -403,20 +594,30 @@ export class BotOrchestrator {
/**
* Launch the browser and create a new page.
+ * @param authMode - If true, use headful + minimal args (Chromium Minimal, proven to work for auth)
*/
- private async _launchBrowser(): Promise {
- this._logger.info('Launching browser...');
+ private async _launchBrowser(authMode: boolean = false): Promise {
+ this._logger.info(`Launching browser (authMode=${authMode})...`);
+
+ const args = authMode
+ ? [
+ // Chromium Minimal: only --no-sandbox + fake media (proven to work for authenticated join)
+ '--no-sandbox',
+ '--use-fake-ui-for-media-stream',
+ '--use-fake-device-for-media-stream',
+ ]
+ : [
+ '--use-fake-ui-for-media-stream',
+ '--use-fake-device-for-media-stream',
+ '--disable-web-security',
+ '--disable-features=IsolateOrigins,site-per-process',
+ '--autoplay-policy=no-user-gesture-required',
+ '--disable-blink-features=AutomationControlled',
+ ];
this._browser = await chromium.launch({
- headless: config.botHeadless,
- args: [
- '--use-fake-ui-for-media-stream', // Auto-accept media permissions
- '--use-fake-device-for-media-stream', // Provide fake camera/mic so Teams sees devices
- '--disable-web-security',
- '--disable-features=IsolateOrigins,site-per-process',
- '--autoplay-policy=no-user-gesture-required',
- '--disable-blink-features=AutomationControlled', // Prevent navigator.webdriver=true
- ],
+ headless: authMode ? false : config.botHeadless,
+ args,
});
this._context = await this._browser.newContext({
@@ -463,8 +664,15 @@ export class BotOrchestrator {
if (!window.chrome.runtime) { window.chrome.runtime = {}; }
});
- // Initialize procedures (always anonymous join)
+ // Initialize procedures
this._joinProcedure = new JoinProcedure(this._page, this._logger, this._botName);
+ this._audioCaptureProcedure = new AudioCaptureProcedure(
+ this._page,
+ this._logger,
+ (base64Data, sampleRate) => {
+ this._sendAudioChunk(base64Data, sampleRate);
+ },
+ );
this._captionsProcedure = new CaptionsProcedure(
this._page,
this._logger,
@@ -496,6 +704,12 @@ export class BotOrchestrator {
// This ensures Teams gets our controlled audio stream when it calls getUserMedia
await this._audioProcedure.injectAudioOverride();
+ // Inject audio capture (WebRTC interception) if transfer mode requires it
+ const transferMode = this._getEffectiveTransferMode();
+ if (transferMode === 'audio') {
+ await this._audioCaptureProcedure!.injectCaptureOverride();
+ }
+
// Handle page errors
this._page.on('pageerror', (error) => {
this._logger.error('Page error:', error);
@@ -584,6 +798,19 @@ export class BotOrchestrator {
throw new Error('Timeout waiting to be admitted from lobby');
}
+  /**
+   * Resolve the configured transferMode to the concrete capture mode.
+   * Explicit 'caption' / 'audio' win; unset or any other value is treated as auto:
+   * anonymous → audio (Teams only provides English captions), authenticated → caption.
+   */
+  private _getEffectiveTransferMode(): 'caption' | 'audio' {
+    const { transferMode, botAccountEmail, botAccountPassword } = this._options;
+    if (transferMode === 'caption' || transferMode === 'audio') return transferMode;
+    // auto: anonymous join → audio, authenticated join → caption
+    const authenticated = Boolean(botAccountEmail && botAccountPassword);
+    return authenticated ? 'caption' : 'audio';
+  }
+
/**
* Enable captions and start scraping.
*/
@@ -594,7 +821,36 @@ export class BotOrchestrator {
this._logger.info('Captions enabled and subscribed');
} catch (error) {
this._logger.warn('Could not enable captions:', error);
- // Continue without captions - not a fatal error
+ }
+ }
+
+  /**
+   * Start polling captured meeting audio; best-effort (a failure is logged as a warning, never thrown).
+   */
+  private async _enableAudioCapture(): Promise<void> {
+    if (!this._audioCaptureProcedure) {
+      this._logger.warn('Audio capture procedure not initialized');
+      return;
+    }
+    try {
+      await this._audioCaptureProcedure.startCapture();
+      this._logger.info('Audio capture started (PCM16 16kHz mono)');
+    } catch (error) {
+      this._logger.warn('Could not start audio capture:', error);
+    }
+  }
+
+  /**
+   * Enable transcript capture: captions when the effective transfer mode is 'caption', audio capture otherwise.
+   */
+  private async _enableTranscriptCapture(): Promise<void> {
+    const transferMode = this._getEffectiveTransferMode();
+    this._logger.info(`Transfer mode: ${transferMode} (configured: ${this._options.transferMode || 'auto'})`);
+
+    if (transferMode === 'caption') {
+      await this._enableCaptions();
+    } else {
+      await this._enableAudioCapture();
}
}
@@ -628,6 +884,23 @@ export class BotOrchestrator {
this._sendToGateway(message);
}
+  /**
+   * Send an audio chunk to the Gateway for STT processing.
+   *
+   * @param base64Data - base64-encoded chunk as delivered by the capture callback
+   * @param sampleRate - sample rate of the chunk in Hz
+   */
+  private _sendAudioChunk(base64Data: string, sampleRate: number): void {
+    // The timestamp is taken here, when the chunk is forwarded
+    const audio: AudioChunkMessage['audio'] = {
+      format: 'pcm16',
+      sampleRate,
+      data: base64Data,
+      timestamp: new Date().toISOString(),
+    };
+    this._sendToGateway({ type: 'audioChunk', sessionId: this._sessionId, audio });
+  }
+
/**
* Send a text message to the meeting chat.
*/
diff --git a/src/index.ts b/src/index.ts
index 9551dbe..1bfe959 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -19,8 +19,8 @@ async function main(): Promise {
// Start HTTP server
httpServer = new HttpServer({
- onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl) => {
- await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl);
+ onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode) => {
+ await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode);
},
onLeaveRequest: async (sessionId) => {
await sessionManager.endSession(sessionId);
diff --git a/src/server/httpServer.ts b/src/server/httpServer.ts
index 818e36d..3296728 100644
--- a/src/server/httpServer.ts
+++ b/src/server/httpServer.ts
@@ -5,7 +5,7 @@ import { config } from '../config';
import { runAuthTests, runSingleVariant, getVariantIds } from '../bot/authTestProcedure';
export interface HttpServerCallbacks {
- onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string, language?: string, botAccountEmail?: string, botAccountPassword?: string, backgroundImageUrl?: string) => Promise;
+ onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string, language?: string, botAccountEmail?: string, botAccountPassword?: string, transferMode?: string) => Promise;
onLeaveRequest: (sessionId: string) => Promise;
onStatusRequest: (sessionId: string) => { state: string; error?: string } | null;
}
@@ -78,14 +78,14 @@ export class HttpServer {
// Deploy a new bot
this._app.post('/api/bot', async (req: Request, res: Response) => {
try {
- const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl } = req.body;
+ const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode } = req.body;
if (!sessionId || !meetingUrl) {
res.status(400).json({ error: 'Missing required fields: sessionId, meetingUrl' });
return;
}
- await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl);
+ await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode);
res.json({
success: true,
diff --git a/src/sessionManager.ts b/src/sessionManager.ts
index 4b48d5d..81af19c 100644
--- a/src/sessionManager.ts
+++ b/src/sessionManager.ts
@@ -41,7 +41,7 @@ export class SessionManager {
language?: string,
botAccountEmail?: string,
botAccountPassword?: string,
- backgroundImageUrl?: string
+ transferMode?: string,
): Promise {
if (this._sessions.has(sessionId)) {
logger.warn(`Session ${sessionId} already exists`);
@@ -75,7 +75,7 @@ export class SessionManager {
language: language,
botAccountEmail: botAccountEmail,
botAccountPassword: botAccountPassword,
- backgroundImageUrl: backgroundImageUrl,
+ transferMode: transferMode,
};
const orchestrator = new BotOrchestrator(
diff --git a/src/types/index.ts b/src/types/index.ts
index 415e789..2d5033e 100644
--- a/src/types/index.ts
+++ b/src/types/index.ts
@@ -55,8 +55,19 @@ export interface SendChatMessage {
text: string;
}
+export interface AudioChunkMessage {
+ type: 'audioChunk'; // discriminant within the BotToGateway union
+ sessionId: string;
+ audio: {
+ format: 'pcm16' | 'opus'; // the orchestrator in this patch only sends 'pcm16'
+ sampleRate: number; // Hz (the capture path in this patch uses 16000)
+ data: string; // base64 encoded
+ timestamp: string; // ISO-8601 string (the sender sets it via Date#toISOString)
+ };
+}
+
export type GatewayToBot = PlayAudioMessage | JoinMeetingMessage | LeaveMeetingMessage | SendChatMessage;
-export type BotToGateway = TranscriptMessage | StatusMessage | ChatMessage;
+export type BotToGateway = TranscriptMessage | StatusMessage | ChatMessage | AudioChunkMessage;
// Bot State
export type BotState =