diff --git a/src/bot/audioCaptureProcedure.ts b/src/bot/audioCaptureProcedure.ts
new file mode 100644
index 0000000..b81a57e
--- /dev/null
+++ b/src/bot/audioCaptureProcedure.ts
@@ -0,0 +1,252 @@
+import { Page } from 'playwright';
+import { Logger } from 'winston';
+
+/** PCM sample rate (Hz) used for capture and reported with every chunk. */
+const CAPTURE_SAMPLE_RATE = 16000;
+
+/** Interval (ms) at which the Node.js side drains the in-page chunk buffer. */
+const POLL_INTERVAL_MS = 500;
+
+/**
+ * Captures incoming meeting audio by intercepting WebRTC RTCPeerConnection.
+ *
+ * How it works:
+ * 1. Before page navigation, wraps window.RTCPeerConnection via addInitScript
+ * 2. When Teams establishes WebRTC connections, the wrapper intercepts incoming audio tracks
+ * 3. Incoming audio tracks are captured via AudioContext + ScriptProcessorNode
+ * 4. Audio chunks (PCM16, 16kHz mono) are buffered in a page-global array
+ * 5. The Node.js side polls that array and forwards each chunk to the onAudioChunk callback
+ */
+export class AudioCaptureProcedure {
+  private _page: Page;
+  private _logger: Logger;
+  private _onAudioChunk: (base64Data: string, sampleRate: number) => void;
+  private _isCapturing: boolean = false;
+  private _pollInterval: ReturnType<typeof setInterval> | null = null;
+  private _pollInFlight: boolean = false;
+  private _injected: boolean = false;
+
+  /**
+   * @param page - Playwright page the meeting is loaded in
+   * @param logger - Logger for capture diagnostics
+   * @param onAudioChunk - Invoked per chunk with base64-encoded PCM16 data and its sample rate
+   */
+  constructor(
+    page: Page,
+    logger: Logger,
+    onAudioChunk: (base64Data: string, sampleRate: number) => void,
+  ) {
+    this._page = page;
+    this._logger = logger;
+    this._onAudioChunk = onAudioChunk;
+  }
+
+  /**
+   * Inject the RTCPeerConnection wrapper BEFORE any page navigation.
+   * Must be called before navigating to Teams. Repeated calls are no-ops.
+   */
+  async injectCaptureOverride(): Promise<void> {
+    if (this._injected) return;
+
+    this._logger.info('[AudioCapture] Injecting RTCPeerConnection wrapper...');
+
+    // The callback runs in the page, not in Node.js, so the sample rate is passed as an argument
+    await this._page.addInitScript((sampleRate: number) => {
+      const win = window as any;
+
+      // Audio chunk buffer — Node.js polls this periodically
+      win.__audioCaptureChunks = [] as string[];
+      win.__audioCaptureActive = false;
+
+      const OrigRTC = window.RTCPeerConnection;
+
+      // @ts-ignore — wrapping constructor
+      window.RTCPeerConnection = function (this: RTCPeerConnection, ...args: any[]) {
+        const pc = new OrigRTC(...args);
+
+        pc.addEventListener('track', (event: RTCTrackEvent) => {
+          if (event.track.kind !== 'audio') return;
+          // Only one incoming audio track is captured at a time
+          if (win.__audioCaptureActive) return;
+          win.__audioCaptureActive = true;
+
+          try {
+            const AudioCtx = window.AudioContext || win.webkitAudioContext;
+            const ctx = new AudioCtx({ sampleRate });
+            const stream = new MediaStream([event.track]);
+            const source = ctx.createMediaStreamSource(stream);
+
+            // ScriptProcessor for raw PCM access (deprecated but widely supported)
+            const processor = ctx.createScriptProcessor(4096, 1, 1);
+            let chunkBuffer: Float32Array[] = [];
+            let samplesCollected = 0;
+            const samplesPerChunk = sampleRate; // 1 second of audio
+
+            processor.onaudioprocess = (e: AudioProcessingEvent) => {
+              const input = e.inputBuffer.getChannelData(0);
+              chunkBuffer.push(new Float32Array(input));
+              samplesCollected += input.length;
+
+              if (samplesCollected >= samplesPerChunk) {
+                // Merge buffers into one Float32Array
+                const merged = new Float32Array(samplesCollected);
+                let offset = 0;
+                for (const buf of chunkBuffer) {
+                  merged.set(buf, offset);
+                  offset += buf.length;
+                }
+
+                // Convert Float32 [-1,1] to PCM16 Int16
+                const pcm16 = new Int16Array(merged.length);
+                for (let i = 0; i < merged.length; i++) {
+                  const s = Math.max(-1, Math.min(1, merged[i]));
+                  pcm16[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
+                }
+
+                // Convert to base64
+                const bytes = new Uint8Array(pcm16.buffer);
+                let binary = '';
+                for (let i = 0; i < bytes.length; i++) {
+                  binary += String.fromCharCode(bytes[i]);
+                }
+                const base64 = btoa(binary);
+
+                // Push to buffer for Node.js to poll
+                const chunks = win.__audioCaptureChunks as string[];
+                if (chunks.length < 30) {
+                  chunks.push(base64);
+                }
+
+                chunkBuffer = [];
+                samplesCollected = 0;
+              }
+            };
+
+            source.connect(processor);
+            processor.connect(ctx.destination);
+
+            // Store references for cleanup
+            win.__audioCaptureCtx = ctx;
+            win.__audioCaptureProcessor = processor;
+
+            // When the captured track ends, release the audio graph and re-arm so the
+            // next incoming audio track is captured instead of capture going silent
+            event.track.addEventListener('ended', () => {
+              if (win.__audioCaptureCtx !== ctx) return; // already torn down by stopCapture
+              processor.onaudioprocess = null;
+              processor.disconnect();
+              source.disconnect();
+              ctx.close().catch(() => {});
+              win.__audioCaptureCtx = undefined;
+              win.__audioCaptureProcessor = undefined;
+              win.__audioCaptureActive = false;
+            });
+
+            console.log('[AudioCapture] WebRTC audio track intercepted, capturing at 16kHz mono');
+          } catch (err) {
+            // Re-arm so a later audio track can retry instead of capture staying disabled
+            win.__audioCaptureActive = false;
+            console.error('[AudioCapture] Failed to set up audio capture:', err);
+          }
+        });
+
+        return pc;
+      } as any;
+
+      // Copy static properties
+      window.RTCPeerConnection.prototype = OrigRTC.prototype;
+      Object.setPrototypeOf(window.RTCPeerConnection, OrigRTC);
+    }, CAPTURE_SAMPLE_RATE);
+
+    this._injected = true;
+    this._logger.info('[AudioCapture] RTCPeerConnection wrapper injected');
+  }
+
+  /**
+   * Start polling for captured audio chunks and forwarding them to the callback.
+   * A poll is skipped while the previous one is still in flight.
+   */
+  async startCapture(): Promise<void> {
+    if (this._isCapturing) return;
+    this._isCapturing = true;
+
+    this._logger.info('[AudioCapture] Starting audio chunk polling...');
+
+    this._pollInterval = setInterval(() => {
+      void this._poll();
+    }, POLL_INTERVAL_MS);
+  }
+
+  /**
+   * Stop capturing audio: stops polling, tears down the in-page audio graph and
+   * forwards any chunks that were still buffered.
+   */
+  async stopCapture(): Promise<void> {
+    const wasCapturing = this._isCapturing;
+    this._isCapturing = false;
+
+    if (this._pollInterval) {
+      clearInterval(this._pollInterval);
+      this._pollInterval = null;
+    }
+
+    try {
+      await this._page.evaluate(async () => {
+        const win = window as any;
+        win.__audioCaptureActive = false;
+
+        const processor = win.__audioCaptureProcessor as ScriptProcessorNode | undefined;
+        if (processor) {
+          processor.onaudioprocess = null;
+          processor.disconnect();
+        }
+
+        const ctx = win.__audioCaptureCtx as AudioContext | undefined;
+        win.__audioCaptureProcessor = undefined;
+        win.__audioCaptureCtx = undefined;
+        if (ctx && ctx.state !== 'closed') await ctx.close();
+      });
+    } catch {
+      // Page might already be closed
+    }
+
+    // Forward what was buffered before teardown so the tail of the audio is not lost
+    if (wasCapturing) {
+      await this._drainChunks();
+    }
+
+    this._logger.info('[AudioCapture] Audio capture stopped');
+  }
+
+  /** Run one poll unless the previous one is still in flight. */
+  private async _poll(): Promise<void> {
+    if (this._pollInFlight) return;
+    this._pollInFlight = true;
+    try {
+      await this._drainChunks();
+    } finally {
+      this._pollInFlight = false;
+    }
+  }
+
+  /**
+   * Move all buffered chunks out of the page and hand them to the callback.
+   * Best effort: failures (page navigating or closed) are only logged at debug level.
+   */
+  private async _drainChunks(): Promise<void> {
+    try {
+      const chunks = await this._page.evaluate(() => {
+        // The buffer is missing if the init script has not run in this document
+        const buf = (window as any).__audioCaptureChunks as string[] | undefined;
+        return buf ? buf.splice(0, buf.length) : [];
+      });
+
+      for (const chunk of chunks) {
+        this._onAudioChunk(chunk, CAPTURE_SAMPLE_RATE);
+      }
+    } catch (err) {
+      // Page might be navigating or closed
+      this._logger.debug('[AudioCapture] Chunk poll failed:', err);
+    }
+  }
+}
diff --git a/src/bot/orchestrator.ts b/src/bot/orchestrator.ts
index 4f35e00..75de834 100644
--- a/src/bot/orchestrator.ts
+++ b/src/bot/orchestrator.ts
@@ -7,11 +7,13 @@ import WebSocket from 'ws';
 
 import { config } from '../config';
 import { createSessionLogger } from '../utils/logger';
-import { BotSession, BotState, TranscriptEntry, StatusMessage, TranscriptMessage, PlayAudioMessage, ChatMessage, SendChatMessage } from '../types';
+import { BotSession, BotState, TranscriptEntry, StatusMessage, TranscriptMessage, PlayAudioMessage, ChatMessage, SendChatMessage, AudioChunkMessage } from '../types';
 import { JoinProcedure } from './joinProcedure';
 import { CaptionsProcedure } from './captionsProcedure';
 import { AudioProcedure } from './audioProcedure';
+import { AudioCaptureProcedure } from './audioCaptureProcedure';
 import { ChatProcedure, ChatMessageEntry } from './chatProcedure';
+import { AuthProcedure } from './authProcedure';
 import { isValidMeetingUrl } from './meetingUrlParser';
 
 export interface OrchestratorCallbacks {
@@ -26,7 +28,7 @@ export interface OrchestratorOptions {
   language?: string;
   botAccountEmail?: string;
   botAccountPassword?: string;
-  backgroundImageUrl?: string;
+  transferMode?: string;
 }
 
 /**
@@ -57,6 +59,7 @@ export class BotOrchestrator {
   private _joinProcedure: JoinProcedure | null = null;
   private _captionsProcedure: CaptionsProcedure | null = null;
   private _audioProcedure: AudioProcedure | null = null;
+  private _audioCaptureProcedure: AudioCaptureProcedure | null = null;
   private _chatProcedure: ChatProcedure | null = null;
 
   private _state: BotState = 'idle';
@@ -87,10 +90,7 @@ export class BotOrchestrator {
 
   /**
    * Start the bot - connect to Gateway, launch browser, join meeting, enable captions.
-   *
-   * NOTE: Authentication is disabled. The bot always joins as an anonymous guest
-   * with the configured bot name (typically the system bot's display name, e.g. "Nyla Larsson").
-   * See Teamsbot-Auth-Join-Learnings.md for details on why and how to re-enable.
+   * Chooses between anonymous join and authenticated join based on credentials.
    */
   async start(): Promise<void> {
     if (!isValidMeetingUrl(this._meetingUrl)) {
@@ -103,8 +103,15 @@ export class BotOrchestrator {
       // Connect to Gateway WebSocket first
       await this._connectToGateway();
 
-      // Join meeting as anonymous guest with configured bot name
-      await this._attemptJoin();
+      // Choose join method based on credentials
+      const hasCredentials = !!(this._options.botAccountEmail && this._options.botAccountPassword);
+      if (hasCredentials) {
+        this._logger.info(`Authenticated join as: ${this._options.botAccountEmail}`);
+        await this._attemptAuthJoin();
+      } else {
+        this._logger.info('Anonymous join with bot name: ' + this._botName);
+        await this._attemptJoin();
+      }
 
     } catch (error) {
       this._logger.error('Error starting bot:', error);
@@ -116,11 +123,6 @@ export class BotOrchestrator {
 
   /**
    * Join a meeting as anonymous guest with the configured bot name.
-   *
-   * NOTE: Authentication is disabled. See Teamsbot-Auth-Join-Learnings.md.
-   * The bot name (e.g. "Nyla Larsson") comes from the system bot's display name,
-   * configured in the Gateway. This provides a consistent identity without
-   * requiring Microsoft authentication.
    */
   private async _attemptJoin(): Promise<void> {
     // Launch browser
@@ -150,16 +152,197 @@ export class BotOrchestrator {
     // Dismiss any post-join permission modals (e.g. "Manage windows on all displays")
     await this._joinProcedure!.dismissBrowserPermissionModals();
 
-    // Initialize audio
+    // Initialize audio playback
     await this._audioProcedure!.initialize();
 
-    // Enable and subscribe to captions
-    await this._enableCaptions();
+    // Enable transcript capture (captions or audio based on transferMode)
+    await this._enableTranscriptCapture();
 
     // Enable chat monitoring
     await this._enableChat();
   }
 
+  /**
+   * Join a meeting as authenticated user (System Bot or User Account).
+   * Flow: teams.microsoft.com → MS Login → Teams Chat → Join → Pre-Join → Join now
+   */
+  private async _attemptAuthJoin(): Promise<void> {
+    // Launch browser in headful mode with minimal args (Chromium Minimal)
+    await this._launchBrowser(true);
+
+    this._setState('navigating');
+
+    // STEP 1: Navigate to teams.microsoft.com
+    this._logger.info('Auth join: navigating to teams.microsoft.com');
+    await this._page!.goto('https://teams.microsoft.com', {
+      waitUntil: 'domcontentloaded',
+      timeout: 30000,
+    });
+
+    // Wait for login redirect
+    try {
+      await this._page!.waitForURL('**/login.microsoftonline.com/**', { timeout: 30000 });
+      this._logger.info('Redirected to MS login page');
+    } catch {
+      this._logger.warn(`No login redirect, current URL: ${this._page!.url().substring(0, 150)}`);
+    }
+
+    // Wait for login page to render
+    try {
+      await this._page!.waitForSelector('input[name="loginfmt"], input[type="email"]', {
+        timeout: 15000, state: 'visible',
+      });
+    } catch {
+      this._logger.warn('Login page elements not found');
+    }
+
+    // STEP 2: Microsoft Authentication
+    this._logger.info(`Authenticating as ${this._options.botAccountEmail}`);
+    const authProcedure = new AuthProcedure(this._page!, this._logger);
+    const authSuccess = await authProcedure.authenticateWithMicrosoft(
+      this._options.botAccountEmail!,
+      this._options.botAccountPassword!,
+      true,
+    );
+
+    if (!authSuccess) {
+      throw new Error('Microsoft authentication failed');
+    }
+    this._logger.info('Authentication successful');
+
+    // STEP 3: Wait for Teams chat page (landing page after auth)
+    try {
+      await this._page!.waitForURL('**/teams.microsoft.com/**', { timeout: 30000 });
+    } catch {
+      // Also accept teams.cloud.microsoft
+      try {
+        await this._page!.waitForURL('**/teams.cloud.microsoft/**', { timeout: 10000 });
+      } catch {
+        this._logger.warn(`Unexpected URL after auth: ${this._page!.url().substring(0, 150)}`);
+      }
+    }
+
+    // Wait for "Join" button in chat header
+    try {
+      await this._page!.waitForSelector(
+        'button[data-tid="chat-join-button"], button[data-tid="join-call-button"]',
+        { timeout: 30000, state: 'visible' },
+      );
+      this._logger.info('Teams chat page loaded, "Join" button found');
+    } catch {
+      this._logger.warn('"Join" button not found in chat header');
+      await this._takeScreenshot('auth-no-join-button');
+    }
+
+    // STEP 4: Click "Join" in chat header → Pre-Join screen
+    this._logger.info('Clicking "Join" in chat header');
+    const chatJoinSelectors = [
+      'button[data-tid="chat-join-button"]',
+      'button[data-tid="join-call-button"]',
+    ];
+
+    let chatJoinClicked = false;
+    for (const selector of chatJoinSelectors) {
+      try {
+        const btn = await this._page!.waitForSelector(selector, { timeout: 5000, state: 'visible' });
+        if (btn) {
+          await btn.click();
+          chatJoinClicked = true;
+          break;
+        }
+      } catch { /* try next */ }
+    }
+
+    if (!chatJoinClicked) {
+      throw new Error('"Join" button in chat header not found');
+    }
+
+    // STEP 5: Pre-Join screen → Click "Join now"
+    this._logger.info('Waiting for pre-join screen');
+    try {
+      await this._page!.waitForSelector(
+        'button:has-text("Join now"), button:has-text("Jetzt teilnehmen"), button[data-tid="prejoin-join-button"]',
+        { timeout: 30000, state: 'visible' },
+      );
+    } catch {
+      this._logger.warn('"Join now" button not found');
+      await this._takeScreenshot('auth-no-join-now');
+    }
+
+    // Activate camera toggle if it's off (so background image is visible)
+    await this._ensureCameraOn();
+
+    await this._page!.waitForTimeout(2000);
+
+    const joinNowSelectors = [
+      'button:has-text("Join now")',
+      'button:has-text("Jetzt teilnehmen")',
+      'button[data-tid="prejoin-join-button"]',
+    ];
+
+    let joinNowClicked = false;
+    for (const selector of joinNowSelectors) {
+      try {
+        const btn = await this._page!.waitForSelector(selector, { timeout: 5000, state: 'visible' });
+        if (btn) {
+          await btn.click();
+          joinNowClicked = true;
+          break;
+        }
+      } catch { /* try next */ }
+    }
+
+    if (!joinNowClicked) {
+      throw new Error('"Join now" button not found on pre-join screen');
+    }
+
+    this._logger.info('Clicked "Join now", waiting for meeting');
+
+    // Wait for meeting admission (hangup button = in meeting)
+    await this._waitForMeetingAdmission();
+
+    this._setState('in_meeting');
+    this._logger.info(`Bot joined the meeting (authenticated as ${this._options.botAccountEmail})`);
+
+    // Initialize audio playback
+    await this._audioProcedure!.initialize();
+
+    // Enable transcript capture (captions or audio based on transferMode)
+    await this._enableTranscriptCapture();
+    await this._enableChat();
+  }
+
+  /**
+   * Ensure the camera is turned on in the pre-join screen.
+   * When camera is on, Teams shows the profile/background image.
+   */
+  private async _ensureCameraOn(): Promise<void> {
+    try {
+      // Look for camera toggle button
+      const cameraBtn = await this._page!.$('button[data-tid="toggle-video"], button[aria-label*="camera" i], button[aria-label*="Camera" i], button[aria-label*="Video" i]');
+      if (cameraBtn) {
+        // Check if camera is currently off (aria-pressed="false" or similar)
+        const isOff = await cameraBtn.evaluate((el) => {
+          return el.getAttribute('aria-pressed') === 'false' ||
+            el.getAttribute('aria-checked') === 'false' ||
+            el.classList.contains('is-off') ||
+            el.querySelector('.fui-Icon-regular') !== null;
+        });
+        if (isOff) {
+          await cameraBtn.click();
+          this._logger.info('Camera toggled ON');
+          await this._page!.waitForTimeout(1000);
+        } else {
+          this._logger.info('Camera already ON');
+        }
+      } else {
+        this._logger.warn('Camera toggle button not found');
+      }
+    } catch (err) {
+      this._logger.warn('Could not toggle camera:', err);
+    }
+  }
+
   /**
    * Connect to the Gateway WebSocket for this session.
    */
@@ -288,6 +471,9 @@ export class BotOrchestrator {
       url = `${this._httpBaseUrl}/bot/transcript/${this._sessionId}`;
     } else if (msgType === 'status') {
       url = `${this._httpBaseUrl}/bot/status/${this._sessionId}`;
+    } else if (msgType === 'audioChunk') {
+      // Audio chunks are too frequent for HTTP — only send via WebSocket
+      return;
     } else {
       this._logger.debug(`HTTP fallback: unsupported message type ${msgType}`);
       return;
@@ -351,6 +537,11 @@ export class BotOrchestrator {
     try {
       this._setState('leaving');
 
+      // Stop audio capture
+      if (this._audioCaptureProcedure) {
+        await this._audioCaptureProcedure.stopCapture();
+      }
+
       // Unsubscribe from captions and chat
       if (this._captionsProcedure) {
         await this._captionsProcedure.unsubscribe();
@@ -359,7 +550,7 @@ export class BotOrchestrator {
         await this._chatProcedure.unsubscribe();
       }
 
-      // Clean up audio
+      // Clean up audio playback
       if (this._audioProcedure) {
         await this._audioProcedure.cleanup();
       }
@@ -403,20 +594,30 @@ export class BotOrchestrator {
 
   /**
    * Launch the browser and create a new page.
+   * @param authMode - If true, use headful + minimal args (Chromium Minimal, proven to work for auth)
    */
-  private async _launchBrowser(): Promise<void> {
-    this._logger.info('Launching browser...');
+  private async _launchBrowser(authMode: boolean = false): Promise<void> {
+    this._logger.info(`Launching browser (authMode=${authMode})...`);
+
+    const args = authMode
+      ? [
+          // Chromium Minimal: only --no-sandbox + fake media (proven to work for authenticated join)
+          '--no-sandbox',
+          '--use-fake-ui-for-media-stream',
+          '--use-fake-device-for-media-stream',
+        ]
+      : [
+          '--use-fake-ui-for-media-stream',
+          '--use-fake-device-for-media-stream',
+          '--disable-web-security',
+          '--disable-features=IsolateOrigins,site-per-process',
+          '--autoplay-policy=no-user-gesture-required',
+          '--disable-blink-features=AutomationControlled',
+        ];
 
     this._browser = await chromium.launch({
-      headless: config.botHeadless,
-      args: [
-        '--use-fake-ui-for-media-stream', // Auto-accept media permissions
-        '--use-fake-device-for-media-stream', // Provide fake camera/mic so Teams sees devices
-        '--disable-web-security',
-        '--disable-features=IsolateOrigins,site-per-process',
-        '--autoplay-policy=no-user-gesture-required',
-        '--disable-blink-features=AutomationControlled', // Prevent navigator.webdriver=true
-      ],
+      headless: authMode ? false : config.botHeadless,
+      args,
     });
 
     this._context = await this._browser.newContext({
@@ -463,8 +664,15 @@ export class BotOrchestrator {
       if (!window.chrome.runtime) { window.chrome.runtime = {}; }
     });
 
-    // Initialize procedures (always anonymous join)
+    // Initialize procedures
     this._joinProcedure = new JoinProcedure(this._page, this._logger, this._botName);
+    this._audioCaptureProcedure = new AudioCaptureProcedure(
+      this._page,
+      this._logger,
+      (base64Data, sampleRate) => {
+        this._sendAudioChunk(base64Data, sampleRate);
+      },
+    );
     this._captionsProcedure = new CaptionsProcedure(
       this._page,
       this._logger,
@@ -496,6 +704,12 @@ export class BotOrchestrator {
     // This ensures Teams gets our controlled audio stream when it calls getUserMedia
     await this._audioProcedure.injectAudioOverride();
 
+    // Inject audio capture (WebRTC interception) if transfer mode requires it
+    const transferMode = this._getEffectiveTransferMode();
+    if (transferMode === 'audio') {
+      await this._audioCaptureProcedure!.injectCaptureOverride();
+    }
+
     // Handle page errors
     this._page.on('pageerror', (error) => {
       this._logger.error('Page error:', error);
@@ -584,6 +798,19 @@ export class BotOrchestrator {
     throw new Error('Timeout waiting to be admitted from lobby');
   }
 
+  /**
+   * Determine the effective transfer mode based on config and join mode.
+   * auto: anonymous → audio, authenticated → caption
+   */
+  private _getEffectiveTransferMode(): 'caption' | 'audio' {
+    const mode = this._options.transferMode || 'auto';
+    if (mode === 'caption') return 'caption';
+    if (mode === 'audio') return 'audio';
+    // auto: use audio for anonymous (Teams only provides English captions), caption for auth
+    const isAuth = !!(this._options.botAccountEmail && this._options.botAccountPassword);
+    return isAuth ? 'caption' : 'audio';
+  }
+
   /**
    * Enable captions and start scraping.
    */
@@ -594,7 +821,36 @@ export class BotOrchestrator {
       this._logger.info('Captions enabled and subscribed');
     } catch (error) {
       this._logger.warn('Could not enable captions:', error);
-      // Continue without captions - not a fatal error
+    }
+  }
+
+  /**
+   * Enable audio capture from meeting participants.
+   */
+  private async _enableAudioCapture(): Promise<void> {
+    if (!this._audioCaptureProcedure) {
+      this._logger.warn('Audio capture procedure not initialized');
+      return;
+    }
+    try {
+      await this._audioCaptureProcedure.startCapture();
+      this._logger.info('Audio capture started (PCM16 16kHz mono)');
+    } catch (error) {
+      this._logger.warn('Could not start audio capture:', error);
+    }
+  }
+
+  /**
+   * Enable transcript capture (captions or audio) based on transfer mode.
+   */
+  private async _enableTranscriptCapture(): Promise<void> {
+    const transferMode = this._getEffectiveTransferMode();
+    this._logger.info(`Transfer mode: ${transferMode} (configured: ${this._options.transferMode || 'auto'})`);
+
+    if (transferMode === 'caption') {
+      await this._enableCaptions();
+    } else {
+      await this._enableAudioCapture();
     }
   }
 
@@ -628,6 +884,23 @@ export class BotOrchestrator {
     this._sendToGateway(message);
   }
 
+  /**
+   * Send an audio chunk to the Gateway for STT processing.
+   */
+  private _sendAudioChunk(base64Data: string, sampleRate: number): void {
+    const message: AudioChunkMessage = {
+      type: 'audioChunk',
+      sessionId: this._sessionId,
+      audio: {
+        format: 'pcm16',
+        sampleRate,
+        data: base64Data,
+        timestamp: new Date().toISOString(),
+      },
+    };
+    this._sendToGateway(message);
+  }
+
   /**
    * Send a text message to the meeting chat.
    */
diff --git a/src/index.ts b/src/index.ts
index 9551dbe..1bfe959 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -19,8 +19,8 @@ async function main(): Promise<void> {
 
   // Start HTTP server
   httpServer = new HttpServer({
-    onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl) => {
-      await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl);
+    onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode) => {
+      await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode);
     },
     onLeaveRequest: async (sessionId) => {
       await sessionManager.endSession(sessionId);
diff --git a/src/server/httpServer.ts b/src/server/httpServer.ts
index 818e36d..3296728 100644
--- a/src/server/httpServer.ts
+++ b/src/server/httpServer.ts
@@ -5,7 +5,7 @@ import { config } from '../config';
 import { runAuthTests, runSingleVariant, getVariantIds } from '../bot/authTestProcedure';
 
 export interface HttpServerCallbacks {
-  onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string, language?: string, botAccountEmail?: string, botAccountPassword?: string, backgroundImageUrl?: string) => Promise<void>;
+  onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string, language?: string, botAccountEmail?: string, botAccountPassword?: string, transferMode?: string) => Promise<void>;
   onLeaveRequest: (sessionId: string) => Promise<void>;
   onStatusRequest: (sessionId: string) => { state: string; error?: string } | null;
 }
@@ -78,14 +78,14 @@ export class HttpServer {
     // Deploy a new bot
     this._app.post('/api/bot', async (req: Request, res: Response) => {
       try {
-        const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl } = req.body;
+        const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode } = req.body;
 
         if (!sessionId || !meetingUrl) {
           res.status(400).json({ error: 'Missing required fields: sessionId, meetingUrl' });
           return;
         }
 
-        await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, backgroundImageUrl);
+        await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language, botAccountEmail, botAccountPassword, transferMode);
 
         res.json({
           success: true,
diff --git a/src/sessionManager.ts b/src/sessionManager.ts
index 4b48d5d..81af19c 100644
--- a/src/sessionManager.ts
+++ b/src/sessionManager.ts
@@ -41,7 +41,7 @@ export class SessionManager {
     language?: string,
     botAccountEmail?: string,
     botAccountPassword?: string,
-    backgroundImageUrl?: string
+    transferMode?: string,
   ): Promise<void> {
     if (this._sessions.has(sessionId)) {
       logger.warn(`Session ${sessionId} already exists`);
@@ -75,7 +75,7 @@ export class SessionManager {
       language: language,
       botAccountEmail: botAccountEmail,
       botAccountPassword: botAccountPassword,
-      backgroundImageUrl: backgroundImageUrl,
+      transferMode: transferMode,
     };
 
     const orchestrator = new BotOrchestrator(
diff --git a/src/types/index.ts b/src/types/index.ts
index 415e789..2d5033e 100644
--- a/src/types/index.ts
+++ b/src/types/index.ts
@@ -55,8 +55,19 @@ export interface SendChatMessage {
   text: string;
 }
 
+export interface AudioChunkMessage {
+  type: 'audioChunk';
+  sessionId: string;
+  audio: {
+    format: 'pcm16' | 'opus';
+    sampleRate: number;
+    data: string; // base64 encoded
+    timestamp: string;
+  };
+}
+
 export type GatewayToBot = PlayAudioMessage | JoinMeetingMessage | LeaveMeetingMessage | SendChatMessage;
-export type BotToGateway = TranscriptMessage | StatusMessage | ChatMessage;
+export type BotToGateway = TranscriptMessage | StatusMessage | ChatMessage | AudioChunkMessage;
 
 // Bot State
 export type BotState =