diff --git a/src/bot/captionsProcedure.ts b/src/bot/captionsProcedure.ts index ccd7cd6..bba1629 100644 --- a/src/bot/captionsProcedure.ts +++ b/src/bot/captionsProcedure.ts @@ -20,20 +20,23 @@ export class CaptionsProcedure { private _onTranscript: (entry: TranscriptEntry) => void; private _isSubscribed: boolean = false; private _lastCaptionText: string = ''; + private _language: string; constructor( page: Page, logger: Logger, - onTranscript: (entry: TranscriptEntry) => void + onTranscript: (entry: TranscriptEntry) => void, + language?: string ) { this._page = page; this._logger = logger; this._onTranscript = onTranscript; + this._language = language || 'de-DE'; } /** * Enable live captions in the meeting. - * Opens the "More" menu and clicks the captions button. + * Opens the "More" menu and clicks the captions button, then sets the spoken language. */ async enableCaptionsFlow(): Promise { this._logger.info('Enabling live captions...'); @@ -48,6 +51,9 @@ export class CaptionsProcedure { await this._waitForCaptionsContainer(); this._logger.info('Live captions enabled'); + + // Set the spoken language (Teams defaults to English for anonymous users) + await this._setSpokenLanguage(); } /** @@ -155,6 +161,207 @@ export class CaptionsProcedure { } } + /** + * Set the spoken language for captions. + * + * Teams defaults to English for anonymous users. This method attempts to + * change the "Meeting spoken language" to the configured language (e.g. "de-DE"). + * + * Flow (per Microsoft docs): + * 1. Click "Caption settings" (gear/settings icon near captions area) + * 2. Click "Language settings" + * 3. Change "Meeting spoken language" dropdown + * 4. Click "Update" + * + * Note: Changing spoken language affects ALL meeting participants. + */ + private async _setSpokenLanguage(): Promise { + // Map BCP-47 codes to Teams display names for the spoken language dropdown + const languageDisplayNames: Record = { + 'de-DE': ['German (Germany)', 'Deutsch (Deutschland)', 'German'], + 'de-CH': ['German (Switzerland)', 'Deutsch (Schweiz)', 'German'], + 'en-US': ['English (United States)', 'English (US)', 'English'], + 'en-GB': ['English (United Kingdom)', 'English (UK)'], + 'fr-FR': ['French (France)', 'Français (France)', 'French'], + 'fr-CH': ['French (Switzerland)', 'Français (Suisse)'], + 'it-IT': ['Italian (Italy)', 'Italiano (Italia)', 'Italian'], + }; + + const targetNames = languageDisplayNames[this._language] || [this._language]; + this._logger.info(`Setting spoken language to: ${this._language} (looking for: ${targetNames.join(', ')})`); + + try { + // Wait a moment for the captions UI to stabilize + await this._page.waitForTimeout(2000); + + // Strategy 1: Try "Caption settings" button near the captions area + // This is typically a gear icon or "..." button in the captions banner + const captionSettingsSelectors = [ + 'button[aria-label*="Caption settings"]', + 'button[aria-label*="caption settings"]', + 'button[aria-label*="Captions settings"]', + 'button[data-tid="caption-settings-button"]', + 'button[id="caption-settings-button"]', + ]; + + let settingsOpened = false; + for (const selector of captionSettingsSelectors) { + try { + const button = await this._page.$(selector); + if (button) { + await button.click(); + this._logger.info(`Clicked caption settings: ${selector}`); + settingsOpened = true; + await this._page.waitForTimeout(1000); + break; + } + } catch { + // Continue + } + } + + // Strategy 2: If no caption settings button found, try More menu > Language and speech + if (!settingsOpened) { + this._logger.info('Caption settings button not found, trying More menu > Language and speech...'); + + await this._openMoreMenu(); + await this._page.waitForTimeout(500); + + // Look for "Language and speech" or "Spoken language" menu item + const languageMenuSelectors = [ + ':has-text("Language and speech")', + ':has-text("Spoken language")', + ':has-text("Sprache und Spracheingabe")', + '[data-tid="language-and-speech-button"]', + 'button:has-text("Language")', + ]; + + for (const selector of languageMenuSelectors) { + try { + const item = await this._page.$(selector); + if (item) { + await item.click(); + this._logger.info(`Clicked language menu: ${selector}`); + settingsOpened = true; + await this._page.waitForTimeout(1000); + break; + } + } catch { + // Continue + } + } + } + + if (!settingsOpened) { + this._logger.warn('Could not open language settings - captions will use default language (English)'); + return; + } + + // Now look for the "Language settings" sub-option if needed + const langSettingsSelectors = [ + ':has-text("Language settings")', + ':has-text("Spracheinstellungen")', + 'button:has-text("Language settings")', + ]; + + for (const selector of langSettingsSelectors) { + try { + const item = await this._page.$(selector); + if (item) { + await item.click(); + this._logger.info(`Clicked language settings: ${selector}`); + await this._page.waitForTimeout(1000); + break; + } + } catch { + // Continue - might already be on the language settings page + } + } + + // Look for the spoken language dropdown + const dropdownSelectors = [ + 'select[aria-label*="spoken language" i]', + 'select[aria-label*="Meeting spoken language" i]', + '[data-tid="spoken-language-dropdown"]', + 'div[role="listbox"]', + 'select', // Generic fallback + ]; + + for (const selector of dropdownSelectors) { + try { + const dropdown = await this._page.$(selector); + if (dropdown) { + const tagName = await dropdown.evaluate(el => el.tagName.toLowerCase()); + + if (tagName === 'select') { + // Native select element - try to select by text + for (const name of targetNames) { + try { + await this._page.selectOption(selector, { label: name }); + this._logger.info(`Selected spoken language: ${name}`); + break; + } catch { + // Try next name variant + } + } + } else { + // Fluent UI dropdown - click and select from options + await dropdown.click(); + await this._page.waitForTimeout(500); + + for (const name of targetNames) { + try { + const option = await this._page.$(`[role="option"]:has-text("${name}")`); + if (option) { + await option.click(); + this._logger.info(`Selected spoken language: ${name}`); + break; + } + } catch { + // Try next name variant + } + } + } + break; + } + } catch { + // Continue + } + } + + // Click "Update" or "Apply" button + const updateSelectors = [ + 'button:has-text("Update")', + 'button:has-text("Apply")', + 'button:has-text("Aktualisieren")', + 'button:has-text("Übernehmen")', + 'button[data-tid="language-update-button"]', + ]; + + for (const selector of updateSelectors) { + try { + const button = await this._page.$(selector); + if (button) { + await button.click(); + this._logger.info(`Clicked update button: ${selector}`); + await this._page.waitForTimeout(1000); + break; + } + } catch { + // Continue + } + } + + // Close any open dialogs/menus + await this._page.keyboard.press('Escape'); + this._logger.info('Spoken language setting attempt completed'); + + } catch (error) { + this._logger.warn(`Could not set spoken language to ${this._language}: ${error}`); + // Not fatal - captions will still work, just in the wrong language + } + } + /** * Start watching the captions DOM for updates using Recall.ai's approach. * @@ -278,6 +485,8 @@ export class CaptionsProcedure { this._lastCaptionText = caption.text; + this._logger.info(`Caption: [${caption.speaker}] ${caption.text}`); + this._onTranscript({ speaker: caption.speaker, text: caption.text, diff --git a/src/bot/orchestrator.ts b/src/bot/orchestrator.ts index a45ee1b..5ed387a 100644 --- a/src/bot/orchestrator.ts +++ b/src/bot/orchestrator.ts @@ -22,6 +22,7 @@ export interface OrchestratorCallbacks { export interface OrchestratorOptions { gatewayWsUrl: string; instanceId: string; + language?: string; } /** @@ -326,12 +327,17 @@ export class BotOrchestrator { // Initialize procedures this._joinProcedure = new JoinProcedure(this._page, this._logger, this._botName); - this._captionsProcedure = new CaptionsProcedure(this._page, this._logger, (entry) => { - // Send transcript to Gateway - this._sendTranscript(entry.speaker, entry.text, entry.isFinal); - // Also notify local callbacks - this._callbacks.onTranscript(entry); - }); + this._captionsProcedure = new CaptionsProcedure( + this._page, + this._logger, + (entry) => { + // Send transcript to Gateway + this._sendTranscript(entry.speaker, entry.text, entry.isFinal); + // Also notify local callbacks + this._callbacks.onTranscript(entry); + }, + this._options.language + ); this._audioProcedure = new AudioProcedure(this._page, this._logger); // Handle page errors diff --git a/src/index.ts b/src/index.ts index 70a81d0..064a951 100644 --- a/src/index.ts +++ b/src/index.ts @@ -19,8 +19,8 @@ async function main(): Promise { // Start HTTP server httpServer = new HttpServer({ - onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl) => { - await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl); + onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language) => { + await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language); }, onLeaveRequest: async (sessionId) => { await sessionManager.endSession(sessionId); diff --git a/src/server/httpServer.ts b/src/server/httpServer.ts index 8152ed1..0ef1645 100644 --- a/src/server/httpServer.ts +++ b/src/server/httpServer.ts @@ -4,7 +4,7 @@ import { logger } from '../utils/logger'; import { config } from '../config'; export interface HttpServerCallbacks { - onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string) => Promise; + onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string, language?: string) => Promise; onLeaveRequest: (sessionId: string) => Promise; onStatusRequest: (sessionId: string) => { state: string; error?: string } | null; } @@ -77,14 +77,14 @@ export class HttpServer { // Deploy a new bot this._app.post('/api/bot', async (req: Request, res: Response) => { try { - const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl } = req.body; + const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language } = req.body; if (!sessionId || !meetingUrl) { res.status(400).json({ error: 'Missing required fields: sessionId, meetingUrl' }); return; } - await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl); + await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language); res.json({ success: true, diff --git a/src/sessionManager.ts b/src/sessionManager.ts index e888113..2b2880d 100644 --- a/src/sessionManager.ts +++ b/src/sessionManager.ts @@ -30,13 +30,15 @@ export class SessionManager { * @param botName - Display name for the bot * @param instanceId - Feature instance ID (for Gateway routing) * @param gatewayWsUrl - Full WebSocket URL to connect back to Gateway (supports multi-instance) + * @param language - BCP-47 language code for captions spoken language (e.g. "de-DE") */ async createSession( sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, - gatewayWsUrl?: string + gatewayWsUrl?: string, + language?: string ): Promise { if (this._sessions.has(sessionId)) { logger.warn(`Session ${sessionId} already exists`); @@ -64,6 +66,7 @@ export class SessionManager { const options: OrchestratorOptions = { gatewayWsUrl: gatewayWsUrl || config.gatewayWsUrl, instanceId: instanceId || 'default', + language: language, }; const orchestrator = new BotOrchestrator(