feat: Add spoken language config for captions + caption event logging

- Accept language param from gateway (de-DE, en-US, etc.) in join request
- Pass language through: httpServer -> sessionManager -> orchestrator -> captionsProcedure
- After enabling captions, attempt to change Teams spoken language via UI
  (Caption settings > Language settings > spoken language dropdown)
- Add per-caption log line for debugging

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
ValueOn AG 2026-02-15 01:25:45 +01:00
parent 36bf5269ac
commit d8c0331921
5 changed files with 232 additions and 14 deletions

View file

@ -20,20 +20,23 @@ export class CaptionsProcedure {
private _onTranscript: (entry: TranscriptEntry) => void;
private _isSubscribed: boolean = false;
private _lastCaptionText: string = '';
private _language: string;
/**
 * Create a captions procedure.
 *
 * @param page - Page object stored on the instance for later use.
 * @param logger - Logger stored on the instance for this procedure's log output.
 * @param onTranscript - Callback stored on the instance; it receives TranscriptEntry values.
 * @param language - Optional spoken-language code (e.g. "de-DE"). Falls back to
 *   'de-DE' when omitted or empty.
 */
constructor(
page: Page,
logger: Logger,
onTranscript: (entry: TranscriptEntry) => void
onTranscript: (entry: TranscriptEntry) => void,
language?: string
) {
this._page = page;
this._logger = logger;
this._onTranscript = onTranscript;
// `||` rather than `??`: an empty string also falls back to the default.
this._language = language || 'de-DE';
}
/**
* Enable live captions in the meeting.
* Opens the "More" menu and clicks the captions button.
* Opens the "More" menu and clicks the captions button, then sets the spoken language.
*/
async enableCaptionsFlow(): Promise<void> {
this._logger.info('Enabling live captions...');
@ -48,6 +51,9 @@ export class CaptionsProcedure {
await this._waitForCaptionsContainer();
this._logger.info('Live captions enabled');
// Set the spoken language (Teams defaults to English for anonymous users)
await this._setSpokenLanguage();
}
/**
@ -155,6 +161,207 @@ export class CaptionsProcedure {
}
}
/**
 * Set the spoken language for captions.
 *
 * Teams defaults to English for anonymous users. This method attempts to
 * change the "Meeting spoken language" to the configured language (e.g. "de-DE").
 *
 * Flow (per Microsoft docs):
 * 1. Click "Caption settings" (gear/settings icon near captions area),
 *    or fall back to the More menu > "Language and speech"
 * 2. Click "Language settings" (optional - the dialog may already show it)
 * 3. Change "Meeting spoken language" dropdown
 * 4. Click "Update"
 *
 * The whole flow is best-effort: every failure is logged and swallowed so that
 * captions keep working (in the default language) when the UI cannot be driven.
 * Any menu or dialog opened along the way is dismissed with Escape on every
 * exit path, including early returns and errors.
 *
 * Note: Changing spoken language affects ALL meeting participants.
 */
private async _setSpokenLanguage(): Promise<void> {
  // Map BCP-47 codes to Teams display names for the spoken language dropdown
  const languageDisplayNames: Record<string, string[]> = {
    'de-DE': ['German (Germany)', 'Deutsch (Deutschland)', 'German'],
    'de-CH': ['German (Switzerland)', 'Deutsch (Schweiz)', 'German'],
    'en-US': ['English (United States)', 'English (US)', 'English'],
    'en-GB': ['English (United Kingdom)', 'English (UK)'],
    'fr-FR': ['French (France)', 'Français (France)', 'French'],
    'fr-CH': ['French (Switzerland)', 'Français (Suisse)'],
    'it-IT': ['Italian (Italy)', 'Italiano (Italia)', 'Italian'],
  };

  // The language originates from the join request body, so normalise it
  // defensively: coerce to string, trim, and look the code up case-insensitively.
  const requested = String(this._language).trim();
  if (requested === '') {
    // An empty name would match every option in a :has-text() selector.
    this._logger.warn('No spoken language configured - captions will use default language (English)');
    return;
  }
  const knownCode = Object.keys(languageDisplayNames).find(
    (code) => code.toLowerCase() === requested.toLowerCase()
  );
  let targetNames: string[] = [requested];
  if (knownCode !== undefined) {
    targetNames = languageDisplayNames[knownCode] || targetNames;
  }

  this._logger.info(`Setting spoken language to: ${this._language} (looking for: ${targetNames.join(', ')})`);

  try {
    // Wait a moment for the captions UI to stabilize
    await this._page.waitForTimeout(2000);

    // Strategy 1: Try "Caption settings" button near the captions area
    // This is typically a gear icon or "..." button in the captions banner
    let settingsOpened = await this._clickFirstMatchingSelector(
      [
        'button[aria-label*="Caption settings"]',
        'button[aria-label*="caption settings"]',
        'button[aria-label*="Captions settings"]',
        'button[data-tid="caption-settings-button"]',
        'button[id="caption-settings-button"]',
      ],
      'caption settings'
    );

    // Strategy 2: If no caption settings button found, try More menu > Language and speech
    if (!settingsOpened) {
      this._logger.info('Caption settings button not found, trying More menu > Language and speech...');
      await this._openMoreMenu();
      await this._page.waitForTimeout(500);

      // `text=` resolves to the smallest element containing the text. A bare
      // `:has-text(...)` would also match <html>/<body>, and page.$ would then
      // return (and click) the page root.
      settingsOpened = await this._clickFirstMatchingSelector(
        [
          'text=Language and speech',
          'text=Spoken language',
          'text=Sprache und Spracheingabe',
          '[data-tid="language-and-speech-button"]',
          'button:has-text("Language")',
        ],
        'language menu'
      );
    }

    if (!settingsOpened) {
      this._logger.warn('Could not open language settings - captions will use default language (English)');
      return;
    }

    // Now look for the "Language settings" sub-option if needed. Not finding it
    // is fine - we might already be on the language settings page.
    await this._clickFirstMatchingSelector(
      [
        'button:has-text("Language settings")',
        'text=Language settings',
        'text=Spracheinstellungen',
      ],
      'language settings'
    );

    const languageSelected = await this._selectSpokenLanguageOption(targetNames);
    if (!languageSelected) {
      // Do not click "Update" when nothing was changed.
      this._logger.warn(
        `Could not select spoken language ${this._language} - captions will use the current Teams language`
      );
      return;
    }

    // Click "Update" or "Apply" button
    const updateClicked = await this._clickFirstMatchingSelector(
      [
        'button:has-text("Update")',
        'button:has-text("Apply")',
        'button:has-text("Aktualisieren")',
        'button:has-text("Übernehmen")',
        'button[data-tid="language-update-button"]',
      ],
      'update button'
    );
    if (!updateClicked) {
      this._logger.warn('No Update/Apply button found - the spoken language change may not have been applied');
    }

    this._logger.info('Spoken language setting attempt completed');
  } catch (error) {
    this._logger.warn(`Could not set spoken language to ${this._language}: ${error}`);
    // Not fatal - captions will still work, just in the wrong language
  } finally {
    // Close any open dialogs/menus on every exit path (success, early return, error)
    try {
      await this._page.keyboard.press('Escape');
    } catch {
      // Best-effort cleanup only
    }
  }
}

/**
 * Click the first element matched by any of the given selectors.
 *
 * Selectors are tried in order; a selector that matches nothing or whose click
 * fails is skipped. After a successful click the method waits one second for
 * the UI to react.
 *
 * @param selectors - Candidate selectors, most preferred first.
 * @param description - Human-readable target name used in the log line.
 * @returns true when an element was clicked, false when no selector worked.
 */
private async _clickFirstMatchingSelector(
  selectors: readonly string[],
  description: string
): Promise<boolean> {
  for (const selector of selectors) {
    try {
      const element = await this._page.$(selector);
      if (element) {
        await element.click();
        this._logger.info(`Clicked ${description}: ${selector}`);
        await this._page.waitForTimeout(1000);
        return true;
      }
    } catch {
      // Best-effort: try the next selector
    }
  }
  return false;
}

/**
 * Pick one of the given display names in the spoken language dropdown.
 *
 * Only the first dropdown found is used. Native `<select>` elements are driven
 * via selectOption; anything else is treated as a Fluent UI dropdown that is
 * opened with a click and whose `[role="option"]` entries are clicked.
 *
 * @param targetNames - Display-name variants to look for, most preferred first.
 * @returns true when an option was selected, false otherwise.
 */
private async _selectSpokenLanguageOption(targetNames: readonly string[]): Promise<boolean> {
  const dropdownSelectors = [
    'select[aria-label*="spoken language" i]',
    'select[aria-label*="Meeting spoken language" i]',
    '[data-tid="spoken-language-dropdown"]',
    'div[role="listbox"]',
    'select', // Generic fallback
  ];

  for (const selector of dropdownSelectors) {
    try {
      const dropdown = await this._page.$(selector);
      if (!dropdown) {
        continue;
      }

      const tagName = await dropdown.evaluate(el => el.tagName.toLowerCase());

      if (tagName === 'select') {
        // Native select element - try to select by text. Use the handle we just
        // inspected and a short timeout so that a missing label variant does not
        // block for the full default timeout.
        for (const name of targetNames) {
          try {
            await dropdown.selectOption({ label: name }, { timeout: 2000 });
            this._logger.info(`Selected spoken language: ${name}`);
            return true;
          } catch {
            // Try next name variant
          }
        }
      } else {
        // Fluent UI dropdown - click and select from options
        await dropdown.click();
        await this._page.waitForTimeout(500);

        for (const name of targetNames) {
          try {
            // JSON.stringify quotes and escapes the name so that quotes or
            // backslashes in a caller-supplied language cannot break the selector.
            const option = await this._page.$(`[role="option"]:has-text(${JSON.stringify(name)})`);
            if (option) {
              await option.click();
              this._logger.info(`Selected spoken language: ${name}`);
              return true;
            }
          } catch {
            // Try next name variant
          }
        }
      }

      // A dropdown was found but none of the names matched; do not fall through
      // to less specific selectors.
      return false;
    } catch {
      // Continue with the next dropdown selector
    }
  }

  return false;
}
/**
* Start watching the captions DOM for updates using Recall.ai's approach.
*
@ -278,6 +485,8 @@ export class CaptionsProcedure {
this._lastCaptionText = caption.text;
this._logger.info(`Caption: [${caption.speaker}] ${caption.text}`);
this._onTranscript({
speaker: caption.speaker,
text: caption.text,

View file

@ -22,6 +22,7 @@ export interface OrchestratorCallbacks {
/**
 * Options passed to the BotOrchestrator when a session is created.
 */
export interface OrchestratorOptions {
/** WebSocket URL used to connect back to the Gateway. */
gatewayWsUrl: string;
/** Feature instance ID (used for Gateway routing). */
instanceId: string;
/**
 * Optional BCP-47 code for the captions spoken language (e.g. "de-DE").
 * Forwarded to CaptionsProcedure, which falls back to 'de-DE' when unset.
 */
language?: string;
}
/**
@ -326,12 +327,17 @@ export class BotOrchestrator {
// Initialize procedures
this._joinProcedure = new JoinProcedure(this._page, this._logger, this._botName);
this._captionsProcedure = new CaptionsProcedure(this._page, this._logger, (entry) => {
this._captionsProcedure = new CaptionsProcedure(
this._page,
this._logger,
(entry) => {
// Send transcript to Gateway
this._sendTranscript(entry.speaker, entry.text, entry.isFinal);
// Also notify local callbacks
this._callbacks.onTranscript(entry);
});
},
this._options.language
);
this._audioProcedure = new AudioProcedure(this._page, this._logger);
// Handle page errors

View file

@ -19,8 +19,8 @@ async function main(): Promise<void> {
// Start HTTP server
httpServer = new HttpServer({
onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl) => {
await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl);
onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language) => {
await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language);
},
onLeaveRequest: async (sessionId) => {
await sessionManager.endSession(sessionId);

View file

@ -4,7 +4,7 @@ import { logger } from '../utils/logger';
import { config } from '../config';
/**
 * Callbacks through which the HTTP server hands incoming requests to the application.
 */
export interface HttpServerCallbacks {
/**
 * Called by the POST /api/bot handler with the fields read from the request body.
 * sessionId and meetingUrl are required by the handler; the remaining fields are optional.
 */
onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string) => Promise<void>;
onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string, language?: string) => Promise<void>;
/** Called with the session to leave; main() wires this to SessionManager.endSession. */
onLeaveRequest: (sessionId: string) => Promise<void>;
/**
 * Returns the state (and optional error text) of a session.
 * NOTE(review): null presumably means the session is unknown - confirm against the implementation.
 */
onStatusRequest: (sessionId: string) => { state: string; error?: string } | null;
}
@ -77,14 +77,14 @@ export class HttpServer {
// Deploy a new bot
this._app.post('/api/bot', async (req: Request, res: Response) => {
try {
const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl } = req.body;
const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language } = req.body;
if (!sessionId || !meetingUrl) {
res.status(400).json({ error: 'Missing required fields: sessionId, meetingUrl' });
return;
}
await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl);
await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language);
res.json({
success: true,

View file

@ -30,13 +30,15 @@ export class SessionManager {
* @param botName - Display name for the bot
* @param instanceId - Feature instance ID (for Gateway routing)
* @param gatewayWsUrl - Full WebSocket URL to connect back to Gateway (supports multi-instance)
* @param language - BCP-47 language code for captions spoken language (e.g. "de-DE")
*/
async createSession(
sessionId: string,
meetingUrl: string,
botName?: string,
instanceId?: string,
gatewayWsUrl?: string
gatewayWsUrl?: string,
language?: string
): Promise<void> {
if (this._sessions.has(sessionId)) {
logger.warn(`Session ${sessionId} already exists`);
@ -64,6 +66,7 @@ export class SessionManager {
const options: OrchestratorOptions = {
gatewayWsUrl: gatewayWsUrl || config.gatewayWsUrl,
instanceId: instanceId || 'default',
language: language,
};
const orchestrator = new BotOrchestrator(