feat: Add spoken language config for captions + caption event logging

- Accept a `language` param from the gateway (de-DE, en-US, etc.) in the join request
- Pass the language through: httpServer -> sessionManager -> orchestrator -> captionsProcedure
- After enabling captions, attempt to change the Teams spoken language via the UI (Caption settings > Language settings > spoken language dropdown)
- Add a per-caption log line for debugging

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
36bf5269ac
commit
d8c0331921
5 changed files with 232 additions and 14 deletions
|
|
@ -20,20 +20,23 @@ export class CaptionsProcedure {
|
|||
private _onTranscript: (entry: TranscriptEntry) => void;
|
||||
private _isSubscribed: boolean = false;
|
||||
private _lastCaptionText: string = '';
|
||||
private _language: string;
|
||||
|
||||
constructor(
|
||||
page: Page,
|
||||
logger: Logger,
|
||||
onTranscript: (entry: TranscriptEntry) => void
|
||||
onTranscript: (entry: TranscriptEntry) => void,
|
||||
language?: string
|
||||
) {
|
||||
this._page = page;
|
||||
this._logger = logger;
|
||||
this._onTranscript = onTranscript;
|
||||
this._language = language || 'de-DE';
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable live captions in the meeting.
|
||||
* Opens the "More" menu and clicks the captions button.
|
||||
* Opens the "More" menu and clicks the captions button, then sets the spoken language.
|
||||
*/
|
||||
async enableCaptionsFlow(): Promise<void> {
|
||||
this._logger.info('Enabling live captions...');
|
||||
|
|
@ -48,6 +51,9 @@ export class CaptionsProcedure {
|
|||
await this._waitForCaptionsContainer();
|
||||
|
||||
this._logger.info('Live captions enabled');
|
||||
|
||||
// Set the spoken language (Teams defaults to English for anonymous users)
|
||||
await this._setSpokenLanguage();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -155,6 +161,207 @@ export class CaptionsProcedure {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Best-effort attempt to switch the meeting's spoken language for live captions.
 *
 * Teams defaults to English for anonymous users. This method drives the Teams UI
 * to change the "Meeting spoken language" to the configured language
 * (`this._language`, e.g. "de-DE").
 *
 * Flow (per Microsoft docs):
 *   1. Click "Caption settings" (gear/settings icon near the captions area);
 *      if that is not found, fall back to More menu > "Language and speech".
 *   2. Click "Language settings" (skipped silently if already on that page).
 *   3. Pick the target language in the "Meeting spoken language" dropdown.
 *   4. Click "Update" / "Apply" - only if step 3 actually selected something.
 *
 * Note: Changing the spoken language affects ALL meeting participants, which is
 * why the update button is never pressed unless a target option was chosen.
 *
 * Never throws: every failure is logged as a warning and captions keep running
 * in whatever language Teams is currently using.
 */
private async _setSpokenLanguage(): Promise<void> {
  // Map BCP-47 codes to Teams display names for the spoken language dropdown.
  // Unknown codes fall back to the raw code as the only candidate name.
  const languageDisplayNames: Record<string, string[]> = {
    'de-DE': ['German (Germany)', 'Deutsch (Deutschland)', 'German'],
    'de-CH': ['German (Switzerland)', 'Deutsch (Schweiz)', 'German'],
    'en-US': ['English (United States)', 'English (US)', 'English'],
    'en-GB': ['English (United Kingdom)', 'English (UK)'],
    'fr-FR': ['French (France)', 'Français (France)', 'French'],
    'fr-CH': ['French (Switzerland)', 'Français (Suisse)'],
    'it-IT': ['Italian (Italy)', 'Italiano (Italia)', 'Italian'],
  };

  const targetNames = languageDisplayNames[this._language] || [this._language];
  this._logger.info(`Setting spoken language to: ${this._language} (looking for: ${targetNames.join(', ')})`);

  // Upper bound for a single click / select action so that one stale or
  // covered element cannot stall the whole flow for the default timeout.
  const actionTimeoutMs = 5000;

  // Quote arbitrary text for use inside a selector; the language value
  // originates from the join request and may contain quotes or backslashes.
  const quote = (text: string): string => `"${text.replace(/["\\]/g, '\\$&')}"`;

  // Build element-scoped selectors for a visible label. A bare `:has-text()`
  // also matches every ancestor up to <html>, so `page.$()` would return the
  // page root and the click would land in the middle of the page.
  const selectorsForLabels = (labels: string[]): string[] => {
    const selectors: string[] = [];
    for (const label of labels) {
      selectors.push(`[role="menuitem"]:has-text(${quote(label)})`);
      selectors.push(`button:has-text(${quote(label)})`);
      selectors.push(`text=${quote(label)}`);
    }
    return selectors;
  };

  // Click the first selector that resolves to an element; returns whether a click happened.
  const clickFirstMatch = async (selectors: string[], what: string): Promise<boolean> => {
    for (const selector of selectors) {
      try {
        const element = await this._page.$(selector);
        if (element) {
          await element.click({ timeout: actionTimeoutMs });
          this._logger.info(`Clicked ${what}: ${selector}`);
          await this._page.waitForTimeout(1000);
          return true;
        }
      } catch {
        // Best-effort probing: this candidate was unusable, try the next one.
      }
    }
    return false;
  };

  // Close whatever menu/dialog this flow may have left open.
  const dismissOverlays = async (): Promise<void> => {
    try {
      await this._page.keyboard.press('Escape');
    } catch {
      // Page may already be closing; nothing left to clean up.
    }
  };

  // Pick the target language in the first dropdown found.
  // Returns the display name that was selected, or null if nothing was selected.
  const selectLanguage = async (): Promise<string | null> => {
    const dropdownSelectors = [
      'select[aria-label*="spoken language" i]', // also covers "Meeting spoken language"
      '[data-tid="spoken-language-dropdown"]',
      'div[role="listbox"]',
      // NOTE(review): generic fallback - picks the first <select> on the page,
      // which is assumed to be the spoken language one; confirm against the Teams UI.
      'select',
    ];

    for (const selector of dropdownSelectors) {
      try {
        const dropdown = await this._page.$(selector);
        if (!dropdown) {
          continue;
        }

        const tagName = await dropdown.evaluate(el => el.tagName.toLowerCase());

        if (tagName === 'select') {
          // Native select element - select by visible label on the handle we
          // already hold, with a short timeout per name variant.
          for (const name of targetNames) {
            try {
              await dropdown.selectOption({ label: name }, { timeout: 2000 });
              return name;
            } catch {
              // Try next name variant
            }
          }
        } else {
          // Fluent UI dropdown - open it and click the matching option.
          await dropdown.click({ timeout: actionTimeoutMs });
          await this._page.waitForTimeout(500);

          for (const name of targetNames) {
            try {
              const option = await this._page.$(`[role="option"]:has-text(${quote(name)})`);
              if (option) {
                await option.click({ timeout: actionTimeoutMs });
                return name;
              }
            } catch {
              // Try next name variant
            }
          }
        }

        // Only the first dropdown that exists is tried.
        return null;
      } catch {
        // Continue with the next dropdown candidate
      }
    }
    return null;
  };

  try {
    // Wait a moment for the captions UI to stabilize
    await this._page.waitForTimeout(2000);

    // Strategy 1: "Caption settings" button near the captions area
    // (typically a gear icon or "..." button in the captions banner).
    let settingsOpened = await clickFirstMatch(
      [
        'button[aria-label*="Caption settings"]',
        'button[aria-label*="caption settings"]',
        'button[aria-label*="Captions settings"]',
        'button[data-tid="caption-settings-button"]',
        'button[id="caption-settings-button"]',
      ],
      'caption settings'
    );

    // Strategy 2: More menu > Language and speech
    if (!settingsOpened) {
      this._logger.info('Caption settings button not found, trying More menu > Language and speech...');

      await this._openMoreMenu();
      await this._page.waitForTimeout(500);

      settingsOpened = await clickFirstMatch(
        [
          ...selectorsForLabels(['Language and speech', 'Spoken language', 'Sprache und Spracheingabe']),
          '[data-tid="language-and-speech-button"]',
          'button:has-text("Language")',
        ],
        'language menu'
      );
    }

    if (!settingsOpened) {
      this._logger.warn('Could not open language settings - captions will use default language (English)');
      // The More menu opened by strategy 2 would otherwise stay open.
      await dismissOverlays();
      return;
    }

    // "Language settings" sub-option; absence is fine - we might already be on that page.
    await clickFirstMatch(
      selectorsForLabels(['Language settings', 'Spracheinstellungen']),
      'language settings'
    );

    const selectedName = await selectLanguage();
    if (selectedName === null) {
      this._logger.warn(
        `Could not find spoken language option for ${this._language} - leaving meeting language unchanged`
      );
      // Do NOT press Update: it would confirm whatever value the dialog currently holds.
      await dismissOverlays();
      return;
    }
    this._logger.info(`Selected spoken language: ${selectedName}`);

    // Confirm the change via "Update" or "Apply"
    const confirmed = await clickFirstMatch(
      [
        'button:has-text("Update")',
        'button:has-text("Apply")',
        'button:has-text("Aktualisieren")',
        'button:has-text("Übernehmen")',
        'button[data-tid="language-update-button"]',
      ],
      'update button'
    );
    if (!confirmed) {
      this._logger.warn('No Update/Apply button found - spoken language change may not have been applied');
    }

    // Close any open dialogs/menus
    await dismissOverlays();
    this._logger.info('Spoken language setting attempt completed');
  } catch (error) {
    this._logger.warn(`Could not set spoken language to ${this._language}: ${error}`);
    // Not fatal - captions will still work, just in the wrong language.
    await dismissOverlays();
  }
}
|
||||
|
||||
/**
|
||||
* Start watching the captions DOM for updates using Recall.ai's approach.
|
||||
*
|
||||
|
|
@ -278,6 +485,8 @@ export class CaptionsProcedure {
|
|||
|
||||
this._lastCaptionText = caption.text;
|
||||
|
||||
this._logger.info(`Caption: [${caption.speaker}] ${caption.text}`);
|
||||
|
||||
this._onTranscript({
|
||||
speaker: caption.speaker,
|
||||
text: caption.text,
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ export interface OrchestratorCallbacks {
|
|||
/**
 * Per-session settings handed to the orchestrator.
 */
export interface OrchestratorOptions {
  /** WebSocket URL used to connect back to the Gateway. */
  gatewayWsUrl: string;

  /** Feature instance ID (used for Gateway routing). */
  instanceId: string;

  /**
   * BCP-47 code of the spoken language for captions (e.g. "de-DE").
   * Optional; when omitted, the captions procedure applies its own default.
   */
  language?: string;
}
|
||||
|
||||
/**
|
||||
|
|
@ -326,12 +327,17 @@ export class BotOrchestrator {
|
|||
|
||||
// Initialize procedures
|
||||
this._joinProcedure = new JoinProcedure(this._page, this._logger, this._botName);
|
||||
this._captionsProcedure = new CaptionsProcedure(this._page, this._logger, (entry) => {
|
||||
this._captionsProcedure = new CaptionsProcedure(
|
||||
this._page,
|
||||
this._logger,
|
||||
(entry) => {
|
||||
// Send transcript to Gateway
|
||||
this._sendTranscript(entry.speaker, entry.text, entry.isFinal);
|
||||
// Also notify local callbacks
|
||||
this._callbacks.onTranscript(entry);
|
||||
});
|
||||
},
|
||||
this._options.language
|
||||
);
|
||||
this._audioProcedure = new AudioProcedure(this._page, this._logger);
|
||||
|
||||
// Handle page errors
|
||||
|
|
|
|||
|
|
@ -19,8 +19,8 @@ async function main(): Promise<void> {
|
|||
|
||||
// Start HTTP server
|
||||
httpServer = new HttpServer({
|
||||
onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl) => {
|
||||
await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl);
|
||||
onJoinRequest: async (sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language) => {
|
||||
await sessionManager.createSession(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language);
|
||||
},
|
||||
onLeaveRequest: async (sessionId) => {
|
||||
await sessionManager.endSession(sessionId);
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import { logger } from '../utils/logger';
|
|||
import { config } from '../config';
|
||||
|
||||
export interface HttpServerCallbacks {
|
||||
onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string) => Promise<void>;
|
||||
onJoinRequest: (sessionId: string, meetingUrl: string, botName?: string, instanceId?: string, gatewayWsUrl?: string, language?: string) => Promise<void>;
|
||||
onLeaveRequest: (sessionId: string) => Promise<void>;
|
||||
onStatusRequest: (sessionId: string) => { state: string; error?: string } | null;
|
||||
}
|
||||
|
|
@ -77,14 +77,14 @@ export class HttpServer {
|
|||
// Deploy a new bot
|
||||
this._app.post('/api/bot', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl } = req.body;
|
||||
const { sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language } = req.body;
|
||||
|
||||
if (!sessionId || !meetingUrl) {
|
||||
res.status(400).json({ error: 'Missing required fields: sessionId, meetingUrl' });
|
||||
return;
|
||||
}
|
||||
|
||||
await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl);
|
||||
await this._callbacks.onJoinRequest(sessionId, meetingUrl, botName, instanceId, gatewayWsUrl, language);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
|
|
|
|||
|
|
@ -30,13 +30,15 @@ export class SessionManager {
|
|||
* @param botName - Display name for the bot
|
||||
* @param instanceId - Feature instance ID (for Gateway routing)
|
||||
* @param gatewayWsUrl - Full WebSocket URL to connect back to Gateway (supports multi-instance)
|
||||
* @param language - BCP-47 language code for captions spoken language (e.g. "de-DE")
|
||||
*/
|
||||
async createSession(
|
||||
sessionId: string,
|
||||
meetingUrl: string,
|
||||
botName?: string,
|
||||
instanceId?: string,
|
||||
gatewayWsUrl?: string
|
||||
gatewayWsUrl?: string,
|
||||
language?: string
|
||||
): Promise<void> {
|
||||
if (this._sessions.has(sessionId)) {
|
||||
logger.warn(`Session ${sessionId} already exists`);
|
||||
|
|
@ -64,6 +66,7 @@ export class SessionManager {
|
|||
const options: OrchestratorOptions = {
|
||||
gatewayWsUrl: gatewayWsUrl || config.gatewayWsUrl,
|
||||
instanceId: instanceId || 'default',
|
||||
language: language,
|
||||
};
|
||||
|
||||
const orchestrator = new BotOrchestrator(
|
||||
|
|
|
|||
Loading…
Reference in a new issue