From 6e712858dc1ed8611751ebc4798bdc40c55e7152 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Tue, 17 Feb 2026 22:00:36 +0100
Subject: [PATCH] fix: use Record and transcribe > Start transcription flow
with language dialog for auth Teams
Co-authored-by: Cursor
---
src/bot/captionsProcedure.ts | 295 ++++++++++++++++++++++++++---------
1 file changed, 223 insertions(+), 72 deletions(-)
diff --git a/src/bot/captionsProcedure.ts b/src/bot/captionsProcedure.ts
index 8a10ad5..f3f55f1 100644
--- a/src/bot/captionsProcedure.ts
+++ b/src/bot/captionsProcedure.ts
@@ -39,21 +39,26 @@ export class CaptionsProcedure {
* Opens the "More" menu and clicks the captions button, then sets the spoken language.
*/
async enableCaptionsFlow(): Promise {
- this._logger.info('Enabling live captions...');
+ this._logger.info('Enabling captions/transcription...');
// First, open the "More actions" menu
await this._openMoreMenu();
- // Then click on the captions button
+ // Then click on the captions/transcription button
await this._clickEnableCaptions();
- // Wait for the captions container to appear
+ // Handle language dialog (appears after "Start transcription" in authenticated Teams)
+ const dialogHandled = await this._handleLanguageDialog();
+
+ // Wait for the captions/transcript container to appear
await this._waitForCaptionsContainer();
- this._logger.info('Live captions enabled');
+ this._logger.info('Captions/transcription enabled');
- // Set the spoken language (Teams defaults to English for anonymous users)
- await this._setSpokenLanguage();
+ // Only try separate language setting if dialog wasn't already handled
+ if (!dialogHandled) {
+ await this._setSpokenLanguage();
+ }
}
/**
@@ -98,16 +103,20 @@ export class CaptionsProcedure {
}
/**
- * Click the captions button in the menu.
- * Handles two UI variants:
- * - Anonymous (light-meetings): direct div[id="closed-captions-button"]
- * - Authenticated (full Teams): submenu "Captions & transcripts" → "Turn on live captions"
+ * Enable captions or transcription from the "More" menu.
+ *
+ * Strategies in priority order:
+ * 1. Direct captions button (anonymous / light-meetings UI)
+ * 2. "Record and transcribe" → "Start transcription" (authenticated Teams 2025+)
+ * → triggers spoken-language-selection-dialog handled by _handleLanguageDialog()
+ * 3. "Captions & transcripts" submenu (older authenticated Teams)
+ * 4. "Language and speech" panel toggle (alternative path)
+ * 5. Generic text / DOM scan fallback
*/
private async _clickEnableCaptions(): Promise {
- // Log visible menu items for debugging
await this._logVisibleMenuItems();
- // Strategy 1: Direct captions button (anonymous/light-meetings UI)
+ // ── Strategy 1: Direct captions button (anonymous / light-meetings UI) ──
const directSelectors = [
'div[id="closed-captions-button"]',
'[data-tid="closed-captions-button"]',
@@ -128,15 +137,68 @@ export class CaptionsProcedure {
}
}
- // Strategy 2: Authenticated Teams UI — "Captions & transcripts" submenu first
+ // ── Strategy 2: "Record and transcribe" → "Start transcription" ──
+ // Authenticated Teams 2025+: More → Record and transcribe → Start transcription
+ // After clicking, a spoken-language-selection-dialog appears (handled later).
+ const recordMenuSelectors = [
+ '[data-tid="RecordingMenuControl-id"]',
+ 'div[role="menuitem"]:has-text("Record and transcribe")',
+ 'div[role="menuitem"]:has-text("Aufzeichnen und transkribieren")',
+ 'div[role="menuitem"]:has-text("Aufnehmen und transkribieren")',
+ ];
+
+ for (const selector of recordMenuSelectors) {
+ try {
+ const item = await this._page.$(selector);
+ if (item) {
+ await item.click();
+ this._logger.info(`Clicked "Record and transcribe": ${selector}`);
+ await this._page.waitForTimeout(1500);
+
+ // Log the submenu items
+ await this._logVisibleMenuItems();
+
+ // Click "Start transcription"
+ const transcriptionSelectors = [
+ '[role="menuitem"]:has-text("Start transcription")',
+ '[role="menuitem"]:has-text("Transkription starten")',
+ '[role="menuitem"]:has-text("transcription")',
+ '[role="menuitem"]:has-text("Transkription")',
+ 'button:has-text("Start transcription")',
+ 'button:has-text("Transkription starten")',
+ 'div:has-text("Start transcription")[role="menuitem"]',
+ ];
+
+ for (const transSel of transcriptionSelectors) {
+ try {
+ const transBtn = await this._page.$(transSel);
+ if (transBtn) {
+ await transBtn.click();
+ this._logger.info(`Clicked "Start transcription": ${transSel}`);
+ await this._page.waitForTimeout(2000);
+ return; // language dialog handled by _handleLanguageDialog()
+ }
+ } catch {
+ // Continue
+ }
+ }
+
+ this._logger.warn('"Record and transcribe" opened but "Start transcription" not found');
+ await this._page.keyboard.press('Escape');
+ break;
+ }
+ } catch {
+ // Continue
+ }
+ }
+
+ // ── Strategy 3: "Captions & transcripts" submenu (older Teams) ──
const submenuSelectors = [
'[data-tid="captions-and-transcripts-button"]',
'[role="menuitem"]:has-text("Captions & transcripts")',
'[role="menuitem"]:has-text("Captions and transcripts")',
'[role="menuitem"]:has-text("Untertitel und Transkripte")',
'[role="menuitem"]:has-text("Untertitel")',
- 'button:has-text("Captions & transcripts")',
- 'button:has-text("Captions and transcripts")',
];
for (const selector of submenuSelectors) {
@@ -147,12 +209,10 @@ export class CaptionsProcedure {
this._logger.info(`Clicked captions submenu: ${selector}`);
await this._page.waitForTimeout(1500);
- // Now look for "Turn on live captions" inside the submenu/panel
const enableSelectors = [
'button:has-text("Turn on live captions")',
'button:has-text("Live captions")',
'button:has-text("Live-Untertitel aktivieren")',
- 'button:has-text("Liveuntertitel")',
'[role="menuitem"]:has-text("Turn on live captions")',
'[role="menuitem"]:has-text("Live captions")',
'[role="menuitemcheckbox"]:has-text("captions")',
@@ -181,8 +241,7 @@ export class CaptionsProcedure {
}
}
- // Strategy 2b: "Language and speech" submenu (authenticated Teams 2025+)
- // In the new Teams, captions are under "Language and speech" → toggle inside panel
+ // ── Strategy 4: "Language and speech" panel toggle ──
const langSpeechSelectors = [
'[data-tid="LanguageSpeechMenuControl-id"]',
'div[role="menuitem"]:has-text("Language and speech")',
@@ -197,7 +256,6 @@ export class CaptionsProcedure {
this._logger.info(`Clicked "Language and speech": ${selector}`);
await this._page.waitForTimeout(2000);
- // Log panel toggles for debugging
const panelToggles = await this._page.evaluate(() => {
const switches = document.querySelectorAll(
'input[role="switch"], [role="switch"], input[type="checkbox"]'
@@ -212,7 +270,6 @@ export class CaptionsProcedure {
});
this._logger.info(`Panel toggles: ${JSON.stringify(panelToggles)}`);
- // Find and click the live captions toggle
const toggleResult = await this._page.evaluate(() => {
const switches = document.querySelectorAll(
'input[role="switch"], [role="switch"], input[type="checkbox"]'
@@ -234,31 +291,14 @@ export class CaptionsProcedure {
return { found: true, clicked: false, info: `already on: ${label || tid}` };
}
}
- // Fallback: any button/link mentioning captions
- const btns = document.querySelectorAll('button, [role="menuitem"], [role="option"], a');
- for (const btn of Array.from(btns)) {
- const text = ((btn as HTMLElement).textContent || '').toLowerCase();
- if (
- text.includes('turn on live caption') ||
- text.includes('liveuntertitel aktivieren') ||
- text.includes('liveuntertitel einschalten')
- ) {
- (btn as HTMLElement).click();
- return { found: true, clicked: true, info: text.substring(0, 60) };
- }
- }
return { found: false, clicked: false, info: '' };
});
this._logger.info(`Captions toggle result: ${JSON.stringify(toggleResult)}`);
-
if (toggleResult.found && toggleResult.clicked) {
await this._page.waitForTimeout(1500);
}
-
- // Close the panel
await this._page.keyboard.press('Escape');
-
if (toggleResult.found) return;
this._logger.warn('Language panel opened but no captions toggle found');
@@ -269,32 +309,9 @@ export class CaptionsProcedure {
}
}
- // Strategy 3: Generic text-based fallbacks
- const textFallbacks = [
- 'button:has-text("Turn on live captions")',
- 'button:has-text("Live captions")',
- 'button[aria-label*="captions" i]',
- '[role="menuitem"]:has-text("captions")',
- '[role="menuitemcheckbox"]:has-text("captions")',
- ];
-
- for (const selector of textFallbacks) {
- try {
- const button = await this._page.$(selector);
- if (button) {
- await button.click();
- this._logger.info(`Clicked captions (text fallback): ${selector}`);
- await this._page.waitForTimeout(1000);
- return;
- }
- } catch {
- // Continue
- }
- }
-
- // Strategy 4: DOM scan — find any element mentioning "caption" in the open menu
+ // ── Strategy 5: DOM scan for anything containing "caption" / "transcri" ──
const found = await this._page.evaluate(() => {
- const keywords = ['caption', 'captions', 'untertitel', 'live caption'];
+ const keywords = ['caption', 'captions', 'untertitel', 'live caption', 'transcri', 'transkri'];
const candidates = document.querySelectorAll(
'[role="menuitem"], [role="menuitemcheckbox"], [role="menuitemradio"], button, li, div[role="option"]'
);
@@ -312,22 +329,138 @@ export class CaptionsProcedure {
});
if (found.clicked) {
- this._logger.info(`Clicked captions via DOM scan: "${found.clicked}"`);
+ this._logger.info(`Clicked via DOM scan: "${found.clicked}"`);
await this._page.waitForTimeout(1500);
- // Check if this opened a submenu — look for "Turn on" or "enable" inside
const turnOnBtn = await this._page.$('button:has-text("Turn on"), [role="menuitem"]:has-text("Turn on")');
if (turnOnBtn) {
await turnOnBtn.click();
- this._logger.info('Clicked "Turn on" in captions submenu');
+ this._logger.info('Clicked "Turn on" in submenu');
await this._page.waitForTimeout(1000);
}
return;
}
- // Nothing found
await this._page.keyboard.press('Escape');
- this._logger.warn(`Could not find captions option. DOM scan matches: ${JSON.stringify(found.allMatches)}`);
+ this._logger.warn(`Could not find captions/transcription option. DOM scan: ${JSON.stringify(found.allMatches)}`);
+ }
+
+ /**
+ * Handle the "What language is everyone speaking?" dialog shown after "Start transcription" in authenticated Teams: select the configured language, then confirm.
+ *
+ * DOM structure (from user-provided HTML):
+ * [data-tid="spoken-language-selection-dialog"] — alertdialog
+ * button[data-tid="callingCaptions-spokenLanguages"] — combobox (current lang)
+ * button[data-tid="calling_captions_change_language_dialog_confirm_button"]
+ * button[data-tid="calling_captions_change_language_dialog_cancel_button"]
+ * @returns true only if Confirm was clicked; false if no dialog appeared within 8 s, Confirm was not found, or a step threw.
+ */
+ private async _handleLanguageDialog(): Promise<boolean> {
+ try {
+ const dialogSelector = '[data-tid="spoken-language-selection-dialog"]';
+ await this._page.waitForSelector(dialogSelector, { timeout: 8000 });
+ this._logger.info('Spoken language selection dialog appeared');
+
+ // Read the currently selected language from the dropdown button's text
+ const dropdownSelector = 'button[data-tid="callingCaptions-spokenLanguages"]';
+ const dropdown = await this._page.$(dropdownSelector);
+
+ if (dropdown) {
+ const currentLang = await dropdown.evaluate(
+ (el) => el.textContent?.trim() || '',
+ );
+ this._logger.info(`Dialog current language: "${currentLang}"`);
+
+ // Exact (case-insensitive) match: a substring test would let generic names like "German" accept the wrong regional variant
+ const targetNames = this._getLanguageDisplayNames();
+ const isCorrect = targetNames.some((name) =>
+ currentLang.toLowerCase() === name.toLowerCase(),
+ );
+
+ if (!isCorrect) {
+ this._logger.info(`Need to change language to: ${targetNames.join(', ')}`);
+
+ // Open the dropdown and give the option list time to render
+ await dropdown.click();
+ await this._page.waitForTimeout(800);
+
+ // Try each display name in list order; stop at the first option that can be clicked
+ let selected = false;
+ for (const name of targetNames) {
+ if (selected) break;
+ const optionSelectors = [
+ `[role="option"]:has-text("${name}")`,
+ `li:has-text("${name}")`,
+ `div[role="option"]:has-text("${name}")`,
+ ];
+ for (const optSel of optionSelectors) {
+ try {
+ const option = await this._page.$(optSel);
+ if (option) {
+ await option.click();
+ this._logger.info(`Selected language: ${name} (via ${optSel})`);
+ selected = true;
+ break;
+ }
+ } catch {
+ // This selector failed — try the next one
+ }
+ }
+ }
+
+ if (!selected) {
+ this._logger.warn('Could not select language in dialog dropdown');
+ }
+ await this._page.waitForTimeout(500);
+ } else {
+ this._logger.info('Language already correct in dialog');
+ }
+ }
+
+ // Click "Confirm": data-tid first, then English/German button text
+ const confirmSelectors = [
+ 'button[data-tid="calling_captions_change_language_dialog_confirm_button"]',
+ 'button:has-text("Confirm")',
+ 'button:has-text("Bestätigen")',
+ ];
+
+ for (const sel of confirmSelectors) {
+ try {
+ const btn = await this._page.$(sel);
+ if (btn) {
+ await btn.click();
+ this._logger.info(`Clicked "Confirm" in language dialog: ${sel}`);
+ await this._page.waitForTimeout(1500);
+ return true;
+ }
+ } catch {
+ // This selector failed — try the next one
+ }
+ }
+
+ this._logger.warn('Language dialog found but could not click Confirm');
+ return false;
+ } catch (err) {
+ // Usually the dialog wait timing out (captions mode, not transcription); the cause is logged so other failures stay visible
+ this._logger.info(`No language selection dialog appeared (may be using captions mode): ${err instanceof Error ? err.message : String(err)}`);
+ return false;
+ }
+ }
+
+ /**
+ * Get display names for the configured language (used in dropdown selection), in the order they are tried; unmapped locales fall back to the raw language code.
+ */
+ private _getLanguageDisplayNames(): string[] {
+ const languageDisplayNames: Record<string, string[]> = {
+ 'de-DE': ['German (Germany)', 'Deutsch (Deutschland)', 'German'],
+ 'de-CH': ['German (Switzerland)', 'Deutsch (Schweiz)', 'German'],
+ 'en-US': ['English (United States)', 'English (US)', 'English'],
+ 'en-GB': ['English (United Kingdom)', 'English (UK)'],
+ 'fr-FR': ['French (France)', 'Français (France)', 'French'],
+ 'fr-CH': ['French (Switzerland)', 'Français (Suisse)'],
+ 'it-IT': ['Italian (Italy)', 'Italiano (Italia)', 'Italian'],
+ };
+ return languageDisplayNames[this._language] ?? [this._language];
+ }
/**
@@ -360,19 +493,35 @@ export class CaptionsProcedure {
'div[data-tid="closed-caption-renderer-wrapper"]',
'div[data-tid="live-captions-renderer"]',
'[data-tid="caption-area"]',
+ // Transcript panel (authenticated Teams "Record and transcribe" flow)
+ '[data-tid="transcript-pane"]',
+ '[data-tid="transcript-view"]',
+ '[data-tid*="transcript"]',
];
for (const selector of containerSelectors) {
try {
- await this._page.waitForSelector(selector, { timeout: 15000 });
- this._logger.info(`Found captions container: ${selector}`);
+ await this._page.waitForSelector(selector, { timeout: 10000 });
+ this._logger.info(`Found captions/transcript container: ${selector}`);
return;
} catch {
// Try next
}
}
- this._logger.warn('Could not find captions container - captions may not have enabled or may use a different selector');
+ // Log visible data-tid elements for debugging
+ const tids = await this._page.evaluate(() => {
+ const els = document.querySelectorAll('[data-tid]');
+ return Array.from(els)
+ .map(e => e.getAttribute('data-tid') || '')
+ .filter(t => t.includes('caption') || t.includes('transcript') || t.includes('subtitle'))
+ .slice(0, 10);
+ });
+ if (tids.length > 0) {
+ this._logger.info(`Related data-tid elements found: ${JSON.stringify(tids)}`);
+ }
+
+ this._logger.warn('Could not find captions/transcript container with known selectors');
}
/**
@@ -823,11 +972,13 @@ export class CaptionsProcedure {
}
}
- // Try specific caption container selectors first
+ // Try specific caption/transcript container selectors first
const containerSelectors = [
'div[data-tid="closed-caption-renderer-wrapper"]',
'div[data-tid="live-captions-renderer"]',
'[data-tid="caption-area"]',
+ '[data-tid="transcript-pane"]',
+ '[data-tid="transcript-view"]',
];
let targetNode: Element | null = null;