fix: use Record and transcribe > Start transcription flow with language dialog for auth Teams

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
ValueOn AG 2026-02-17 22:00:36 +01:00
parent 252775a4b4
commit 6e712858dc

View file

@ -39,21 +39,26 @@ export class CaptionsProcedure {
* Opens the "More" menu and clicks the captions button, then sets the spoken language.
*/
async enableCaptionsFlow(): Promise<void> {
this._logger.info('Enabling live captions...');
this._logger.info('Enabling captions/transcription...');
// First, open the "More actions" menu
await this._openMoreMenu();
// Then click on the captions button
// Then click on the captions/transcription button
await this._clickEnableCaptions();
// Wait for the captions container to appear
// Handle language dialog (appears after "Start transcription" in authenticated Teams)
const dialogHandled = await this._handleLanguageDialog();
// Wait for the captions/transcript container to appear
await this._waitForCaptionsContainer();
this._logger.info('Live captions enabled');
this._logger.info('Captions/transcription enabled');
// Set the spoken language (Teams defaults to English for anonymous users)
await this._setSpokenLanguage();
// Only try separate language setting if dialog wasn't already handled
if (!dialogHandled) {
await this._setSpokenLanguage();
}
}
/**
@ -98,16 +103,20 @@ export class CaptionsProcedure {
}
/**
* Click the captions button in the menu.
* Handles two UI variants:
* - Anonymous (light-meetings): direct div[id="closed-captions-button"]
* - Authenticated (full Teams): submenu "Captions & transcripts" "Turn on live captions"
* Enable captions or transcription from the "More" menu.
*
* Strategies in priority order:
* 1. Direct captions button (anonymous / light-meetings UI)
* 2. "Record and transcribe" "Start transcription" (authenticated Teams 2025+)
* triggers spoken-language-selection-dialog handled by _handleLanguageDialog()
* 3. "Captions & transcripts" submenu (older authenticated Teams)
* 4. "Language and speech" panel toggle (alternative path)
* 5. Generic text / DOM scan fallback
*/
private async _clickEnableCaptions(): Promise<void> {
// Log visible menu items for debugging
await this._logVisibleMenuItems();
// Strategy 1: Direct captions button (anonymous/light-meetings UI)
// ── Strategy 1: Direct captions button (anonymous / light-meetings UI) ──
const directSelectors = [
'div[id="closed-captions-button"]',
'[data-tid="closed-captions-button"]',
@ -128,15 +137,68 @@ export class CaptionsProcedure {
}
}
// Strategy 2: Authenticated Teams UI — "Captions & transcripts" submenu first
// ── Strategy 2: "Record and transcribe" → "Start transcription" ──
// Authenticated Teams 2025+: More → Record and transcribe → Start transcription
// After clicking, a spoken-language-selection-dialog appears (handled later).
const recordMenuSelectors = [
'[data-tid="RecordingMenuControl-id"]',
'div[role="menuitem"]:has-text("Record and transcribe")',
'div[role="menuitem"]:has-text("Aufzeichnen und transkribieren")',
'div[role="menuitem"]:has-text("Aufnehmen und transkribieren")',
];
for (const selector of recordMenuSelectors) {
try {
const item = await this._page.$(selector);
if (item) {
await item.click();
this._logger.info(`Clicked "Record and transcribe": ${selector}`);
await this._page.waitForTimeout(1500);
// Log the submenu items
await this._logVisibleMenuItems();
// Click "Start transcription"
const transcriptionSelectors = [
'[role="menuitem"]:has-text("Start transcription")',
'[role="menuitem"]:has-text("Transkription starten")',
'[role="menuitem"]:has-text("transcription")',
'[role="menuitem"]:has-text("Transkription")',
'button:has-text("Start transcription")',
'button:has-text("Transkription starten")',
'div:has-text("Start transcription")[role="menuitem"]',
];
for (const transSel of transcriptionSelectors) {
try {
const transBtn = await this._page.$(transSel);
if (transBtn) {
await transBtn.click();
this._logger.info(`Clicked "Start transcription": ${transSel}`);
await this._page.waitForTimeout(2000);
return; // language dialog handled by _handleLanguageDialog()
}
} catch {
// Continue
}
}
this._logger.warn('"Record and transcribe" opened but "Start transcription" not found');
await this._page.keyboard.press('Escape');
break;
}
} catch {
// Continue
}
}
// ── Strategy 3: "Captions & transcripts" submenu (older Teams) ──
const submenuSelectors = [
'[data-tid="captions-and-transcripts-button"]',
'[role="menuitem"]:has-text("Captions & transcripts")',
'[role="menuitem"]:has-text("Captions and transcripts")',
'[role="menuitem"]:has-text("Untertitel und Transkripte")',
'[role="menuitem"]:has-text("Untertitel")',
'button:has-text("Captions & transcripts")',
'button:has-text("Captions and transcripts")',
];
for (const selector of submenuSelectors) {
@ -147,12 +209,10 @@ export class CaptionsProcedure {
this._logger.info(`Clicked captions submenu: ${selector}`);
await this._page.waitForTimeout(1500);
// Now look for "Turn on live captions" inside the submenu/panel
const enableSelectors = [
'button:has-text("Turn on live captions")',
'button:has-text("Live captions")',
'button:has-text("Live-Untertitel aktivieren")',
'button:has-text("Liveuntertitel")',
'[role="menuitem"]:has-text("Turn on live captions")',
'[role="menuitem"]:has-text("Live captions")',
'[role="menuitemcheckbox"]:has-text("captions")',
@ -181,8 +241,7 @@ export class CaptionsProcedure {
}
}
// Strategy 2b: "Language and speech" submenu (authenticated Teams 2025+)
// In the new Teams, captions are under "Language and speech" → toggle inside panel
// ── Strategy 4: "Language and speech" panel toggle ──
const langSpeechSelectors = [
'[data-tid="LanguageSpeechMenuControl-id"]',
'div[role="menuitem"]:has-text("Language and speech")',
@ -197,7 +256,6 @@ export class CaptionsProcedure {
this._logger.info(`Clicked "Language and speech": ${selector}`);
await this._page.waitForTimeout(2000);
// Log panel toggles for debugging
const panelToggles = await this._page.evaluate(() => {
const switches = document.querySelectorAll(
'input[role="switch"], [role="switch"], input[type="checkbox"]'
@ -212,7 +270,6 @@ export class CaptionsProcedure {
});
this._logger.info(`Panel toggles: ${JSON.stringify(panelToggles)}`);
// Find and click the live captions toggle
const toggleResult = await this._page.evaluate(() => {
const switches = document.querySelectorAll(
'input[role="switch"], [role="switch"], input[type="checkbox"]'
@ -234,31 +291,14 @@ export class CaptionsProcedure {
return { found: true, clicked: false, info: `already on: ${label || tid}` };
}
}
// Fallback: any button/link mentioning captions
const btns = document.querySelectorAll('button, [role="menuitem"], [role="option"], a');
for (const btn of Array.from(btns)) {
const text = ((btn as HTMLElement).textContent || '').toLowerCase();
if (
text.includes('turn on live caption') ||
text.includes('liveuntertitel aktivieren') ||
text.includes('liveuntertitel einschalten')
) {
(btn as HTMLElement).click();
return { found: true, clicked: true, info: text.substring(0, 60) };
}
}
return { found: false, clicked: false, info: '' };
});
this._logger.info(`Captions toggle result: ${JSON.stringify(toggleResult)}`);
if (toggleResult.found && toggleResult.clicked) {
await this._page.waitForTimeout(1500);
}
// Close the panel
await this._page.keyboard.press('Escape');
if (toggleResult.found) return;
this._logger.warn('Language panel opened but no captions toggle found');
@ -269,32 +309,9 @@ export class CaptionsProcedure {
}
}
// Strategy 3: Generic text-based fallbacks
const textFallbacks = [
'button:has-text("Turn on live captions")',
'button:has-text("Live captions")',
'button[aria-label*="captions" i]',
'[role="menuitem"]:has-text("captions")',
'[role="menuitemcheckbox"]:has-text("captions")',
];
for (const selector of textFallbacks) {
try {
const button = await this._page.$(selector);
if (button) {
await button.click();
this._logger.info(`Clicked captions (text fallback): ${selector}`);
await this._page.waitForTimeout(1000);
return;
}
} catch {
// Continue
}
}
// Strategy 4: DOM scan — find any element mentioning "caption" in the open menu
// ── Strategy 5: DOM scan for anything containing "caption" / "transcri" ──
const found = await this._page.evaluate(() => {
const keywords = ['caption', 'captions', 'untertitel', 'live caption'];
const keywords = ['caption', 'captions', 'untertitel', 'live caption', 'transcri', 'transkri'];
const candidates = document.querySelectorAll(
'[role="menuitem"], [role="menuitemcheckbox"], [role="menuitemradio"], button, li, div[role="option"]'
);
@ -312,22 +329,138 @@ export class CaptionsProcedure {
});
if (found.clicked) {
this._logger.info(`Clicked captions via DOM scan: "${found.clicked}"`);
this._logger.info(`Clicked via DOM scan: "${found.clicked}"`);
await this._page.waitForTimeout(1500);
// Check if this opened a submenu — look for "Turn on" or "enable" inside
const turnOnBtn = await this._page.$('button:has-text("Turn on"), [role="menuitem"]:has-text("Turn on")');
if (turnOnBtn) {
await turnOnBtn.click();
this._logger.info('Clicked "Turn on" in captions submenu');
this._logger.info('Clicked "Turn on" in submenu');
await this._page.waitForTimeout(1000);
}
return;
}
// Nothing found
await this._page.keyboard.press('Escape');
this._logger.warn(`Could not find captions option. DOM scan matches: ${JSON.stringify(found.allMatches)}`);
this._logger.warn(`Could not find captions/transcription option. DOM scan: ${JSON.stringify(found.allMatches)}`);
}
/**
 * Handle the "What language is everyone speaking?" dialog.
 * This dialog appears after clicking "Start transcription" in authenticated Teams.
 *
 * DOM structure (from user-provided HTML):
 *   [data-tid="spoken-language-selection-dialog"]  alertdialog
 *     button[data-tid="callingCaptions-spokenLanguages"]  combobox (current lang)
 *     button[data-tid="calling_captions_change_language_dialog_confirm_button"]
 *     button[data-tid="calling_captions_change_language_dialog_cancel_button"]
 *
 * Flow:
 *   1. Wait up to 8 s for the dialog; if it never shows, return `false`.
 *   2. Read the dropdown's current text and compare it (case-insensitive
 *      substring match) against the names from `_getLanguageDisplayNames()`.
 *   3. If none match, open the dropdown and click the first matching option.
 *   4. Click the Confirm button.
 *
 * @returns `true` only when a Confirm button was clicked. `false` when the
 *          dialog did not appear, when no Confirm button could be found, or
 *          when an unexpected error occurred while driving the dialog.
 *
 * NOTE(review): `true` is also returned when the target language could not be
 * selected in the dropdown (a warning is logged and the dialog is confirmed
 * with whatever language it showed). A caller that skips its own language
 * step on `true` will keep that language — confirm this is intended.
 */
private async _handleLanguageDialog(): Promise<boolean> {
  const dialogSelector = '[data-tid="spoken-language-selection-dialog"]';

  // Phase 1: wait for the dialog. Failing here is the expected path when the
  // dialog simply is not shown, so it is kept separate from the error
  // handling for the interaction below.
  try {
    await this._page.waitForSelector(dialogSelector, { timeout: 8000 });
  } catch {
    // No dialog appeared — using captions mode, not transcription
    this._logger.info('No language selection dialog appeared (may be using captions mode)');
    return false;
  }
  this._logger.info('Spoken language selection dialog appeared');

  // Phase 2: drive the dialog. Errors raised from here on are real failures
  // and are reported as such instead of being mistaken for "no dialog".
  try {
    // Read current language from dropdown button
    const dropdownSelector = 'button[data-tid="callingCaptions-spokenLanguages"]';
    const dropdown = await this._page.$(dropdownSelector);

    if (dropdown) {
      const currentLang = await dropdown.evaluate(
        (el) => el.textContent?.trim() ?? '',
      );
      this._logger.info(`Dialog current language: "${currentLang}"`);

      // Check if the language is already correct
      const targetNames = this._getLanguageDisplayNames();
      const currentLower = currentLang.toLowerCase();
      const isCorrect = targetNames.some((name) =>
        currentLower.includes(name.toLowerCase()),
      );

      if (!isCorrect) {
        this._logger.info(`Need to change language to: ${targetNames.join(', ')}`);

        // Open dropdown
        await dropdown.click();
        await this._page.waitForTimeout(800);

        // Select the correct option: try every display name, and for each
        // name every selector variant, stopping at the first one that clicks.
        let selected = false;
        for (const name of targetNames) {
          if (selected) break;
          const optionSelectors = [
            `[role="option"]:has-text("${name}")`,
            `li:has-text("${name}")`,
            `div[role="option"]:has-text("${name}")`,
          ];
          for (const optSel of optionSelectors) {
            try {
              const option = await this._page.$(optSel);
              if (option) {
                await option.click();
                this._logger.info(`Selected language: ${name} (via ${optSel})`);
                selected = true;
                break;
              }
            } catch {
              // Best effort: a failing selector must not stop the search
            }
          }
        }

        if (!selected) {
          this._logger.warn('Could not select language in dialog dropdown');
        }
        await this._page.waitForTimeout(500);
      } else {
        this._logger.info('Language already correct in dialog');
      }
    }

    // Click "Confirm" (stable data-tid first, then visible-text fallbacks)
    const confirmSelectors = [
      'button[data-tid="calling_captions_change_language_dialog_confirm_button"]',
      'button:has-text("Confirm")',
      'button:has-text("Bestätigen")',
    ];
    for (const sel of confirmSelectors) {
      try {
        const btn = await this._page.$(sel);
        if (btn) {
          await btn.click();
          this._logger.info(`Clicked "Confirm" in language dialog: ${sel}`);
          await this._page.waitForTimeout(1500);
          return true;
        }
      } catch {
        // Best effort: try the next confirm selector
      }
    }

    this._logger.warn('Language dialog found but could not click Confirm');
    return false;
  } catch (err) {
    // The dialog was present but something went wrong while operating it.
    const reason = err instanceof Error ? err.message : String(err);
    this._logger.warn(`Language dialog appeared but handling failed: ${reason}`);
    return false;
  }
}
/**
 * Resolve the labels under which the configured language (`this._language`)
 * may be shown in a language dropdown.
 *
 * @returns The names listed for the configured language code in the table
 *          below, in table order. When the code has no entry, a one-element
 *          array holding the raw `this._language` value.
 */
private _getLanguageDisplayNames(): string[] {
  const namesByLocale: Record<string, string[]> = {
    'de-DE': ['German (Germany)', 'Deutsch (Deutschland)', 'German'],
    'de-CH': ['German (Switzerland)', 'Deutsch (Schweiz)', 'German'],
    'en-US': ['English (United States)', 'English (US)', 'English'],
    'en-GB': ['English (United Kingdom)', 'English (UK)'],
    'fr-FR': ['French (France)', 'Français (France)', 'French'],
    'fr-CH': ['French (Switzerland)', 'Français (Suisse)'],
    'it-IT': ['Italian (Italy)', 'Italiano (Italia)', 'Italian'],
  };

  const knownNames = namesByLocale[this._language];
  if (knownNames) {
    return knownNames;
  }

  // Unknown code: fall back to the raw configured value as the only candidate.
  return [this._language];
}
/**
@ -360,19 +493,35 @@ export class CaptionsProcedure {
'div[data-tid="closed-caption-renderer-wrapper"]',
'div[data-tid="live-captions-renderer"]',
'[data-tid="caption-area"]',
// Transcript panel (authenticated Teams "Record and transcribe" flow)
'[data-tid="transcript-pane"]',
'[data-tid="transcript-view"]',
'[data-tid*="transcript"]',
];
for (const selector of containerSelectors) {
try {
await this._page.waitForSelector(selector, { timeout: 15000 });
this._logger.info(`Found captions container: ${selector}`);
await this._page.waitForSelector(selector, { timeout: 10000 });
this._logger.info(`Found captions/transcript container: ${selector}`);
return;
} catch {
// Try next
}
}
this._logger.warn('Could not find captions container - captions may not have enabled or may use a different selector');
// Log visible data-tid elements for debugging
const tids = await this._page.evaluate(() => {
const els = document.querySelectorAll('[data-tid]');
return Array.from(els)
.map(e => e.getAttribute('data-tid') || '')
.filter(t => t.includes('caption') || t.includes('transcript') || t.includes('subtitle'))
.slice(0, 10);
});
if (tids.length > 0) {
this._logger.info(`Related data-tid elements found: ${JSON.stringify(tids)}`);
}
this._logger.warn('Could not find captions/transcript container with known selectors');
}
/**
@ -823,11 +972,13 @@ export class CaptionsProcedure {
}
}
// Try specific caption container selectors first
// Try specific caption/transcript container selectors first
const containerSelectors = [
'div[data-tid="closed-caption-renderer-wrapper"]',
'div[data-tid="live-captions-renderer"]',
'[data-tid="caption-area"]',
'[data-tid="transcript-pane"]',
'[data-tid="transcript-view"]',
];
let targetNode: Element | null = null;