diff --git a/src/bot/captionsProcedure.ts b/src/bot/captionsProcedure.ts index 8a10ad5..f3f55f1 100644 --- a/src/bot/captionsProcedure.ts +++ b/src/bot/captionsProcedure.ts @@ -39,21 +39,26 @@ export class CaptionsProcedure { * Opens the "More" menu and clicks the captions button, then sets the spoken language. */ async enableCaptionsFlow(): Promise { - this._logger.info('Enabling live captions...'); + this._logger.info('Enabling captions/transcription...'); // First, open the "More actions" menu await this._openMoreMenu(); - // Then click on the captions button + // Then click on the captions/transcription button await this._clickEnableCaptions(); - // Wait for the captions container to appear + // Handle language dialog (appears after "Start transcription" in authenticated Teams) + const dialogHandled = await this._handleLanguageDialog(); + + // Wait for the captions/transcript container to appear await this._waitForCaptionsContainer(); - this._logger.info('Live captions enabled'); + this._logger.info('Captions/transcription enabled'); - // Set the spoken language (Teams defaults to English for anonymous users) - await this._setSpokenLanguage(); + // Only try separate language setting if dialog wasn't already handled + if (!dialogHandled) { + await this._setSpokenLanguage(); + } } /** @@ -98,16 +103,20 @@ export class CaptionsProcedure { } /** - * Click the captions button in the menu. - * Handles two UI variants: - * - Anonymous (light-meetings): direct div[id="closed-captions-button"] - * - Authenticated (full Teams): submenu "Captions & transcripts" → "Turn on live captions" + * Enable captions or transcription from the "More" menu. + * + * Strategies in priority order: + * 1. Direct captions button (anonymous / light-meetings UI) + * 2. "Record and transcribe" → "Start transcription" (authenticated Teams 2025+) + * → triggers spoken-language-selection-dialog handled by _handleLanguageDialog() + * 3. "Captions & transcripts" submenu (older authenticated Teams) + * 4. "Language and speech" panel toggle (alternative path) + * 5. Generic text / DOM scan fallback */ private async _clickEnableCaptions(): Promise { - // Log visible menu items for debugging await this._logVisibleMenuItems(); - // Strategy 1: Direct captions button (anonymous/light-meetings UI) + // ── Strategy 1: Direct captions button (anonymous / light-meetings UI) ── const directSelectors = [ 'div[id="closed-captions-button"]', '[data-tid="closed-captions-button"]', @@ -128,15 +137,68 @@ export class CaptionsProcedure { } } - // Strategy 2: Authenticated Teams UI — "Captions & transcripts" submenu first + // ── Strategy 2: "Record and transcribe" → "Start transcription" ── + // Authenticated Teams 2025+: More → Record and transcribe → Start transcription + // After clicking, a spoken-language-selection-dialog appears (handled later). + const recordMenuSelectors = [ + '[data-tid="RecordingMenuControl-id"]', + 'div[role="menuitem"]:has-text("Record and transcribe")', + 'div[role="menuitem"]:has-text("Aufzeichnen und transkribieren")', + 'div[role="menuitem"]:has-text("Aufnehmen und transkribieren")', + ]; + + for (const selector of recordMenuSelectors) { + try { + const item = await this._page.$(selector); + if (item) { + await item.click(); + this._logger.info(`Clicked "Record and transcribe": ${selector}`); + await this._page.waitForTimeout(1500); + + // Log the submenu items + await this._logVisibleMenuItems(); + + // Click "Start transcription" + const transcriptionSelectors = [ + '[role="menuitem"]:has-text("Start transcription")', + '[role="menuitem"]:has-text("Transkription starten")', + '[role="menuitem"]:has-text("transcription")', + '[role="menuitem"]:has-text("Transkription")', + 'button:has-text("Start transcription")', + 'button:has-text("Transkription starten")', + 'div:has-text("Start transcription")[role="menuitem"]', + ]; + + for (const transSel of transcriptionSelectors) { + try { + const transBtn = await this._page.$(transSel); + if (transBtn) { + await transBtn.click(); + this._logger.info(`Clicked "Start transcription": ${transSel}`); + await this._page.waitForTimeout(2000); + return; // language dialog handled by _handleLanguageDialog() + } + } catch { + // Continue + } + } + + this._logger.warn('"Record and transcribe" opened but "Start transcription" not found'); + await this._page.keyboard.press('Escape'); + break; + } + } catch { + // Continue + } + } + + // ── Strategy 3: "Captions & transcripts" submenu (older Teams) ── const submenuSelectors = [ '[data-tid="captions-and-transcripts-button"]', '[role="menuitem"]:has-text("Captions & transcripts")', '[role="menuitem"]:has-text("Captions and transcripts")', '[role="menuitem"]:has-text("Untertitel und Transkripte")', '[role="menuitem"]:has-text("Untertitel")', - 'button:has-text("Captions & transcripts")', - 'button:has-text("Captions and transcripts")', ]; for (const selector of submenuSelectors) { @@ -147,12 +209,10 @@ export class CaptionsProcedure { this._logger.info(`Clicked captions submenu: ${selector}`); await this._page.waitForTimeout(1500); - // Now look for "Turn on live captions" inside the submenu/panel const enableSelectors = [ 'button:has-text("Turn on live captions")', 'button:has-text("Live captions")', 'button:has-text("Live-Untertitel aktivieren")', - 'button:has-text("Liveuntertitel")', '[role="menuitem"]:has-text("Turn on live captions")', '[role="menuitem"]:has-text("Live captions")', '[role="menuitemcheckbox"]:has-text("captions")', @@ -181,8 +241,7 @@ export class CaptionsProcedure { } } - // Strategy 2b: "Language and speech" submenu (authenticated Teams 2025+) - // In the new Teams, captions are under "Language and speech" → toggle inside panel + // ── Strategy 4: "Language and speech" panel toggle ── const langSpeechSelectors = [ '[data-tid="LanguageSpeechMenuControl-id"]', 'div[role="menuitem"]:has-text("Language and speech")', @@ -197,7 +256,6 @@ export class CaptionsProcedure { this._logger.info(`Clicked "Language and speech": ${selector}`); await this._page.waitForTimeout(2000); - // Log panel toggles for debugging const panelToggles = await this._page.evaluate(() => { const switches = document.querySelectorAll( 'input[role="switch"], [role="switch"], input[type="checkbox"]' @@ -212,7 +270,6 @@ export class CaptionsProcedure { }); this._logger.info(`Panel toggles: ${JSON.stringify(panelToggles)}`); - // Find and click the live captions toggle const toggleResult = await this._page.evaluate(() => { const switches = document.querySelectorAll( 'input[role="switch"], [role="switch"], input[type="checkbox"]' @@ -234,31 +291,14 @@ export class CaptionsProcedure { return { found: true, clicked: false, info: `already on: ${label || tid}` }; } } - // Fallback: any button/link mentioning captions - const btns = document.querySelectorAll('button, [role="menuitem"], [role="option"], a'); - for (const btn of Array.from(btns)) { - const text = ((btn as HTMLElement).textContent || '').toLowerCase(); - if ( - text.includes('turn on live caption') || - text.includes('liveuntertitel aktivieren') || - text.includes('liveuntertitel einschalten') - ) { - (btn as HTMLElement).click(); - return { found: true, clicked: true, info: text.substring(0, 60) }; - } - } return { found: false, clicked: false, info: '' }; }); this._logger.info(`Captions toggle result: ${JSON.stringify(toggleResult)}`); - if (toggleResult.found && toggleResult.clicked) { await this._page.waitForTimeout(1500); } - - // Close the panel await this._page.keyboard.press('Escape'); - if (toggleResult.found) return; this._logger.warn('Language panel opened but no captions toggle found'); @@ -269,32 +309,9 @@ export class CaptionsProcedure { } } - // Strategy 3: Generic text-based fallbacks - const textFallbacks = [ - 'button:has-text("Turn on live captions")', - 'button:has-text("Live captions")', - 'button[aria-label*="captions" i]', - '[role="menuitem"]:has-text("captions")', - '[role="menuitemcheckbox"]:has-text("captions")', - ]; - - for (const selector of textFallbacks) { - try { - const button = await this._page.$(selector); - if (button) { - await button.click(); - this._logger.info(`Clicked captions (text fallback): ${selector}`); - await this._page.waitForTimeout(1000); - return; - } - } catch { - // Continue - } - } - - // Strategy 4: DOM scan — find any element mentioning "caption" in the open menu + // ── Strategy 5: DOM scan for anything containing "caption" / "transcri" ── const found = await this._page.evaluate(() => { - const keywords = ['caption', 'captions', 'untertitel', 'live caption']; + const keywords = ['caption', 'captions', 'untertitel', 'live caption', 'transcri', 'transkri']; const candidates = document.querySelectorAll( '[role="menuitem"], [role="menuitemcheckbox"], [role="menuitemradio"], button, li, div[role="option"]' ); @@ -312,22 +329,138 @@ export class CaptionsProcedure { }); if (found.clicked) { - this._logger.info(`Clicked captions via DOM scan: "${found.clicked}"`); + this._logger.info(`Clicked via DOM scan: "${found.clicked}"`); await this._page.waitForTimeout(1500); - // Check if this opened a submenu — look for "Turn on" or "enable" inside const turnOnBtn = await this._page.$('button:has-text("Turn on"), [role="menuitem"]:has-text("Turn on")'); if (turnOnBtn) { await turnOnBtn.click(); - this._logger.info('Clicked "Turn on" in captions submenu'); + this._logger.info('Clicked "Turn on" in submenu'); await this._page.waitForTimeout(1000); } return; } - // Nothing found await this._page.keyboard.press('Escape'); - this._logger.warn(`Could not find captions option. DOM scan matches: ${JSON.stringify(found.allMatches)}`); + this._logger.warn(`Could not find captions/transcription option. DOM scan: ${JSON.stringify(found.allMatches)}`); + } + + /** + * Handle the "What language is everyone speaking?" dialog. + * This dialog appears after clicking "Start transcription" in authenticated Teams. + * + * DOM structure (from user-provided HTML): + * [data-tid="spoken-language-selection-dialog"] — alertdialog + * button[data-tid="callingCaptions-spokenLanguages"] — combobox (current lang) + * button[data-tid="calling_captions_change_language_dialog_confirm_button"] + * button[data-tid="calling_captions_change_language_dialog_cancel_button"] + */ + private async _handleLanguageDialog(): Promise { + try { + const dialogSelector = '[data-tid="spoken-language-selection-dialog"]'; + await this._page.waitForSelector(dialogSelector, { timeout: 8000 }); + this._logger.info('Spoken language selection dialog appeared'); + + // Read current language from dropdown button + const dropdownSelector = 'button[data-tid="callingCaptions-spokenLanguages"]'; + const dropdown = await this._page.$(dropdownSelector); + + if (dropdown) { + const currentLang = await dropdown.evaluate( + (el) => el.textContent?.trim() || '', + ); + this._logger.info(`Dialog current language: "${currentLang}"`); + + // Check if the language is already correct + const targetNames = this._getLanguageDisplayNames(); + const isCorrect = targetNames.some((name) => + currentLang.toLowerCase().includes(name.toLowerCase()), + ); + + if (!isCorrect) { + this._logger.info(`Need to change language to: ${targetNames.join(', ')}`); + + // Open dropdown + await dropdown.click(); + await this._page.waitForTimeout(800); + + // Select the correct option + let selected = false; + for (const name of targetNames) { + if (selected) break; + const optionSelectors = [ + `[role="option"]:has-text("${name}")`, + `li:has-text("${name}")`, + `div[role="option"]:has-text("${name}")`, + ]; + for (const optSel of optionSelectors) { + try { + const option = await this._page.$(optSel); + if (option) { + await option.click(); + this._logger.info(`Selected language: ${name} (via ${optSel})`); + selected = true; + break; + } + } catch { + // Continue + } + } + } + + if (!selected) { + this._logger.warn('Could not select language in dialog dropdown'); + } + await this._page.waitForTimeout(500); + } else { + this._logger.info('Language already correct in dialog'); + } + } + + // Click "Confirm" + const confirmSelectors = [ + 'button[data-tid="calling_captions_change_language_dialog_confirm_button"]', + 'button:has-text("Confirm")', + 'button:has-text("Bestätigen")', + ]; + + for (const sel of confirmSelectors) { + try { + const btn = await this._page.$(sel); + if (btn) { + await btn.click(); + this._logger.info(`Clicked "Confirm" in language dialog: ${sel}`); + await this._page.waitForTimeout(1500); + return true; + } + } catch { + // Continue + } + } + + this._logger.warn('Language dialog found but could not click Confirm'); + return false; + } catch { + // No dialog appeared — using captions mode, not transcription + this._logger.info('No language selection dialog appeared (may be using captions mode)'); + return false; + } + } + + /** + * Get display names for the configured language (used in dropdown selection). + */ + private _getLanguageDisplayNames(): string[] { + const languageDisplayNames: Record = { + 'de-DE': ['German (Germany)', 'Deutsch (Deutschland)', 'German'], + 'de-CH': ['German (Switzerland)', 'Deutsch (Schweiz)', 'German'], + 'en-US': ['English (United States)', 'English (US)', 'English'], + 'en-GB': ['English (United Kingdom)', 'English (UK)'], + 'fr-FR': ['French (France)', 'Français (France)', 'French'], + 'fr-CH': ['French (Switzerland)', 'Français (Suisse)'], + 'it-IT': ['Italian (Italy)', 'Italiano (Italia)', 'Italian'], + }; + return languageDisplayNames[this._language] || [this._language]; } /** @@ -360,19 +493,35 @@ export class CaptionsProcedure { 'div[data-tid="closed-caption-renderer-wrapper"]', 'div[data-tid="live-captions-renderer"]', '[data-tid="caption-area"]', + // Transcript panel (authenticated Teams "Record and transcribe" flow) + '[data-tid="transcript-pane"]', + '[data-tid="transcript-view"]', + '[data-tid*="transcript"]', ]; for (const selector of containerSelectors) { try { - await this._page.waitForSelector(selector, { timeout: 15000 }); - this._logger.info(`Found captions container: ${selector}`); + await this._page.waitForSelector(selector, { timeout: 10000 }); + this._logger.info(`Found captions/transcript container: ${selector}`); return; } catch { // Try next } } - this._logger.warn('Could not find captions container - captions may not have enabled or may use a different selector'); + // Log visible data-tid elements for debugging + const tids = await this._page.evaluate(() => { + const els = document.querySelectorAll('[data-tid]'); + return Array.from(els) + .map(e => e.getAttribute('data-tid') || '') + .filter(t => t.includes('caption') || t.includes('transcript') || t.includes('subtitle')) + .slice(0, 10); + }); + if (tids.length > 0) { + this._logger.info(`Related data-tid elements found: ${JSON.stringify(tids)}`); + } + + this._logger.warn('Could not find captions/transcript container with known selectors'); } /** @@ -823,11 +972,13 @@ export class CaptionsProcedure { } } - // Try specific caption container selectors first + // Try specific caption/transcript container selectors first const containerSelectors = [ 'div[data-tid="closed-caption-renderer-wrapper"]', 'div[data-tid="live-captions-renderer"]', '[data-tid="caption-area"]', + '[data-tid="transcript-pane"]', + '[data-tid="transcript-view"]', ]; let targetNode: Element | null = null;