import { Page } from 'playwright';
import { Logger } from 'winston';
import { TranscriptEntry } from '../types';

/**
 * Handles enabling and scraping captions from Teams meetings.
 * Based on Recall.ai's open-source implementation.
 *
 * Teams web UI selectors (updated Jan 2025):
 * - More button: button[id="callingButtons-showMoreBtn"]
 * - Captions button: div[id="closed-captions-button"]
 * - Captions container: div[data-tid="closed-caption-renderer-wrapper"]
 * - Caption author: span[data-tid="author"]
 * - Caption text: span[data-tid="closed-caption-text"]
 * - Caption message: .fui-ChatMessageCompact
 */
export class CaptionsProcedure {
  private _page: Page;
  private _logger: Logger;
  private _onTranscript: (entry: TranscriptEntry) => void;
  private _isSubscribed: boolean = false;
  private _lastCaptionText: string = '';
  private _language: string;

  /**
   * @param page Playwright page that is showing the Teams meeting.
   * @param logger Logger used for progress and diagnostic output.
   * @param onTranscript Callback stored for delivering transcript entries.
   * @param language BCP-47 style language code; falls back to 'de-DE' when
   *   omitted or empty.
   */
  constructor(
    page: Page,
    logger: Logger,
    onTranscript: (entry: TranscriptEntry) => void,
    language?: string
  ) {
    this._page = page;
    this._logger = logger;
    this._onTranscript = onTranscript;
    // `||` (not `??`) on purpose: an empty string also selects the default.
    this._language = language || 'de-DE';
  }

  /**
   * Enable live captions (or transcription) in the meeting.
   *
   * Steps, in order: open the "More" menu, click the captions/transcription
   * entry, confirm the spoken-language dialog if one shows up, then wait for
   * the captions/transcript container.
   *
   * @throws Error when the "More actions" menu cannot be found
   *   (propagated from `_openMoreMenu`).
   */
  async enableCaptionsFlow(): Promise<void> {
    this._logger.info('Enabling captions/transcription...');

    // First, open the "More actions" menu
    await this._openMoreMenu();

    // Then click on the captions/transcription button
    await this._clickEnableCaptions();

    // Handle language dialog (appears after "Start transcription" in authenticated Teams).
    // The boolean result is informational only; the flow continues either way.
    await this._handleLanguageDialog();

    // Wait for the captions/transcript container to appear
    await this._waitForCaptionsContainer();

    this._logger.info('Captions/transcription enabled');

    // Language setting skipped: spoken language is managed by the meeting organizer.
    // The previous _setSpokenLanguage() call added ~30s delay searching for a dropdown
    // that was never found, while captions already worked in the correct language.
  }

  /**
   * Open the "More actions" (...) menu in the call controls.
   * Works for both anonymous (light-meetings) and authenticated (full Teams) UI.
   *
   * Tries each known selector once without waiting; if none is present, waits
   * up to 10 s for the primary selector before giving up.
   *
   * @throws Error when no "More" button could be clicked.
   */
  private async _openMoreMenu(): Promise<void> {
    const primarySelector = 'button[id="callingButtons-showMoreBtn"]';
    const allSelectors = [
      primarySelector,
      '[data-tid="callingButtons-showMoreBtn"]',
      'button[aria-label*="More actions"]',
      'button[aria-label*="More"]',
      '[data-tid="more-button"]',
    ];

    for (const selector of allSelectors) {
      try {
        const button = await this._page.$(selector);
        if (button) {
          await button.click();
          this._logger.info(`Clicked "More" button: ${selector}`);
          // Give the menu time to render before callers query its items.
          await this._page.waitForTimeout(1000);
          return;
        }
      } catch (error) {
        // Best effort: a failed lookup/click just means we try the next selector.
        this._logger.debug(`"More" button attempt failed for ${selector}: ${error}`);
      }
    }

    // Last resort: wait for the primary selector with a short timeout
    try {
      await this._page.waitForSelector(primarySelector, { timeout: 10000 });
      await this._page.click(primarySelector);
      this._logger.info('Found "More" button (after wait)');
      await this._page.waitForTimeout(1000);
      return;
    } catch (error) {
      this._logger.debug(`Waiting for "More" button failed: ${error}`);
    }

    throw new Error('Could not find More actions menu');
  }

  /**
   * Enable captions or transcription from the "More" menu.
   *
   * Strategies in priority order:
   * 1. Direct captions button (anonymous / light-meetings UI — closed-captions-button in main menu)
   * 2. "Language and speech" → SUBMENU → "Show live captions" (authenticated Teams 2025+)
   *    The submenu item is a menuitemcheckbox with id="closed-captions-button"
   *    aria-checked="false" + "Show live captions" → click to enable
   *    aria-checked="true" + "Hide live captions" → already on, don't click
   * 3. "Record and transcribe" → "Start transcription" (fallback with transcript panel)
   * 4.
Generic text / DOM scan fallback
   */
  private async _clickEnableCaptions(): Promise<void> {
    await this._logVisibleMenuItems();

    // ── Strategy 1: Direct captions button (anonymous / light-meetings UI) ──
    // In anonymous mode, closed-captions-button appears directly in the More menu
    const directBtn = await this._page.$('#closed-captions-button');
    if (directBtn) {
      const ariaChecked = await directBtn.getAttribute('aria-checked');
      const text = await directBtn.evaluate(el => el.textContent?.trim() || '');
      this._logger.info(`Direct captions button found: aria-checked="${ariaChecked}", text="${text}"`);

      // A checked box or a "Hide ..." label means captions are on; clicking would turn them off.
      if (ariaChecked === 'true' || text.toLowerCase().includes('hide')) {
        this._logger.info('Live captions already ON (found "Hide live captions")');
        await this._page.keyboard.press('Escape');
        return;
      }

      await directBtn.click();
      this._logger.info('Clicked "Show live captions" (direct button)');
      await this._page.waitForTimeout(1000);
      return;
    }

    // ── Strategy 2: "Language and speech" → submenu → "Show live captions" ──
    // Authenticated Teams 2025+: "Language and speech" has aria-haspopup="menu"
    // Opening it reveals a submenu with closed-captions-button as menuitemcheckbox
    const langSpeechSelectors = [
      '#LanguageSpeechMenuControl-id',
      '[data-tid="LanguageSpeechMenuControl-id"]',
      'div[role="menuitem"]:has-text("Language and speech")',
      'div[role="menuitem"]:has-text("Sprache und Spracheingabe")',
    ];

    for (const selector of langSpeechSelectors) {
      try {
        const item = await this._page.$(selector);
        if (item) {
          await item.click();
          this._logger.info(`Clicked "Language and speech": ${selector}`);
          await this._page.waitForTimeout(1500);

          // Now look for closed-captions-button in the submenu
          const captionsBtn = await this._page.$('#closed-captions-button');
          if (captionsBtn) {
            const ariaChecked = await captionsBtn.getAttribute('aria-checked');
            const btnText = await captionsBtn.evaluate(el => el.textContent?.trim() || '');
            this._logger.info(`Submenu captions button: aria-checked="${ariaChecked}", text="${btnText}"`);

            if (ariaChecked === 'true' || btnText.toLowerCase().includes('hide')) {
              this._logger.info('Live captions already ON (found "Hide live captions")');
              await this._page.keyboard.press('Escape');
              return;
            }

            await captionsBtn.click();
            this._logger.info('Clicked "Show live captions" in Language and speech submenu');
            await this._page.waitForTimeout(1000);
            return;
          }

          // Fallback: try menuitemcheckbox with captions-related text
          const fallbackSelectors = [
            '[role="menuitemcheckbox"]:has-text("captions")',
            '[role="menuitemcheckbox"]:has-text("Untertitel")',
            '[role="menuitemcheckbox"]:has-text("caption")',
          ];

          for (const fbSel of fallbackSelectors) {
            try {
              const fbBtn = await this._page.$(fbSel);
              if (fbBtn) {
                const ariaChecked = await fbBtn.getAttribute('aria-checked');
                const fbText = await fbBtn.evaluate(el => el.textContent?.trim() || '');
                this._logger.info(`Fallback captions button: aria-checked="${ariaChecked}", text="${fbText}"`);

                if (ariaChecked === 'true' || fbText.toLowerCase().includes('hide')) {
                  this._logger.info('Live captions already ON');
                  await this._page.keyboard.press('Escape');
                  return;
                }

                await fbBtn.click();
                this._logger.info(`Clicked captions button: ${fbSel}`);
                await this._page.waitForTimeout(1000);
                return;
              }
            } catch {
              // Continue with the next fallback selector
            }
          }

          // The submenu was opened but held nothing usable: close it and stop
          // trying further "Language and speech" selectors.
          this._logger.warn('Language and speech submenu opened but no captions button found');
          await this._page.keyboard.press('Escape');
          break;
        }
      } catch {
        // Continue with the next "Language and speech" selector
      }
    }

    // ── Strategy 3 (fallback): "Record and transcribe" → "Start transcription" + panel ──
    this._logger.info('Live captions not found, trying transcription fallback...');

    // Clean menu state first
    await this._page.keyboard.press('Escape');
    await this._page.waitForTimeout(500);
    await this._page.keyboard.press('Escape');
    await this._page.waitForTimeout(500);

    try {
      await this._openMoreMenu();
    } catch {
      this._logger.warn('Could not re-open More menu for transcription fallback');
    }

    const recordMenuSelectors = [
      '[data-tid="RecordingMenuControl-id"]',
      '#RecordingMenuControl-id',
      'div[role="menuitem"]:has-text("Record and transcribe")',
      'div[role="menuitem"]:has-text("Aufzeichnen und transkribieren")',
    ];

    for (const selector of recordMenuSelectors) {
      try {
        const item = await this._page.$(selector);
        if (item) {
          await item.click();
          this._logger.info(`Clicked "Record and transcribe": ${selector}`);
          await this._page.waitForTimeout(1500);

          await this._logVisibleMenuItems();

          // Check if transcription is ALREADY running ("Stop transcription" visible)
          const stopSelectors = [
            '[data-tid="call-transcript-button"]:has-text("Stop")',
            '[role="menuitem"]:has-text("Stop transcription")',
            '[role="menuitem"]:has-text("Transkription beenden")',
            '[role="menuitem"]:has-text("Transkription stoppen")',
          ];

          let alreadyRunning = false;
          for (const stopSel of stopSelectors) {
            try {
              const stopBtn = await this._page.$(stopSel);
              if (stopBtn) {
                this._logger.info('Transcription already running (found "Stop transcription") — not clicking');
                alreadyRunning = true;
                break;
              }
            } catch {
              // Continue
            }
          }

          if (!alreadyRunning) {
            const startSelectors = [
              '[data-tid="call-transcript-button"]:has-text("Start")',
              '[role="menuitem"]:has-text("Start transcription")',
              '[role="menuitem"]:has-text("Transkription starten")',
            ];

            let started = false;
            for (const startSel of startSelectors) {
              try {
                const startBtn = await this._page.$(startSel);
                if (startBtn) {
                  await startBtn.click();
                  this._logger.info(`Clicked "Start transcription": ${startSel}`);
                  await this._page.waitForTimeout(2000);
                  started = true;
                  break;
                }
              } catch {
                // Continue
              }
            }

            if (!started) {
              this._logger.warn('"Record and transcribe" opened but "Start transcription" not found');
            }
          }

          // Click "Show transcript" to open the transcript panel for scraping
          const showTranscriptSelectors = [
            '[data-tid="transcript-panel-button"]',
            '[role="menuitem"]:has-text("Show transcript")',
            '[role="menuitem"]:has-text("Transkript anzeigen")',
          ];

          for (const showSel of showTranscriptSelectors) {
            try {
              const showBtn = await this._page.$(showSel);
              if (showBtn) {
                await showBtn.click();
                this._logger.info(`Clicked "Show transcript": ${showSel}`);
                await this._page.waitForTimeout(2000);
                break;
              }
            } catch {
              // Continue
            }
          }

          await this._page.keyboard.press('Escape');
          await this._page.waitForTimeout(500);
          return;
        }
      } catch {
        // Continue with the next "Record and transcribe" selector
      }
    }

    // ── Strategy 4: DOM scan for anything containing "caption" / "transcri" ──
    const found = await this._page.evaluate((): { clicked: string | null; allMatches: string[] } => {
      // 'caption' also covers "captions" / "live caption"; matching is by substring.
      const keywords = ['caption', 'untertitel', 'transcri', 'transkri'];
      // Controls whose label says they turn the feature OFF must never be
      // clicked by this blind scan (e.g. "Hide live captions", "Stop transcription").
      const disableWords = ['hide', 'stop', 'ausblenden', 'beenden', 'stoppen'];
      const candidates = document.querySelectorAll(
        '[role="menuitem"], [role="menuitemcheckbox"], [role="menuitemradio"], button, li, div[role="option"]'
      );

      // Every keyword match seen, so the failure log shows what was skipped.
      const matches: string[] = [];
      for (const node of Array.from(candidates)) {
        const el = node as HTMLElement;
        const text = el.innerText?.toLowerCase()?.trim() || '';
        if (!text || !keywords.some(kw => text.includes(kw))) continue;

        const label = text.substring(0, 60);
        matches.push(label);
        if (disableWords.some(word => text.includes(word))) continue;

        el.click();
        return { clicked: label, allMatches: matches };
      }
      return { clicked: null, allMatches: matches };
    });

    if (found.clicked) {
      this._logger.info(`Clicked via DOM scan: "${found.clicked}"`);
      await this._page.waitForTimeout(1500);
      return;
    }

    await this._page.keyboard.press('Escape');
    this._logger.warn(`Could not find captions/transcription option. DOM scan: ${JSON.stringify(found.allMatches)}`);
  }

  /**
   * Handle the "What language is everyone speaking?" dialog.
   * This dialog appears after clicking "Start transcription" in authenticated Teams.
*
   * DOM structure (from user-provided HTML):
   *   [data-tid="spoken-language-selection-dialog"] — alertdialog
   *   button[data-tid="callingCaptions-spokenLanguages"] — combobox (current lang)
   *   button[data-tid="calling_captions_change_language_dialog_confirm_button"]
   *   button[data-tid="calling_captions_change_language_dialog_cancel_button"]
   *
   * @returns true only when the dialog appeared and its "Confirm" button was
   *   clicked; false when no dialog appeared or handling it failed. Never throws.
   */
  private async _handleLanguageDialog(): Promise<boolean> {
    const dialogSelector = '[data-tid="spoken-language-selection-dialog"]';

    // Step 1: does the dialog show up at all? A timeout here is the normal
    // "captions mode" case and is reported at info level only.
    try {
      await this._page.waitForSelector(dialogSelector, { timeout: 3000 });
    } catch {
      // No dialog appeared — using captions mode, not transcription
      this._logger.info('No language selection dialog appeared (may be using captions mode)');
      return false;
    }
    this._logger.info('Spoken language selection dialog appeared');

    // Step 2: the dialog is present. Failures from here on are real problems
    // and are reported as such instead of being logged as "no dialog".
    try {
      // Read current language from dropdown button
      const dropdownSelector = 'button[data-tid="callingCaptions-spokenLanguages"]';
      const dropdown = await this._page.$(dropdownSelector);

      if (dropdown) {
        const currentLang = await dropdown.evaluate(
          (el) => el.textContent?.trim() || '',
        );
        this._logger.info(`Dialog current language: "${currentLang}"`);

        // Check if the language is already correct (case-insensitive substring match
        // against any of the configured display names)
        const targetNames = this._getLanguageDisplayNames();
        const currentLower = currentLang.toLowerCase();
        const isCorrect = targetNames.some((name) => currentLower.includes(name.toLowerCase()));

        if (!isCorrect) {
          this._logger.info(`Need to change language to: ${targetNames.join(', ')}`);

          // Open dropdown
          await dropdown.click();
          await this._page.waitForTimeout(800);

          // Select the correct option; names are tried in their listed order
          let selected = false;
          for (const name of targetNames) {
            if (selected) break;
            const optionSelectors = [
              `[role="option"]:has-text("${name}")`,
              `li:has-text("${name}")`,
              `div[role="option"]:has-text("${name}")`,
            ];
            for (const optSel of optionSelectors) {
              try {
                const option = await this._page.$(optSel);
                if (option) {
                  await option.click();
                  this._logger.info(`Selected language: ${name} (via ${optSel})`);
                  selected = true;
                  break;
                }
              } catch {
                // Continue with the next option selector
              }
            }
          }

          if (!selected) {
            this._logger.warn('Could not select language in dialog dropdown');
          }
          await this._page.waitForTimeout(500);
        } else {
          this._logger.info('Language already correct in dialog');
        }
      }

      // Click "Confirm"
      const confirmSelectors = [
        'button[data-tid="calling_captions_change_language_dialog_confirm_button"]',
        'button:has-text("Confirm")',
        'button:has-text("Bestätigen")',
      ];

      for (const sel of confirmSelectors) {
        try {
          const btn = await this._page.$(sel);
          if (btn) {
            await btn.click();
            this._logger.info(`Clicked "Confirm" in language dialog: ${sel}`);
            await this._page.waitForTimeout(1500);
            return true;
          }
        } catch {
          // Continue with the next confirm selector
        }
      }

      this._logger.warn('Language dialog found but could not click Confirm');
      return false;
    } catch (error) {
      this._logger.warn(`Language dialog appeared but handling it failed: ${error}`);
      return false;
    }
  }

  /**
   * Get display names for the configured language (used in dropdown selection).
   *
   * @returns The known display-name variants for `this._language`, most
   *   specific first; for an unlisted language code, a one-element array
   *   containing the code itself.
   */
  private _getLanguageDisplayNames(): string[] {
    const languageDisplayNames: Record<string, string[]> = {
      'de-DE': ['German (Germany)', 'Deutsch (Deutschland)', 'German'],
      'de-CH': ['German (Switzerland)', 'Deutsch (Schweiz)', 'German'],
      'en-US': ['English (United States)', 'English (US)', 'English'],
      'en-GB': ['English (United Kingdom)', 'English (UK)'],
      'fr-FR': ['French (France)', 'Français (France)', 'French'],
      'fr-CH': ['French (Switzerland)', 'Français (Suisse)'],
      'it-IT': ['Italian (Italy)', 'Italiano (Italia)', 'Italian'],
    };
    return languageDisplayNames[this._language] ?? [this._language];
  }

  /**
   * Log visible menu items for debugging when captions button is not found.
*/
  private async _logVisibleMenuItems(): Promise<void> {
    try {
      const menuItems = await this._page.evaluate(() => {
        const items = document.querySelectorAll(
          '[role="menuitem"], [role="menuitemcheckbox"], [role="menuitemradio"]'
        );
        return Array.from(items)
          .map(el => {
            const text = (el as HTMLElement).innerText?.trim()?.substring(0, 50) || '';
            const tid = el.getAttribute('data-tid') || '';
            const id = el.id || '';
            return `[${tid || id || 'no-id'}] ${text}`;
          })
          // Drop very short entries (6+ characters are kept).
          .filter(t => t.length > 5);
      });
      this._logger.info(`Visible menu items (${menuItems.length}): ${JSON.stringify(menuItems)}`);
    } catch {
      // Not critical — this is diagnostic output only
    }
  }

  /**
   * Wait for the captions container to become visible after enabling.
   *
   * All known container selectors are polled together, so a container is
   * reported as soon as any of them is visible, regardless of its position in
   * the list. The total wait budget is 8 s per selector. When nothing shows up
   * in time, the caption/transcript-related `data-tid` elements are logged for
   * debugging and the method returns without throwing.
   */
  private async _waitForCaptionsContainer(): Promise<void> {
    const containerSelectors = [
      'div[data-tid="closed-caption-renderer-wrapper"]',
      'div[data-tid="live-captions-renderer"]',
      '[data-tid="caption-area"]',
      '[data-tid="transcript-pane"]',
      '[data-tid="transcript-view"]',
      '[data-tid="transcript-content"]',
    ];
    const perSelectorBudgetMs = 8000;
    const pollIntervalMs = 250;
    const deadline = Date.now() + perSelectorBudgetMs * containerSelectors.length;

    do {
      for (const selector of containerSelectors) {
        let visible = false;
        try {
          // A selector that matches nothing is reported as not visible.
          visible = await this._page.isVisible(selector);
        } catch {
          // Treat a failed check like "not there yet" and try the next selector
        }
        if (visible) {
          this._logger.info(`Found captions/transcript container: ${selector}`);
          return;
        }
      }
      await this._page.waitForTimeout(pollIntervalMs);
    } while (Date.now() < deadline);

    // Log ALL transcript/caption related data-tid elements for debugging
    const tids = await this._page.evaluate(() => {
      const els = document.querySelectorAll('[data-tid]');
      return Array.from(els)
        .map(e => ({
          tid: e.getAttribute('data-tid') || '',
          tag: e.tagName,
          h: (e as HTMLElement).offsetHeight,
          w: (e as HTMLElement).offsetWidth,
        }))
        .filter(t =>
          t.tid.includes('caption') ||
          t.tid.includes('transcript') ||
          t.tid.includes('subtitle'),
        )
        .slice(0, 15);
    });
    this._logger.info(`Transcript/caption data-tid elements: ${JSON.stringify(tids)}`);

    this._logger.warn('Could not find captions/transcript container with known selectors');
  }

  /**
   * Set the spoken language for captions.
   *
   * Teams defaults to English for anonymous users.
This method attempts to
   * change the "Meeting spoken language" to the configured language (e.g. "de-DE").
   *
   * Flow (per Microsoft docs):
   * 1. Click "Caption settings" (gear/settings icon near captions area)
   * 2. Click "Language settings"
   * 3. Change "Meeting spoken language" dropdown
   * 4. Click "Update"
   *
   * Note: Changing spoken language affects ALL meeting participants.
   *
   * Best effort: every failure is logged and swallowed, so this method does
   * not throw.
   */
  private async _setSpokenLanguage(): Promise<void> {
    // Teams display names for the configured BCP-47 code (shared with the language dialog handling)
    const targetNames = this._getLanguageDisplayNames();

    this._logger.info(`Setting spoken language to: ${this._language} (looking for: ${targetNames.join(', ')})`);

    try {
      // Wait a moment for the captions UI to stabilize
      await this._page.waitForTimeout(2000);

      let settingsOpened = false;

      // Strategy 1: Try "Caption settings" gear button near the captions area
      const captionSettingsSelectors = [
        'button[aria-label*="Caption settings" i]',
        'button[aria-label*="Captions settings" i]',
        'button[aria-label*="Untertiteleinstellungen" i]',
        'button[data-tid="caption-settings-button"]',
        'button[id="caption-settings-button"]',
        // Teams 2025+: settings icon inside the captions banner
        'button[aria-label*="Settings" i][data-tid*="caption" i]',
      ];

      for (const selector of captionSettingsSelectors) {
        try {
          const button = await this._page.$(selector);
          if (button) {
            await button.click();
            this._logger.info(`Clicked caption settings: ${selector}`);
            settingsOpened = true;
            await this._page.waitForTimeout(1000);
            break;
          }
        } catch {
          // Continue
        }
      }

      // Strategy 2: More menu > "Language and speech" / "Captions & transcripts"
      if (!settingsOpened) {
        this._logger.info('Caption settings button not found, trying More menu > Language and speech...');
        await this._openMoreMenu();
        await this._page.waitForTimeout(1000);

        // All selectors must have an element prefix for Playwright
        const languageMenuSelectors = [
          '[data-tid="captions-and-transcripts-button"]',
          '[data-tid="language-and-speech-button"]',
          'div[role="menuitem"]:has-text("Captions & transcripts")',
          'div[role="menuitem"]:has-text("Captions and transcripts")',
          'div[role="menuitem"]:has-text("Untertitel und Transkripte")',
          'div[role="menuitem"]:has-text("Language and speech")',
          'div[role="menuitem"]:has-text("Sprache und Spracheingabe")',
          'button:has-text("Captions & transcripts")',
          'button:has-text("Captions and transcripts")',
          'button:has-text("Language and speech")',
          'button:has-text("Sprache und Spracheingabe")',
          'li:has-text("Captions")',
          'li:has-text("Language")',
          'li:has-text("Untertitel")',
          'li:has-text("Sprache")',
        ];

        for (const selector of languageMenuSelectors) {
          try {
            const item = await this._page.$(selector);
            if (item) {
              await item.click();
              this._logger.info(`Clicked language menu: ${selector}`);
              settingsOpened = true;
              await this._page.waitForTimeout(1000);
              break;
            }
          } catch {
            // Continue
          }
        }
      }

      // Strategy 3: Search all visible menu items by evaluating text content
      if (!settingsOpened) {
        this._logger.info('Standard selectors failed, scanning menu items by text...');
        const found = await this._page.evaluate(() => {
          const keywords = [
            'caption', 'captions', 'untertitel',
            'language', 'sprache', 'spoken',
          ];
          // Search all menu items, buttons, and clickable elements
          const candidates = document.querySelectorAll(
            '[role="menuitem"], [role="menuitemcheckbox"], [role="menuitemradio"], button, li'
          );
          const elArray = Array.from(candidates);
          for (let i = 0; i < elArray.length; i++) {
            const el = elArray[i] as HTMLElement;
            const text = el.innerText?.toLowerCase() || '';
            if (keywords.some(kw => text.includes(kw))) {
              el.click();
              return text;
            }
          }
          return null;
        });
        if (found) {
          this._logger.info(`Clicked menu item by text scan: "${found}"`);
          settingsOpened = true;
          await this._page.waitForTimeout(1000);
        }
      }

      if (!settingsOpened) {
        this._logger.warn('Could not open language settings - captions will use default language (English)');
        return;
      }

      // Look for sub-options like "Change spoken language" / "Language settings"
      const langSettingsSelectors = [
        'button:has-text("Change spoken language")',
        'button:has-text("Gesprochene Sprache ändern")',
        'button:has-text("Language settings")',
        'button:has-text("Spracheinstellungen")',
        'button:has-text("Spoken language")',
        'button:has-text("Gesprochene Sprache")',
        'div[role="menuitem"]:has-text("Change spoken language")',
        'div[role="menuitem"]:has-text("Spoken language")',
        'div[role="menuitem"]:has-text("Gesprochene Sprache")',
        'a:has-text("Change spoken language")',
        'a:has-text("Spoken language")',
      ];

      for (const selector of langSettingsSelectors) {
        try {
          const item = await this._page.$(selector);
          if (item) {
            await item.click();
            this._logger.info(`Clicked language settings: ${selector}`);
            await this._page.waitForTimeout(1000);
            break;
          }
        } catch {
          // Continue - might already be on the language settings page
        }
      }

      // Look for the spoken language dropdown/combobox
      let languageSet = false;

      // First, log what's visible in the settings panel for debugging
      const panelInfo = await this._page.evaluate(() => {
        const selects = document.querySelectorAll('select');
        const comboboxes = document.querySelectorAll('[role="combobox"]');
        const listboxes = document.querySelectorAll('[role="listbox"]');
        const dropdowns = document.querySelectorAll('[class*="dropdown" i], [class*="Dropdown" i]');
        const allButtons = document.querySelectorAll('button');
        const buttonsWithText = Array.from(allButtons)
          .map(b => `${b.tagName}[${b.getAttribute('aria-label') || b.textContent?.trim().substring(0, 40)}]`)
          .filter(t => t.length > 10)
          .slice(0, 10);
        return {
          selects: selects.length,
          comboboxes: comboboxes.length,
          listboxes: listboxes.length,
          dropdowns: dropdowns.length,
          buttons: buttonsWithText,
          bodySnippet: document.body?.innerText?.substring(0, 800) || '',
        };
      });
      this._logger.info(`Caption settings panel - selects: ${panelInfo.selects}, comboboxes: ${panelInfo.comboboxes}, listboxes: ${panelInfo.listboxes}, dropdowns: ${panelInfo.dropdowns}`);
      this._logger.info(`Panel buttons: ${JSON.stringify(panelInfo.buttons)}`);
      this._logger.debug(`Panel text: ${panelInfo.bodySnippet.substring(0, 300)}`);

      // Strategy A: Standard selectors
      const dropdownSelectors = [
        'select[aria-label*="spoken language" i]',
        'select[aria-label*="Meeting spoken language" i]',
        'select[aria-label*="Gesprochene Sprache" i]',
        '[data-tid="spoken-language-dropdown"]',
        'div[role="combobox"]',
        'div[role="listbox"]',
        'select',
      ];

      for (const selector of dropdownSelectors) {
        if (languageSet) break;
        try {
          const dropdown = await this._page.$(selector);
          if (dropdown) {
            const tagName = await dropdown.evaluate(el => el.tagName.toLowerCase());

            if (tagName === 'select') {
              // Native <select>: pick the option by its visible label
              for (const name of targetNames) {
                try {
                  await this._page.selectOption(selector, { label: name });
                  this._logger.info(`Selected spoken language: ${name}`);
                  languageSet = true;
                  break;
                } catch {
                  // Try next name variant
                }
              }
            } else {
              // Fluent UI dropdown/combobox: open it, then click the matching option
              await dropdown.click();
              await this._page.waitForTimeout(500);

              for (const name of targetNames) {
                try {
                  const optionSelectors = [
                    `[role="option"]:has-text("${name}")`,
                    `li:has-text("${name}")`,
                    `div[role="option"]:has-text("${name}")`,
                    `span:has-text("${name}")`,
                  ];
                  for (const optSel of optionSelectors) {
                    const option = await this._page.$(optSel);
                    if (option) {
                      await option.click();
                      this._logger.info(`Selected spoken language: ${name} (via ${optSel})`);
                      languageSet = true;
                      break;
                    }
                  }
                  if (languageSet) break;
                } catch {
                  // Try next name variant
                }
              }
            }

            if (languageSet) break;
          }
        } catch {
          // Continue
        }
      }

      // Strategy B: DOM evaluation fallback - find any dropdown-like element and interact
      if (!languageSet) {
        this._logger.info('Standard dropdown selectors failed, trying DOM evaluation fallback...');
        languageSet = await this._page.evaluate((names: string[]) => {
          // Find all elements that could be dropdowns (Fluent UI uses various patterns)
          const candidates = document.querySelectorAll(
            '[role="combobox"], [role="listbox"], select, ' +
            '[class*="dropdown" i], [class*="Dropdown"], ' +
            'button[aria-haspopup="listbox"], button[aria-haspopup="true"], ' +
            '[aria-expanded]'
          );

          for (let i = 0; i < candidates.length; i++) {
            const el = candidates[i] as HTMLElement;
            const label = el.getAttribute('aria-label') || '';
            const nearbyText = el.parentElement?.innerText || '';

            // Check if this dropdown is related to language
            const isLanguageRelated =
              label.toLowerCase().includes('language') ||
              label.toLowerCase().includes('sprache') ||
              nearbyText.toLowerCase().includes('spoken language') ||
              nearbyText.toLowerCase().includes('gesprochene sprache');

            if (isLanguageRelated || candidates.length === 1) {
              // Click to open the dropdown
              el.click();

              // Wait two animation frames for options to render, then resolve
              // with whether a matching option was clicked
              return new Promise<boolean>((resolve) => {
                requestAnimationFrame(() => {
                  requestAnimationFrame(() => {
                    // Look for options
                    const options = document.querySelectorAll(
                      '[role="option"], [role="menuitem"], li[class*="option" i]'
                    );
                    for (let j = 0; j < options.length; j++) {
                      const opt = options[j] as HTMLElement;
                      const optText = opt.innerText?.trim() || '';
                      if (names.some(n => optText.includes(n))) {
                        opt.click();
                        resolve(true);
                        return;
                      }
                    }
                    resolve(false);
                  });
                });
              });
            }
          }
          return Promise.resolve(false);
        }, targetNames);

        if (languageSet) {
          this._logger.info('Selected spoken language via DOM evaluation fallback');
          await this._page.waitForTimeout(500);
        }
      }

      if (!languageSet) {
        this._logger.warn('Could not find/select spoken language in dropdown');
      }

      // Click "Update" / "Apply" / "Confirm" button
      const updateSelectors = [
        'button:has-text("Update")',
        'button:has-text("Apply")',
        'button:has-text("Confirm")',
        'button:has-text("Aktualisieren")',
        'button:has-text("Übernehmen")',
        'button:has-text("Bestätigen")',
        'button[data-tid="language-update-button"]',
      ];

      for (const selector of updateSelectors) {
        try {
          const button = await this._page.$(selector);
          if (button) {
            await button.click();
            this._logger.info(`Clicked update button: ${selector}`);
            await this._page.waitForTimeout(1000);
            break;
          }
        } catch {
          // Continue
        }
      }

      // Close any open dialogs/menus
      await this._page.keyboard.press('Escape');

      this._logger.info(`Spoken language setting attempt completed (set: ${languageSet})`);
    } catch (error) {
      this._logger.warn(`Could not set spoken language to ${this._language}: ${error}`);
      // Not fatal - captions will still work, just in the wrong language
    }
  }

  /**
   * Start watching the captions DOM for updates using Recall.ai's approach.
   *
   * Uses page.exposeFunction() + MutationObserver for real-time caption detection.
   * Captions in Teams are rendered inside .fui-ChatMessageCompact elements with:
   * - span[data-tid="author"] for the speaker name
   * - span[data-tid="closed-caption-text"] for the caption text
   *
   * Teams updates captions in real-time as the user speaks, adding punctuation
   * only when the caption is finalized. We use this to detect final captions.
*/ async subscribeToCaptions(): Promise { if (this._isSubscribed) { this._logger.warn('Already subscribed to captions'); return; } this._isSubscribed = true; this._logger.info('Subscribing to captions...'); // Expose callback functions from Node.js to the browser context await this._page.exposeFunction('__onCaptionEvent', (caption: { speaker: string; text: string; timestamp: string; }) => { this._handleCaptionEvent(caption); }); // Debug callback: logs transcript DOM structure to help identify selectors try { await this._page.exposeFunction('__onCaptionDebug', (info: { tag: string; tid: string; classes: string; text: string; children: number; html: string; }) => { this._logger.info( `TranscriptDOM: <${info.tag} data-tid="${info.tid}"> ` + `children=${info.children}, text="${info.text}"`, ); this._logger.debug(`TranscriptDOM html: ${info.html}`); }); } catch { // May already be exposed } // Wait for a known container const waitSelectors = [ 'div[data-tid="closed-caption-renderer-wrapper"]', 'div[data-tid="live-captions-renderer"]', '[data-tid="caption-area"]', '[data-tid="transcript-pane"]', '[data-tid="transcript-view"]', '[data-tid="transcript-content"]', ]; let containerFound = false; for (const sel of waitSelectors) { try { await this._page.waitForSelector(sel, { timeout: 8000 }); containerFound = true; this._logger.info(`Captions/transcript container found: ${sel}`); break; } catch { // Try next } } if (!containerFound) { // Log all transcript/caption related elements for debugging const transcriptTids = await this._page.evaluate(() => { const els = document.querySelectorAll('[data-tid]'); return Array.from(els) .map(e => ({ tid: e.getAttribute('data-tid') || '', tag: e.tagName, h: (e as HTMLElement).offsetHeight, w: (e as HTMLElement).offsetWidth, children: e.children?.length || 0, })) .filter(t => t.tid.includes('caption') || t.tid.includes('transcript') || t.tid.includes('subtitle'), ) .slice(0, 20); }); this._logger.info( `No exact container match. 
Transcript/caption elements: ${JSON.stringify(transcriptTids)}`, ); this._logger.warn('Captions/transcript container not found, subscribing with body fallback'); } this._logger.info('Setting up MutationObserver for captions/transcription...'); const observerTarget = await this._page.evaluate(() => { // ── Helper: extract caption data (anonymous/light-meetings captions) ── function _extractCaption(element: HTMLElement): boolean { const captionMessage = element.querySelector('.fui-ChatMessageCompact') || (element.classList?.contains('fui-ChatMessageCompact') ? element : null); if (captionMessage) { const authorElement = captionMessage.querySelector('span[data-tid="author"]'); const contentElement = captionMessage.querySelector('span[data-tid="closed-caption-text"]'); if (authorElement && contentElement) { const textObserver = new MutationObserver(() => { const speaker = authorElement.textContent?.trim() ?? 'Unknown'; const text = (contentElement as any).innerText?.trim() ?? ''; (window as any).__onCaptionEvent({ speaker, text, timestamp: new Date().toISOString(), }); }); textObserver.observe(contentElement, { childList: true, subtree: true, characterData: true, }); return true; } } return false; } // ── Helper: extract transcript entry (authenticated Teams transcript panel) ── function _extractTranscript(element: HTMLElement): boolean { const text = element.innerText?.trim(); if (!text || text.length < 2) return false; // Strategy A: data-tid based speaker/text elements const speakerEl = element.querySelector( '[data-tid*="speaker"], [data-tid*="author"], [data-tid*="name"], ' + '[data-tid*="participant"]' ); const textEl = element.querySelector( '[data-tid*="text"], [data-tid*="content"], [data-tid*="body"], ' + '[data-tid*="message"]' ); if (speakerEl && textEl) { const speaker = speakerEl.textContent?.trim() || 'Unknown'; const content = (textEl as HTMLElement).innerText?.trim() || ''; if (content) { (window as any).__onCaptionEvent({ speaker, text: content, 
timestamp: new Date().toISOString(), }); return true; } } // Strategy B: structural — first short text child = speaker, rest = text const directChildren = Array.from(element.children) as HTMLElement[]; if (directChildren.length >= 2) { const first = directChildren[0].innerText?.trim() || ''; const rest = directChildren .slice(1) .map(c => c.innerText?.trim()) .filter(Boolean) .join(' ') .trim(); if (first && first.length < 60 && rest && rest.length > 2) { (window as any).__onCaptionEvent({ speaker: first, text: rest, timestamp: new Date().toISOString(), }); return true; } } return false; } // ── Noise filter: skip elements that are clearly NOT captions/transcript ── const _noisePatterns = [ 'meeting ended', 'meeting started', 'was invited', 'left the chat', 'doesn\'t have a teams account', 'new notification', 'is typing', 'last read', 'verify their identity', 'left the meeting', 'joined the meeting', 'apply and restart', ]; function _isNoise(text: string): boolean { const lower = text.toLowerCase(); return _noisePatterns.some(p => lower.includes(p)); } // ── Combined handler for mutation observer ── function _handleAddedNode(node: Node): void { if (node.nodeType !== Node.ELEMENT_NODE) return; const el = node as HTMLElement; // Skip tiny/empty elements const text = el.innerText?.trim(); if (!text || text.length < 2) return; // Skip noise (chat history, notifications, system messages) if (_isNoise(text)) return; // Skip elements from the chat area (data-tid="typing-indicator" etc.) 
const tid = el.getAttribute('data-tid') || ''; if (tid === 'typing-indicator') return; // Try caption extraction first (anonymous UI) if (_extractCaption(el)) return; // Try transcript extraction (authenticated UI) if (_extractTranscript(el)) return; // Not recognized — log for debugging (only elements with meaningful text) if (text.length > 3) { (window as any).__onCaptionDebug?.({ tag: el.tagName, tid: el.getAttribute('data-tid') || '', classes: (el.className || '').substring(0, 100), text: text.substring(0, 200), children: el.children?.length || 0, html: el.innerHTML?.substring(0, 500) || '', }); } } // ── Find container ── const containerSelectors = [ 'div[data-tid="closed-caption-renderer-wrapper"]', 'div[data-tid="live-captions-renderer"]', '[data-tid="caption-area"]', '[data-tid="transcript-pane"]', '[data-tid="transcript-view"]', ]; let targetNode: Element | null = null; let targetSelector = ''; for (const sel of containerSelectors) { targetNode = document.querySelector(sel); if (targetNode) { targetSelector = sel; break; } } // Also try wildcard match (transcript container — exclude buttons/controls) if (!targetNode) { const candidates = document.querySelectorAll('[data-tid*="transcript"]'); for (const c of Array.from(candidates)) { const tid = c.getAttribute('data-tid') || ''; const tag = c.tagName; const height = (c as HTMLElement).offsetHeight || 0; // Skip buttons, small elements, and control-related elements if ( tag === 'BUTTON' || tag === 'SPAN' || tag === 'SVG' || tid.includes('button') || tid.includes('cancel') || tid.includes('stop') || height < 100 ) continue; targetNode = c; targetSelector = `[data-tid="${tid}"]`; break; } } if (targetNode) { const tid = targetNode.getAttribute('data-tid') || ''; const observer = new MutationObserver((mutationsList) => { for (const mutation of mutationsList) { if (mutation.type === 'childList') { mutation.addedNodes.forEach(_handleAddedNode); } } }); observer.observe(targetNode, { childList: true, subtree: true 
}); (window as any).__captionsObserver = observer; return `container:${tid}`; } // ── Fallback: observe document.body ── const allSelectors = [...containerSelectors]; function _isTranscriptContainer(el: Element): boolean { const tid = el.getAttribute('data-tid') || ''; if (!tid.includes('transcript')) return false; if (el.tagName === 'BUTTON' || el.tagName === 'SPAN' || el.tagName === 'SVG') return false; if (tid.includes('button') || tid.includes('cancel') || tid.includes('stop')) return false; if ((el as HTMLElement).offsetHeight < 100) return false; return true; } const bodyObserver = new MutationObserver((mutationsList) => { for (const mutation of mutationsList) { if (mutation.type !== 'childList') continue; mutation.addedNodes.forEach((node) => { if (node.nodeType !== Node.ELEMENT_NODE) return; const el = node as HTMLElement; // Check if a known container just appeared for (const sel of allSelectors) { const container = el.matches?.(sel) ? el : el.querySelector?.(sel); if (container) { bodyObserver.disconnect(); const tid = container.getAttribute('data-tid') || ''; const targeted = new MutationObserver((muts) => { for (const m of muts) { if (m.type === 'childList') { m.addedNodes.forEach(_handleAddedNode); } } }); targeted.observe(container, { childList: true, subtree: true }); (window as any).__captionsObserver = targeted; return; } } // Check if a transcript container appeared dynamically if (_isTranscriptContainer(el)) { bodyObserver.disconnect(); const tid = el.getAttribute('data-tid') || ''; const targeted = new MutationObserver((muts) => { for (const m of muts) { if (m.type === 'childList') { m.addedNodes.forEach(_handleAddedNode); } } }); targeted.observe(el, { childList: true, subtree: true }); (window as any).__captionsObserver = targeted; return; } // Also check inside the added node for transcript containers const transcriptChild = el.querySelector?.('[data-tid*="transcript"]'); if (transcriptChild && _isTranscriptContainer(transcriptChild)) { 
bodyObserver.disconnect(); const tid = transcriptChild.getAttribute('data-tid') || ''; const targeted = new MutationObserver((muts) => { for (const m of muts) { if (m.type === 'childList') { m.addedNodes.forEach(_handleAddedNode); } } }); targeted.observe(transcriptChild, { childList: true, subtree: true }); (window as any).__captionsObserver = targeted; return; } _handleAddedNode(node); }); } }); bodyObserver.observe(document.body, { childList: true, subtree: true }); (window as any).__captionsObserver = bodyObserver; return 'body-fallback'; }); this._logger.info(`MutationObserver set up for captions (target: ${observerTarget})`); }

  /**
   * Process one caption event and forward it as a transcript entry.
   *
   * An event is dropped when the procedure is not subscribed, when its text is
   * empty, when the text contains none of `. , ! ?` (treated as "not finalized
   * yet"), or when it equals the previously forwarded caption once punctuation
   * is stripped from both.
   *
   * @param caption Speaker, text and timestamp string of the caption; the
   *   timestamp string is converted with `new Date(...)` before forwarding.
   */
  private _handleCaptionEvent(caption: { speaker: string; text: string; timestamp: string }): void {
    if (!this._isSubscribed) return;
    if (!caption.text) return;

    // Any of these marks anywhere in the text counts as "finalized".
    const looksFinal = /[.,!?]/.test(caption.text);
    if (!looksFinal) return;

    // Compare against the last forwarded caption with punctuation removed, so
    // a repeat that differs only in punctuation is treated as a duplicate.
    const stripPunctuation = (value: string): string => value.replace(/[.,'"!?~\-]/g, '');
    const isRepeat = stripPunctuation(caption.text) === stripPunctuation(this._lastCaptionText);
    if (isRepeat) return;

    this._lastCaptionText = caption.text;
    this._logger.info(`Caption: [${caption.speaker}] ${caption.text}`);

    this._onTranscript({
      speaker: caption.speaker,
      text: caption.text,
      timestamp: new Date(caption.timestamp),
      isFinal: true,
    });
  }

  /**
   * Stop watching for captions.
*
   * Clears the subscription flag first, so caption events that still arrive
   * are ignored by `_handleCaptionEvent`, then disconnects the observer stored
   * on `window.__captionsObserver` inside the page and removes that reference.
   *
   * Page-side failures are logged at debug level and otherwise ignored, so
   * this method does not reject when the page is already closed.
   *
   * @returns A promise that resolves once the disconnect attempt has finished.
   */
  async unsubscribe(): Promise<void> {
    this._isSubscribed = false;

    try {
      await this._page.evaluate(() => {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any -- ad-hoc global set by the subscribe step
        const pageWindow = window as any;
        if (pageWindow.__captionsObserver) {
          pageWindow.__captionsObserver.disconnect();
          // Drop the stale reference so a repeated call is a no-op.
          delete pageWindow.__captionsObserver;
        }
      });
    } catch (error: unknown) {
      // Best-effort cleanup: the page might already be closed.
      const reason = error instanceof Error ? error.message : String(error);
      this._logger.debug(`Could not disconnect captions observer: ${reason}`);
    }

    this._logger.info('Unsubscribed from captions');
  }
}