1342 lines
50 KiB
TypeScript
1342 lines
50 KiB
TypeScript
import { Page } from 'playwright';
|
|
import { Logger } from 'winston';
|
|
import { TranscriptEntry } from '../types';
|
|
|
|
/**
|
|
* Handles enabling and scraping captions from Teams meetings.
|
|
* Based on Recall.ai's open-source implementation.
|
|
*
|
|
* Teams web UI selectors (updated Jan 2025):
|
|
* - More button: button[id="callingButtons-showMoreBtn"]
|
|
* - Captions button: div[id="closed-captions-button"]
|
|
* - Captions container: div[data-tid="closed-caption-renderer-wrapper"]
|
|
* - Caption author: span[data-tid="author"]
|
|
* - Caption text: span[data-tid="closed-caption-text"]
|
|
* - Caption message: .fui-ChatMessageCompact
|
|
*/
|
|
export class CaptionsProcedure {
|
|
// Playwright page hosting the Teams meeting UI that every method drives.
private _page: Page;
// Logger receiving progress, warning and diagnostic messages.
private _logger: Logger;
// Consumer callback for transcript entries, supplied by the constructor's caller.
private _onTranscript: (entry: TranscriptEntry) => void;
// Set by subscribeToCaptions() so that a second subscription attempt is rejected.
private _isSubscribed = false;
// Starts empty. NOTE(review): presumably the most recent caption text, kept for
// de-duplication — its use is not visible in this part of the file; confirm.
private _lastCaptionText = '';
// Spoken-language code such as 'de-DE'; the constructor supplies a default.
private _language: string;
|
|
|
|
/**
 * @param page Playwright page that is showing the Teams meeting.
 * @param logger Logger for progress and diagnostics.
 * @param onTranscript Callback taking a transcript entry.
 * @param language Spoken-language code; 'de-DE' is used when omitted or empty.
 */
constructor(
  page: Page,
  logger: Logger,
  onTranscript: (entry: TranscriptEntry) => void,
  language?: string
) {
  // `||` means an empty string falls back to the default as well.
  const spokenLanguage = language || 'de-DE';

  this._page = page;
  this._logger = logger;
  this._onTranscript = onTranscript;
  this._language = spokenLanguage;
}
|
|
|
|
/**
 * Turn on live captions (or transcription) for the current meeting.
 *
 * Steps, in order: open the "More" menu, click the captions/transcription
 * entry, handle the spoken-language dialog if it shows up, then wait for the
 * captions/transcript container. The separate spoken-language settings flow
 * only runs when the dialog did not already confirm a language.
 */
async enableCaptionsFlow(): Promise<void> {
  const log = this._logger;
  log.info('Enabling captions/transcription...');

  // The captions entry lives inside the "More actions" menu.
  await this._openMoreMenu();
  await this._clickEnableCaptions();

  // The dialog shows up after "Start transcription" in authenticated Teams.
  const languageConfirmedInDialog = await this._handleLanguageDialog();

  await this._waitForCaptionsContainer();
  log.info('Captions/transcription enabled');

  if (languageConfirmedInDialog) {
    return;
  }

  // No dialog was confirmed, so set the language through the settings UI.
  await this._setSpokenLanguage();
}
|
|
|
|
/**
 * Click the "More actions" (...) button in the call controls.
 *
 * Every known selector is probed once without waiting; if none is present the
 * primary selector is awaited for up to 10 seconds as a last resort.
 *
 * @throws Error when no "More" button could be clicked.
 */
private async _openMoreMenu(): Promise<void> {
  const primarySelector = 'button[id="callingButtons-showMoreBtn"]';
  const candidates = [
    primarySelector,
    '[data-tid="callingButtons-showMoreBtn"]',
    'button[aria-label*="More actions"]',
    'button[aria-label*="More"]',
    '[data-tid="more-button"]',
  ];

  // Pass 1: take whichever candidate is already in the DOM.
  for (let i = 0; i < candidates.length; i++) {
    const candidate = candidates[i];
    try {
      const handle = await this._page.$(candidate);
      if (!handle) {
        continue;
      }
      await handle.click();
      this._logger.info(`Clicked "More" button: ${candidate}`);
      await this._page.waitForTimeout(1000);
      return;
    } catch {
      // Probe or click failed — move on to the next candidate.
    }
  }

  // Pass 2: the controls may not be rendered yet, so wait for the primary one.
  let clickedAfterWait = false;
  try {
    await this._page.waitForSelector(primarySelector, { timeout: 10000 });
    await this._page.click(primarySelector);
    this._logger.info('Found "More" button (after wait)');
    await this._page.waitForTimeout(1000);
    clickedAfterWait = true;
  } catch {
    // Timed out or click failed — reported through the error below.
  }

  if (!clickedAfterWait) {
    throw new Error('Could not find More actions menu');
  }
}
|
|
|
|
/**
 * Enable captions or transcription from the "More" menu.
 *
 * Strategies, tried in priority order (the first one that succeeds returns):
 *  1. Direct captions button (anonymous / light-meetings UI)
 *  2. "Language and speech" → live captions toggle (authenticated, no panel needed)
 *  3. "Captions & transcripts" submenu (older authenticated Teams)
 *  4. "Record and transcribe" → "Start transcription" → "Show transcript"
 *     (starting transcription triggers the spoken-language dialog that
 *     _handleLanguageDialog() deals with)
 *  5. DOM text scan for any control mentioning captions / transcription
 *
 * When no strategy finds a control, the menu is dismissed with Escape and a
 * warning is logged; no error is raised for that case.
 */
private async _clickEnableCaptions(): Promise<void> {
  // Clicks the first selector that resolves to an element, logs it, lets the UI
  // settle, and reports whether anything was clicked. A failing selector or
  // click is skipped so the next candidate still gets a chance.
  const clickFirst = async (
    selectors: readonly string[],
    describe: (selector: string) => string,
    settleMs: number,
  ): Promise<boolean> => {
    for (const selector of selectors) {
      try {
        const handle = await this._page.$(selector);
        if (handle) {
          await handle.click();
          this._logger.info(describe(selector));
          await this._page.waitForTimeout(settleMs);
          return true;
        }
      } catch {
        // Try the next selector.
      }
    }
    return false;
  };

  await this._logVisibleMenuItems();

  // ── Strategy 1: Direct captions button (anonymous / light-meetings UI) ──
  const directSelectors = [
    'div[id="closed-captions-button"]',
    '[data-tid="closed-captions-button"]',
    '[data-tid="captions-toggle"]',
  ];
  if (await clickFirst(directSelectors, (s) => `Clicked direct captions button: ${s}`, 1000)) {
    return;
  }

  // ── Strategy 2: "Language and speech" → live captions toggle (no panel) ──
  // Preferred for authenticated joins: enables the caption overlay at the bottom.
  const langSpeechSelectors = [
    '[data-tid="LanguageSpeechMenuControl-id"]',
    'div[role="menuitem"]:has-text("Language and speech")',
    'div[role="menuitem"]:has-text("Sprache und Spracheingabe")',
  ];

  for (const selector of langSpeechSelectors) {
    try {
      const item = await this._page.$(selector);
      if (!item) {
        continue;
      }
      await item.click();
      this._logger.info(`Clicked "Language and speech": ${selector}`);
      await this._page.waitForTimeout(2000);

      // Diagnostics: list every switch-like control in the opened panel.
      const panelToggles = await this._page.evaluate(() => {
        const isOn = (el: Element): boolean =>
          el instanceof HTMLInputElement
            ? el.checked
            : el.getAttribute('aria-checked') === 'true';
        const switches = document.querySelectorAll(
          'input[role="switch"], [role="switch"], input[type="checkbox"]'
        );
        return Array.from(switches).map((s) => ({
          tid: s.getAttribute('data-tid') || '',
          label: s.getAttribute('aria-label') || '',
          checked: isOn(s),
          nearText: (s.closest('div, label')?.textContent || '').trim().substring(0, 80),
        }));
      });
      this._logger.info(`Panel toggles: ${JSON.stringify(panelToggles)}`);

      const toggleResult = await this._page.evaluate(() => {
        // Native inputs expose `.checked`; ARIA-only switches (e.g. a div or
        // button with role="switch") carry their state in `aria-checked`.
        // Reading `.checked` on those yields undefined, which would make an
        // already-enabled toggle look "off" and get clicked (turning it off).
        const isOn = (el: Element): boolean =>
          el instanceof HTMLInputElement
            ? el.checked
            : el.getAttribute('aria-checked') === 'true';

        const switches = document.querySelectorAll(
          'input[role="switch"], [role="switch"], input[type="checkbox"]'
        );
        for (const sw of Array.from(switches)) {
          const label = (sw.getAttribute('aria-label') || '').toLowerCase();
          const tid = (sw.getAttribute('data-tid') || '').toLowerCase();
          const nearText = (sw.closest('div, label, span')?.textContent || '').toLowerCase();
          const isCaptions =
            label.includes('caption') || label.includes('untertitel') ||
            tid.includes('caption') || tid.includes('subtitle') ||
            nearText.includes('live caption') || nearText.includes('liveuntertitel');
          if (!isCaptions) {
            continue;
          }
          if (!isOn(sw)) {
            (sw as HTMLElement).click();
            return { found: true, clicked: true, info: label || tid || nearText.substring(0, 60) };
          }
          return { found: true, clicked: false, info: `already on: ${label || tid}` };
        }
        return { found: false, clicked: false, info: '' };
      });

      this._logger.info(`Captions toggle result: ${JSON.stringify(toggleResult)}`);
      if (toggleResult.found && toggleResult.clicked) {
        await this._page.waitForTimeout(1500);
      }
      // Close the panel whether or not a toggle was found.
      await this._page.keyboard.press('Escape');
      if (toggleResult.found) return;

      this._logger.warn('Language panel opened but no captions toggle found — trying next strategy');
      break;
    } catch {
      // Continue with the next selector.
    }
  }

  // ── Strategy 3: "Captions & transcripts" submenu (older Teams) ──
  const submenuSelectors = [
    '[data-tid="captions-and-transcripts-button"]',
    '[role="menuitem"]:has-text("Captions & transcripts")',
    '[role="menuitem"]:has-text("Captions and transcripts")',
    '[role="menuitem"]:has-text("Untertitel und Transkripte")',
    '[role="menuitem"]:has-text("Untertitel")',
  ];
  const enableSelectors = [
    'button:has-text("Turn on live captions")',
    'button:has-text("Live captions")',
    'button:has-text("Live-Untertitel aktivieren")',
    '[role="menuitem"]:has-text("Turn on live captions")',
    '[role="menuitem"]:has-text("Live captions")',
    '[role="menuitemcheckbox"]:has-text("captions")',
    '[data-tid="toggle-captions"]',
  ];

  for (const selector of submenuSelectors) {
    try {
      const item = await this._page.$(selector);
      if (!item) {
        continue;
      }
      await item.click();
      this._logger.info(`Clicked captions submenu: ${selector}`);
      await this._page.waitForTimeout(1500);

      if (await clickFirst(enableSelectors, (s) => `Clicked enable captions: ${s}`, 1000)) {
        return;
      }

      this._logger.info('Opened captions submenu but could not find enable button');
      break;
    } catch {
      // Continue with the next selector.
    }
  }

  // ── Strategy 4 (fallback): "Record and transcribe" → "Start transcription" ──
  // Requires the transcript panel to be visible for scraping. Only used if live captions failed.
  this._logger.info('Live captions not available, trying transcription fallback...');

  // Re-open the More menu (previous strategies may have closed it).
  try {
    await this._openMoreMenu();
  } catch {
    this._logger.warn('Could not re-open More menu for transcription fallback');
  }

  const recordMenuSelectors = [
    '[data-tid="RecordingMenuControl-id"]',
    'div[role="menuitem"]:has-text("Record and transcribe")',
    'div[role="menuitem"]:has-text("Aufzeichnen und transkribieren")',
    'div[role="menuitem"]:has-text("Aufnehmen und transkribieren")',
  ];
  const stopSelectors = [
    '[data-tid="call-transcript-button"]:has-text("Stop")',
    '[role="menuitem"]:has-text("Stop transcription")',
    '[role="menuitem"]:has-text("Transkription beenden")',
    '[role="menuitem"]:has-text("Transkription stoppen")',
  ];
  // Only explicit "Start" selectors, so a running transcription is never stopped.
  const startSelectors = [
    '[data-tid="call-transcript-button"]:has-text("Start")',
    '[role="menuitem"]:has-text("Start transcription")',
    '[role="menuitem"]:has-text("Transkription starten")',
    'button:has-text("Start transcription")',
    'button:has-text("Transkription starten")',
  ];
  const showTranscriptSelectors = [
    '[data-tid="transcript-panel-button"]',
    '[role="menuitem"]:has-text("Show transcript")',
    '[role="menuitem"]:has-text("Transkript anzeigen")',
    '[role="menuitem"]:has-text("Transkript")',
  ];

  for (const selector of recordMenuSelectors) {
    try {
      const item = await this._page.$(selector);
      if (!item) {
        continue;
      }
      await item.click();
      this._logger.info(`Clicked "Record and transcribe": ${selector}`);
      await this._page.waitForTimeout(1500);

      await this._logVisibleMenuItems();

      // A visible "Stop transcription" entry means transcription is already running.
      let alreadyRunning = false;
      for (const stopSel of stopSelectors) {
        try {
          if (await this._page.$(stopSel)) {
            this._logger.info('Transcription already running (found "Stop transcription") — not clicking');
            alreadyRunning = true;
            break;
          }
        } catch {
          // Continue with the next selector.
        }
      }

      if (!alreadyRunning) {
        const started = await clickFirst(
          startSelectors,
          (s) => `Clicked "Start transcription": ${s}`,
          2000,
        );
        if (!started) {
          this._logger.warn('"Record and transcribe" opened but "Start transcription" not found');
        }
      }

      // Open the transcript panel so it can be scraped.
      await clickFirst(showTranscriptSelectors, (s) => `Clicked "Show transcript": ${s}`, 2000);
      return;
    } catch {
      // Continue with the next selector.
    }
  }

  // ── Strategy 5: DOM scan for anything containing "caption" / "transcri" ──
  const found = await this._page.evaluate(() => {
    const keywords = ['caption', 'captions', 'untertitel', 'live caption', 'transcri', 'transkri'];
    // Never click a control that would STOP a running transcription
    // ('stop' also covers the German "stoppen"); Strategy 4 avoids these too.
    const stopWords = ['stop', 'beenden'];
    const candidates = document.querySelectorAll<HTMLElement>(
      '[role="menuitem"], [role="menuitemcheckbox"], [role="menuitemradio"], button, li, div[role="option"]'
    );
    const results: string[] = [];
    for (const el of Array.from(candidates)) {
      const text = el.innerText?.toLowerCase()?.trim() || '';
      if (!text || !keywords.some((kw) => text.includes(kw))) {
        continue;
      }
      const snippet = text.substring(0, 60);
      // Record every keyword match so the warning below is useful for debugging.
      results.push(snippet);
      if (stopWords.some((word) => text.includes(word))) {
        continue;
      }
      el.click();
      return { clicked: snippet, allMatches: results };
    }
    return { clicked: null, allMatches: results };
  });

  if (found.clicked) {
    this._logger.info(`Clicked via DOM scan: "${found.clicked}"`);
    await this._page.waitForTimeout(1500);

    // The clicked entry may have opened a submenu with an explicit "Turn on".
    const turnOnBtn = await this._page.$('button:has-text("Turn on"), [role="menuitem"]:has-text("Turn on")');
    if (turnOnBtn) {
      await turnOnBtn.click();
      this._logger.info('Clicked "Turn on" in submenu');
      await this._page.waitForTimeout(1000);
    }
    return;
  }

  await this._page.keyboard.press('Escape');
  this._logger.warn(`Could not find captions/transcription option. DOM scan: ${JSON.stringify(found.allMatches)}`);
}
|
|
|
|
/**
 * Handle the "What language is everyone speaking?" dialog.
 * This dialog appears after clicking "Start transcription" in authenticated Teams.
 *
 * DOM structure:
 *   [data-tid="spoken-language-selection-dialog"] — alertdialog
 *     button[data-tid="callingCaptions-spokenLanguages"] — combobox (current lang)
 *     button[data-tid="calling_captions_change_language_dialog_confirm_button"]
 *     button[data-tid="calling_captions_change_language_dialog_cancel_button"]
 *
 * Waits up to 8 seconds for the dialog. If it appears, the dropdown is switched
 * to the configured language when needed and "Confirm" is clicked.
 *
 * @returns true only when the dialog appeared and "Confirm" was clicked;
 *          false when no dialog appeared or handling it failed.
 */
private async _handleLanguageDialog(): Promise<boolean> {
  const dialogSelector = '[data-tid="spoken-language-selection-dialog"]';

  // Keep the wait separate from the handling so that a failure while operating
  // the dialog is not misreported as "no dialog appeared".
  try {
    await this._page.waitForSelector(dialogSelector, { timeout: 8000 });
  } catch {
    this._logger.info('No language selection dialog appeared (may be using captions mode)');
    return false;
  }
  this._logger.info('Spoken language selection dialog appeared');

  try {
    // Read the current language from the dropdown button.
    const dropdownSelector = 'button[data-tid="callingCaptions-spokenLanguages"]';
    const dropdown = await this._page.$(dropdownSelector);

    if (dropdown) {
      const currentLang = await dropdown.evaluate(
        (el) => el.textContent?.trim() || '',
      );
      this._logger.info(`Dialog current language: "${currentLang}"`);

      const targetNames = this._getLanguageDisplayNames();
      const current = currentLang.toLowerCase();
      // Region-qualified names ("German (Germany)") may match as a substring.
      // Generic fallbacks ("German") must match exactly; otherwise a different
      // regional variant such as "German (Switzerland)" would pass as correct.
      const isCorrect = targetNames.some((name) => {
        const wanted = name.toLowerCase();
        return wanted.includes('(') ? current.includes(wanted) : current === wanted;
      });

      if (isCorrect) {
        this._logger.info('Language already correct in dialog');
      } else {
        this._logger.info(`Need to change language to: ${targetNames.join(', ')}`);

        // Open the dropdown and give the option list time to render.
        await dropdown.click();
        await this._page.waitForTimeout(800);

        // Names are tried from most to least specific; first hit wins.
        let selected = false;
        for (const name of targetNames) {
          if (selected) break;
          const optionSelectors = [
            `[role="option"]:has-text("${name}")`,
            `li:has-text("${name}")`,
            `div[role="option"]:has-text("${name}")`,
          ];
          for (const optSel of optionSelectors) {
            try {
              const option = await this._page.$(optSel);
              if (option) {
                await option.click();
                this._logger.info(`Selected language: ${name} (via ${optSel})`);
                selected = true;
                break;
              }
            } catch {
              // Try the next option selector.
            }
          }
        }

        if (!selected) {
          this._logger.warn('Could not select language in dialog dropdown');
        }
        await this._page.waitForTimeout(500);
      }
    }

    // Click "Confirm".
    const confirmSelectors = [
      'button[data-tid="calling_captions_change_language_dialog_confirm_button"]',
      'button:has-text("Confirm")',
      'button:has-text("Bestätigen")',
    ];

    for (const sel of confirmSelectors) {
      try {
        const btn = await this._page.$(sel);
        if (btn) {
          await btn.click();
          this._logger.info(`Clicked "Confirm" in language dialog: ${sel}`);
          await this._page.waitForTimeout(1500);
          return true;
        }
      } catch {
        // Try the next confirm selector.
      }
    }

    this._logger.warn('Language dialog found but could not click Confirm');
    return false;
  } catch (error) {
    // Still non-fatal (callers only see `false`), but log what actually failed.
    const reason = error instanceof Error ? error.message : String(error);
    this._logger.warn(`Language dialog appeared but handling it failed: ${reason}`);
    return false;
  }
}
|
|
|
|
/**
 * Get the display names Teams may show for the configured language, ordered
 * from most to least specific (used for dropdown selection).
 *
 * The language tag is matched case-insensitively and ignoring surrounding
 * whitespace, so 'de-de' and 'de-DE' resolve to the same entry. A Map is used
 * instead of a plain object so that tags which happen to equal an inherited
 * object key (e.g. 'constructor') cannot return a non-array value.
 *
 * @returns The known display names, or a one-element array containing the
 *          configured tag itself when the language is not in the table.
 */
private _getLanguageDisplayNames(): string[] {
  const displayNamesByTag = new Map<string, string[]>([
    ['de-de', ['German (Germany)', 'Deutsch (Deutschland)', 'German']],
    ['de-ch', ['German (Switzerland)', 'Deutsch (Schweiz)', 'German']],
    ['en-us', ['English (United States)', 'English (US)', 'English']],
    ['en-gb', ['English (United Kingdom)', 'English (UK)']],
    ['fr-fr', ['French (France)', 'Français (France)', 'French']],
    ['fr-ch', ['French (Switzerland)', 'Français (Suisse)']],
    ['it-it', ['Italian (Italy)', 'Italiano (Italia)', 'Italian']],
  ]);

  const configured = this._language;
  return displayNamesByTag.get(configured.trim().toLowerCase()) ?? [configured];
}
|
|
|
|
/**
 * Debug aid: log every menu item currently in the DOM (menuitem,
 * menuitemcheckbox, menuitemradio) as "[data-tid or id] text".
 * Any failure is ignored, because this output is diagnostic only.
 */
private async _logVisibleMenuItems(): Promise<void> {
  try {
    const menuItems = await this._page.evaluate(() => {
      const nodes = document.querySelectorAll<HTMLElement>(
        '[role="menuitem"], [role="menuitemcheckbox"], [role="menuitemradio"]'
      );
      const described: string[] = [];
      nodes.forEach((node) => {
        // Label is capped at 50 characters to keep the log line readable.
        const label = node.innerText?.trim()?.substring(0, 50) || '';
        const marker = node.getAttribute('data-tid') || node.id || 'no-id';
        const line = `[${marker}] ${label}`;
        if (line.length > 5) {
          described.push(line);
        }
      });
      return described;
    });

    const count = menuItems.length;
    this._logger.info(`Visible menu items (${count}): ${JSON.stringify(menuItems)}`);
  } catch {
    // Not critical
  }
}
|
|
|
|
/**
 * Wait for a captions or transcript container to show up after enabling.
 *
 * All known selectors are awaited concurrently, so whichever container appears
 * first is detected immediately instead of only after the selectors listed
 * before it have each timed out. The overall budget is the same as waiting
 * 8 seconds per selector in sequence.
 *
 * Never fails the flow: when nothing is found, caption/transcript related
 * data-tid elements are logged for debugging and a warning is emitted.
 */
private async _waitForCaptionsContainer(): Promise<void> {
  const containerSelectors = [
    'div[data-tid="closed-caption-renderer-wrapper"]',
    'div[data-tid="live-captions-renderer"]',
    '[data-tid="caption-area"]',
    '[data-tid="transcript-pane"]',
    '[data-tid="transcript-view"]',
    '[data-tid="transcript-content"]',
  ];
  const totalTimeoutMs = 8000 * containerSelectors.length;

  // Each probe settles to its selector on success and to null on timeout or
  // error, so no probe can produce an unhandled rejection.
  const probes = containerSelectors.map((selector) =>
    this._page.waitForSelector(selector, { timeout: totalTimeoutMs }).then(
      () => selector,
      () => null,
    ),
  );

  // Resolve with the first selector that matched, or null once all have failed.
  const matched = await new Promise<string | null>((resolve) => {
    let outstanding = probes.length;
    for (const probe of probes) {
      void probe.then((selector) => {
        if (selector !== null) {
          resolve(selector);
        } else if (--outstanding === 0) {
          resolve(null);
        }
      });
    }
  });

  if (matched !== null) {
    this._logger.info(`Found captions/transcript container: ${matched}`);
    return;
  }

  // Diagnostics only: a failure here must not abort the enable flow.
  try {
    const tids = await this._page.evaluate(() => {
      const els = document.querySelectorAll('[data-tid]');
      return Array.from(els)
        .map((e) => ({
          tid: e.getAttribute('data-tid') || '',
          tag: e.tagName,
          h: (e as HTMLElement).offsetHeight,
          w: (e as HTMLElement).offsetWidth,
        }))
        .filter((t) =>
          t.tid.includes('caption') || t.tid.includes('transcript') || t.tid.includes('subtitle'),
        )
        .slice(0, 15);
    });
    this._logger.info(`Transcript/caption data-tid elements: ${JSON.stringify(tids)}`);
  } catch {
    // Page not evaluable — skip the diagnostics.
  }

  this._logger.warn('Could not find captions/transcript container with known selectors');
}
|
|
|
|
/**
 * Set the spoken language for captions.
 *
 * Teams defaults to English for anonymous users. This method attempts to
 * change the "Meeting spoken language" to the configured language (e.g. "de-DE").
 *
 * Flow:
 *   1. Open the caption settings (gear button near the captions, else the
 *      "More" menu, else a text scan of clickable elements)
 *   2. Click "Change spoken language" / "Language settings" if offered
 *   3. Pick the language in the dropdown (selectors first, DOM scan second)
 *   4. Click "Update" / "Apply" / "Confirm" and press Escape
 *
 * Best effort: every failure is logged and swallowed, so captions keep
 * working in the default language.
 *
 * Note: Changing spoken language affects ALL meeting participants.
 */
private async _setSpokenLanguage(): Promise<void> {
  // Clicks the first selector that resolves to an element, logs it, lets the UI
  // settle, and reports whether anything was clicked.
  const clickFirst = async (
    selectors: readonly string[],
    describe: (selector: string) => string,
    settleMs: number,
  ): Promise<boolean> => {
    for (const selector of selectors) {
      try {
        const handle = await this._page.$(selector);
        if (handle) {
          await handle.click();
          this._logger.info(describe(selector));
          await this._page.waitForTimeout(settleMs);
          return true;
        }
      } catch {
        // Try the next selector.
      }
    }
    return false;
  };

  // Shared with the language dialog so both flows look for the same names.
  const targetNames = this._getLanguageDisplayNames();
  this._logger.info(`Setting spoken language to: ${this._language} (looking for: ${targetNames.join(', ')})`);

  try {
    // Wait a moment for the captions UI to stabilize.
    await this._page.waitForTimeout(2000);

    // Strategy 1: "Caption settings" gear button near the captions area.
    const captionSettingsSelectors = [
      'button[aria-label*="Caption settings" i]',
      'button[aria-label*="Captions settings" i]',
      'button[aria-label*="Untertiteleinstellungen" i]',
      'button[data-tid="caption-settings-button"]',
      'button[id="caption-settings-button"]',
      // Teams 2025+: settings icon inside the captions banner
      'button[aria-label*="Settings" i][data-tid*="caption" i]',
    ];
    let settingsOpened = await clickFirst(
      captionSettingsSelectors,
      (s) => `Clicked caption settings: ${s}`,
      1000,
    );

    // Strategy 2: More menu > "Language and speech" / "Captions & transcripts".
    if (!settingsOpened) {
      this._logger.info('Caption settings button not found, trying More menu > Language and speech...');

      await this._openMoreMenu();
      await this._page.waitForTimeout(1000);

      const languageMenuSelectors = [
        '[data-tid="captions-and-transcripts-button"]',
        '[data-tid="language-and-speech-button"]',
        'div[role="menuitem"]:has-text("Captions & transcripts")',
        'div[role="menuitem"]:has-text("Captions and transcripts")',
        'div[role="menuitem"]:has-text("Untertitel und Transkripte")',
        'div[role="menuitem"]:has-text("Language and speech")',
        'div[role="menuitem"]:has-text("Sprache und Spracheingabe")',
        'button:has-text("Captions & transcripts")',
        'button:has-text("Captions and transcripts")',
        'button:has-text("Language and speech")',
        'button:has-text("Sprache und Spracheingabe")',
        'li:has-text("Captions")',
        'li:has-text("Language")',
        'li:has-text("Untertitel")',
        'li:has-text("Sprache")',
      ];
      settingsOpened = await clickFirst(
        languageMenuSelectors,
        (s) => `Clicked language menu: ${s}`,
        1000,
      );
    }

    // Strategy 3: scan all menu items / buttons by their text content.
    if (!settingsOpened) {
      this._logger.info('Standard selectors failed, scanning menu items by text...');

      const found = await this._page.evaluate(() => {
        const keywords = [
          'caption', 'captions', 'untertitel',
          'language', 'sprache', 'spoken',
        ];
        const candidates = document.querySelectorAll(
          '[role="menuitem"], [role="menuitemcheckbox"], [role="menuitemradio"], button, li'
        );
        const elArray = Array.from(candidates);
        for (let i = 0; i < elArray.length; i++) {
          const el = elArray[i] as HTMLElement;
          const text = el.innerText?.toLowerCase() || '';
          if (keywords.some((kw) => text.includes(kw))) {
            el.click();
            return text;
          }
        }
        return null;
      });

      if (found) {
        this._logger.info(`Clicked menu item by text scan: "${found}"`);
        settingsOpened = true;
        await this._page.waitForTimeout(1000);
      }
    }

    if (!settingsOpened) {
      this._logger.warn('Could not open language settings - captions will use default language (English)');
      // Strategy 2 opened the "More" menu and nothing in it was clicked, so
      // dismiss it instead of leaving it open over the meeting UI.
      await this._page.keyboard.press('Escape');
      return;
    }

    // Sub-options like "Change spoken language" / "Language settings". Not
    // finding one is fine: the language settings may already be showing.
    const langSettingsSelectors = [
      'button:has-text("Change spoken language")',
      'button:has-text("Gesprochene Sprache ändern")',
      'button:has-text("Language settings")',
      'button:has-text("Spracheinstellungen")',
      'button:has-text("Spoken language")',
      'button:has-text("Gesprochene Sprache")',
      'div[role="menuitem"]:has-text("Change spoken language")',
      'div[role="menuitem"]:has-text("Spoken language")',
      'div[role="menuitem"]:has-text("Gesprochene Sprache")',
      'a:has-text("Change spoken language")',
      'a:has-text("Spoken language")',
    ];
    await clickFirst(langSettingsSelectors, (s) => `Clicked language settings: ${s}`, 1000);

    let languageSet = false;

    // Log what the settings panel contains, for debugging.
    const panelInfo = await this._page.evaluate(() => {
      const selects = document.querySelectorAll('select');
      const comboboxes = document.querySelectorAll('[role="combobox"]');
      const listboxes = document.querySelectorAll('[role="listbox"]');
      const dropdowns = document.querySelectorAll('[class*="dropdown" i], [class*="Dropdown" i]');
      const allButtons = document.querySelectorAll('button');
      const buttonsWithText = Array.from(allButtons)
        .map((b) => `${b.tagName}[${b.getAttribute('aria-label') || b.textContent?.trim().substring(0, 40)}]`)
        .filter((t) => t.length > 10)
        .slice(0, 10);
      return {
        selects: selects.length,
        comboboxes: comboboxes.length,
        listboxes: listboxes.length,
        dropdowns: dropdowns.length,
        buttons: buttonsWithText,
        bodySnippet: document.body?.innerText?.substring(0, 800) || '',
      };
    });
    this._logger.info(`Caption settings panel - selects: ${panelInfo.selects}, comboboxes: ${panelInfo.comboboxes}, listboxes: ${panelInfo.listboxes}, dropdowns: ${panelInfo.dropdowns}`);
    this._logger.info(`Panel buttons: ${JSON.stringify(panelInfo.buttons)}`);
    this._logger.debug(`Panel text: ${panelInfo.bodySnippet.substring(0, 300)}`);

    // Strategy A: standard selectors for the spoken-language dropdown.
    const dropdownSelectors = [
      'select[aria-label*="spoken language" i]',
      'select[aria-label*="Meeting spoken language" i]',
      'select[aria-label*="Gesprochene Sprache" i]',
      '[data-tid="spoken-language-dropdown"]',
      'div[role="combobox"]',
      'div[role="listbox"]',
      'select',
    ];

    for (const selector of dropdownSelectors) {
      if (languageSet) break;
      try {
        const dropdown = await this._page.$(selector);
        if (!dropdown) {
          continue;
        }
        const tagName = await dropdown.evaluate((el) => el.tagName.toLowerCase());

        if (tagName === 'select') {
          // Native <select>: pick the option by its visible label.
          for (const name of targetNames) {
            try {
              await this._page.selectOption(selector, { label: name });
              this._logger.info(`Selected spoken language: ${name}`);
              languageSet = true;
              break;
            } catch {
              // Try the next name variant.
            }
          }
        } else {
          // Fluent UI dropdown/combobox: open it, then click the option.
          await dropdown.click();
          await this._page.waitForTimeout(500);

          for (const name of targetNames) {
            try {
              const optionSelectors = [
                `[role="option"]:has-text("${name}")`,
                `li:has-text("${name}")`,
                `div[role="option"]:has-text("${name}")`,
                `span:has-text("${name}")`,
              ];
              for (const optSel of optionSelectors) {
                const option = await this._page.$(optSel);
                if (option) {
                  await option.click();
                  this._logger.info(`Selected spoken language: ${name} (via ${optSel})`);
                  languageSet = true;
                  break;
                }
              }
              if (languageSet) break;
            } catch {
              // Try the next name variant.
            }
          }
        }
      } catch {
        // Continue with the next dropdown selector.
      }
    }

    // Strategy B: DOM evaluation fallback — find any dropdown-like element and interact.
    if (!languageSet) {
      this._logger.info('Standard dropdown selectors failed, trying DOM evaluation fallback...');

      languageSet = await this._page.evaluate((names: string[]) => {
        // Fluent UI uses various patterns for dropdowns.
        const candidates = document.querySelectorAll(
          '[role="combobox"], [role="listbox"], select, ' +
          '[class*="dropdown" i], [class*="Dropdown"], ' +
          'button[aria-haspopup="listbox"], button[aria-haspopup="true"], ' +
          '[aria-expanded]'
        );

        for (let i = 0; i < candidates.length; i++) {
          const el = candidates[i] as HTMLElement;
          const label = el.getAttribute('aria-label') || '';
          const nearbyText = el.parentElement?.innerText || '';

          const isLanguageRelated =
            label.toLowerCase().includes('language') ||
            label.toLowerCase().includes('sprache') ||
            nearbyText.toLowerCase().includes('spoken language') ||
            nearbyText.toLowerCase().includes('gesprochene sprache');

          if (isLanguageRelated || candidates.length === 1) {
            // Click to open the dropdown.
            el.click();

            // Wait two animation frames for the options to render.
            return new Promise<boolean>((resolve) => {
              requestAnimationFrame(() => {
                requestAnimationFrame(() => {
                  const options = document.querySelectorAll(
                    '[role="option"], [role="menuitem"], li[class*="option" i]'
                  );

                  for (let j = 0; j < options.length; j++) {
                    const opt = options[j] as HTMLElement;
                    const optText = opt.innerText?.trim() || '';

                    if (names.some((n) => optText.includes(n))) {
                      opt.click();
                      resolve(true);
                      return;
                    }
                  }
                  resolve(false);
                });
              });
            });
          }
        }
        return Promise.resolve(false);
      }, targetNames);

      if (languageSet) {
        this._logger.info('Selected spoken language via DOM evaluation fallback');
        await this._page.waitForTimeout(500);
      }
    }

    if (!languageSet) {
      this._logger.warn('Could not find/select spoken language in dropdown');
    }

    // Click "Update" / "Apply" / "Confirm".
    const updateSelectors = [
      'button:has-text("Update")',
      'button:has-text("Apply")',
      'button:has-text("Confirm")',
      'button:has-text("Aktualisieren")',
      'button:has-text("Übernehmen")',
      'button:has-text("Bestätigen")',
      'button[data-tid="language-update-button"]',
    ];
    await clickFirst(updateSelectors, (s) => `Clicked update button: ${s}`, 1000);

    // Close any open dialogs/menus.
    await this._page.keyboard.press('Escape');
    this._logger.info(`Spoken language setting attempt completed (set: ${languageSet})`);
  } catch (error) {
    this._logger.warn(`Could not set spoken language to ${this._language}: ${error}`);
    // Not fatal - captions will still work, just in the wrong language
  }
}
|
|
|
|
/**
 * Start watching the captions DOM for updates using Recall.ai's approach.
 *
 * Uses page.exposeFunction() + MutationObserver for real-time caption detection.
 * Captions in Teams are rendered inside .fui-ChatMessageCompact elements with:
 * - span[data-tid="author"] for the speaker name
 * - span[data-tid="closed-caption-text"] for the caption text
 *
 * Teams updates captions in real-time as the user speaks, adding punctuation
 * only when the caption is finalized. We use this to detect final captions.
 *
 * Steps performed:
 * 1. Expose `__onCaptionEvent` (forwards to `_handleCaptionEvent`) and the
 *    best-effort `__onCaptionDebug` logger to the page.
 * 2. Wait for one of the known caption/transcript containers to appear.
 * 3. Install a MutationObserver on that container, or on `document.body` as a
 *    fallback that re-targets itself once a container shows up. The active
 *    observer is stored on `window.__captionsObserver` so `unsubscribe()` can
 *    disconnect it.
 *
 * Calling this while already subscribed only logs a warning. If setup fails,
 * the subscribed flag is cleared again and the error is re-thrown so the
 * caller can retry.
 */
async subscribeToCaptions(): Promise<void> {
  if (this._isSubscribed) {
    this._logger.warn('Already subscribed to captions');
    return;
  }

  this._isSubscribed = true;
  this._logger.info('Subscribing to captions...');

  try {
    // Expose callback functions from Node.js to the browser context.
    // The binding outlives unsubscribe(), and Playwright rejects a second
    // registration under the same name, so tolerate exactly that error to
    // allow subscribe -> unsubscribe -> subscribe on the same page.
    try {
      await this._page.exposeFunction('__onCaptionEvent', (caption: {
        speaker: string;
        text: string;
        timestamp: string;
      }) => {
        this._handleCaptionEvent(caption);
      });
    } catch (error) {
      if (!/already registered/i.test(String(error))) {
        throw error;
      }
      this._logger.debug('__onCaptionEvent already exposed, reusing existing binding');
    }

    // Debug callback: logs transcript DOM structure to help identify selectors
    try {
      await this._page.exposeFunction('__onCaptionDebug', (info: {
        tag: string;
        tid: string;
        classes: string;
        text: string;
        children: number;
        html: string;
      }) => {
        this._logger.info(
          `TranscriptDOM: <${info.tag} data-tid="${info.tid}"> ` +
          `children=${info.children}, text="${info.text}"`,
        );
        this._logger.debug(`TranscriptDOM html: ${info.html}`);
      });
    } catch {
      // May already be exposed; the debug channel is optional either way
    }

    // Wait for a known container.
    // NOTE: selectors are tried one after another, so a container that only
    // matches a later selector is detected after the earlier ones time out.
    const waitSelectors = [
      'div[data-tid="closed-caption-renderer-wrapper"]',
      'div[data-tid="live-captions-renderer"]',
      '[data-tid="caption-area"]',
      '[data-tid="transcript-pane"]',
      '[data-tid="transcript-view"]',
      '[data-tid="transcript-content"]',
    ];
    let containerFound = false;
    for (const sel of waitSelectors) {
      try {
        await this._page.waitForSelector(sel, { timeout: 8000 });
        containerFound = true;
        this._logger.info(`Captions/transcript container found: ${sel}`);
        break;
      } catch {
        // Try next
      }
    }

    if (!containerFound) {
      // Log all transcript/caption related elements for debugging
      const transcriptTids = await this._page.evaluate(() => {
        const els = document.querySelectorAll('[data-tid]');
        return Array.from(els)
          .map(e => ({
            tid: e.getAttribute('data-tid') || '',
            tag: e.tagName,
            h: (e as HTMLElement).offsetHeight,
            w: (e as HTMLElement).offsetWidth,
            children: e.children?.length || 0,
          }))
          .filter(t =>
            t.tid.includes('caption') || t.tid.includes('transcript') || t.tid.includes('subtitle'),
          )
          .slice(0, 20);
      });
      this._logger.info(
        `No exact container match. Transcript/caption elements: ${JSON.stringify(transcriptTids)}`,
      );
      this._logger.warn('Captions/transcript container not found, subscribing with body fallback');
    }

    this._logger.info('Setting up MutationObserver for captions/transcription...');

    // Everything inside this callback runs in the page, so it must be
    // self-contained and cannot reference Node-side variables.
    const observerTarget = await this._page.evaluate(() => {
      // ── Helper: extract caption data (anonymous/light-meetings captions) ──
      // Attaches a per-caption observer that reports every text change; the
      // Node side decides which updates are final.
      function _extractCaption(element: HTMLElement): boolean {
        const captionMessage = element.querySelector('.fui-ChatMessageCompact')
          || (element.classList?.contains('fui-ChatMessageCompact') ? element : null);

        if (captionMessage) {
          const authorElement = captionMessage.querySelector('span[data-tid="author"]');
          const contentElement = captionMessage.querySelector('span[data-tid="closed-caption-text"]');

          if (authorElement && contentElement) {
            const textObserver = new MutationObserver(() => {
              const speaker = authorElement.textContent?.trim() ?? 'Unknown';
              const text = (contentElement as any).innerText?.trim() ?? '';
              (window as any).__onCaptionEvent({
                speaker,
                text,
                timestamp: new Date().toISOString(),
              });
            });
            textObserver.observe(contentElement, {
              childList: true,
              subtree: true,
              characterData: true,
            });
            return true;
          }
        }
        return false;
      }

      // ── Helper: extract transcript entry (authenticated Teams transcript panel) ──
      function _extractTranscript(element: HTMLElement): boolean {
        const text = element.innerText?.trim();
        if (!text || text.length < 2) return false;

        // Strategy A: data-tid based speaker/text elements
        const speakerEl = element.querySelector(
          '[data-tid*="speaker"], [data-tid*="author"], [data-tid*="name"], ' +
          '[data-tid*="participant"]'
        );
        const textEl = element.querySelector(
          '[data-tid*="text"], [data-tid*="content"], [data-tid*="body"], ' +
          '[data-tid*="message"]'
        );
        if (speakerEl && textEl) {
          const speaker = speakerEl.textContent?.trim() || 'Unknown';
          const content = (textEl as HTMLElement).innerText?.trim() || '';
          if (content) {
            (window as any).__onCaptionEvent({
              speaker,
              text: content,
              timestamp: new Date().toISOString(),
            });
            return true;
          }
        }

        // Strategy B: structural — first short text child = speaker, rest = text
        const directChildren = Array.from(element.children) as HTMLElement[];
        if (directChildren.length >= 2) {
          const first = directChildren[0].innerText?.trim() || '';
          const rest = directChildren
            .slice(1)
            .map(c => c.innerText?.trim())
            .filter(Boolean)
            .join(' ')
            .trim();
          if (first && first.length < 60 && rest && rest.length > 2) {
            (window as any).__onCaptionEvent({
              speaker: first,
              text: rest,
              timestamp: new Date().toISOString(),
            });
            return true;
          }
        }

        return false;
      }

      // ── Combined handler for mutation observer ──
      function _handleAddedNode(node: Node): void {
        if (node.nodeType !== Node.ELEMENT_NODE) return;
        const el = node as HTMLElement;

        // Skip tiny/empty elements
        const text = el.innerText?.trim();
        if (!text || text.length < 2) return;

        // Try caption extraction first (anonymous UI)
        if (_extractCaption(el)) return;

        // Try transcript extraction (authenticated UI)
        if (_extractTranscript(el)) return;

        // Not recognized — log for debugging (only elements with meaningful text)
        if (text.length > 3) {
          (window as any).__onCaptionDebug?.({
            tag: el.tagName,
            tid: el.getAttribute('data-tid') || '',
            classes: (el.className || '').substring(0, 100),
            text: text.substring(0, 200),
            children: el.children?.length || 0,
            html: el.innerHTML?.substring(0, 500) || '',
          });
        }
      }

      // ── Helper: wildcard transcript container check (exclude buttons/controls) ──
      function _isTranscriptContainer(el: Element): boolean {
        const tid = el.getAttribute('data-tid') || '';
        if (!tid.includes('transcript')) return false;
        if (el.tagName === 'BUTTON' || el.tagName === 'SPAN' || el.tagName === 'SVG') return false;
        if (tid.includes('button') || tid.includes('cancel') || tid.includes('stop')) return false;
        if (((el as HTMLElement).offsetHeight || 0) < 100) return false;
        return true;
      }

      // ── Helper: watch a container and register it for unsubscribe() ──
      function _observeContainer(container: Element): void {
        const observer = new MutationObserver((mutationsList) => {
          for (const mutation of mutationsList) {
            if (mutation.type === 'childList') {
              mutation.addedNodes.forEach(_handleAddedNode);
            }
          }
        });
        observer.observe(container, { childList: true, subtree: true });
        (window as any).__captionsObserver = observer;
      }

      // ── Find container ──
      // '[data-tid="transcript-content"]' is not listed here; it can still be
      // picked up by the wildcard transcript match below.
      const containerSelectors = [
        'div[data-tid="closed-caption-renderer-wrapper"]',
        'div[data-tid="live-captions-renderer"]',
        '[data-tid="caption-area"]',
        '[data-tid="transcript-pane"]',
        '[data-tid="transcript-view"]',
      ];

      let targetNode: Element | null = null;
      for (const sel of containerSelectors) {
        targetNode = document.querySelector(sel);
        if (targetNode) break;
      }

      // Also try wildcard match (transcript container — exclude buttons/controls)
      if (!targetNode) {
        const candidates = document.querySelectorAll('[data-tid*="transcript"]');
        for (const c of Array.from(candidates)) {
          if (_isTranscriptContainer(c)) {
            targetNode = c;
            break;
          }
        }
      }

      if (targetNode) {
        const tid = targetNode.getAttribute('data-tid') || '';
        _observeContainer(targetNode);
        return `container:${tid}`;
      }

      // ── Fallback: observe document.body ──
      // Returns the container carried by a freshly added node, if any:
      // a known selector first, then the node itself, then a nested wildcard match.
      function _findContainerIn(el: HTMLElement): Element | null {
        for (const sel of containerSelectors) {
          const known = el.matches?.(sel) ? el : el.querySelector?.(sel);
          if (known) return known;
        }
        if (_isTranscriptContainer(el)) return el;
        const nested = el.querySelector?.('[data-tid*="transcript"]');
        if (nested && _isTranscriptContainer(nested)) return nested;
        return null;
      }

      // Once a container has been found the body observer is disconnected, but
      // the current callback still iterates the rest of its batch. The flag
      // keeps those leftover nodes from attaching a second targeted observer
      // (which would overwrite __captionsObserver and orphan the first one).
      let handedOff = false;
      const bodyObserver = new MutationObserver((mutationsList) => {
        for (const mutation of mutationsList) {
          if (mutation.type !== 'childList') continue;
          mutation.addedNodes.forEach((node) => {
            if (node.nodeType !== Node.ELEMENT_NODE) return;

            if (!handedOff) {
              const container = _findContainerIn(node as HTMLElement);
              if (container) {
                handedOff = true;
                bodyObserver.disconnect();
                _observeContainer(container);
                return;
              }
            }

            _handleAddedNode(node);
          });
        }
      });

      bodyObserver.observe(document.body, { childList: true, subtree: true });
      (window as any).__captionsObserver = bodyObserver;
      return 'body-fallback';
    });

    this._logger.info(`MutationObserver set up for captions (target: ${observerTarget})`);
  } catch (error) {
    // Setup did not complete: clear the flag so a later call can retry
    // instead of being rejected as "Already subscribed".
    this._isSubscribed = false;
    throw error;
  }
}
|
|
|
|
/**
 * Handle a caption event from the browser MutationObserver.
 * Teams updates captions in real-time. We detect finalized captions by
 * checking for terminal punctuation (. , ! ?).
 *
 * An event is forwarded to the transcript callback only when:
 * - the procedure is currently subscribed and the text is non-empty,
 * - the text ENDS with one of the punctuation marks above (optionally
 *   followed by closing quotes/brackets and whitespace), and
 * - the text, ignoring punctuation, differs from the last forwarded caption.
 *
 * @param caption Speaker name, caption text and ISO timestamp as reported by
 *   the in-page observer.
 */
private _handleCaptionEvent(caption: { speaker: string; text: string; timestamp: string }): void {
  if (!this._isSubscribed || !caption.text) {
    return;
  }

  // Teams adds punctuation only to finalized captions.
  // The match is anchored to the end of the text: an unanchored test would
  // treat any interim update that merely contains punctuation somewhere
  // (e.g. "Yes, I thi") as final and emit every partial revision.
  const terminalPunctuationRegex = /[.,!?]["'”’»)\]]*\s*$/;
  if (!terminalPunctuationRegex.test(caption.text)) {
    return; // Not finalized yet
  }

  // Dedup: strip punctuation and compare to last caption
  const punctuationRegex = /[.,'"!?~\-]/g;
  const newTextStripped = caption.text.replace(punctuationRegex, '');
  const lastTextStripped = this._lastCaptionText.replace(punctuationRegex, '');

  if (newTextStripped === lastTextStripped) {
    return; // Duplicate
  }

  this._lastCaptionText = caption.text;

  this._logger.info(`Caption: [${caption.speaker}] ${caption.text}`);

  // Fall back to the current time rather than forwarding an Invalid Date
  // when the reported timestamp cannot be parsed.
  const parsedTimestamp = new Date(caption.timestamp);
  const timestamp = Number.isNaN(parsedTimestamp.getTime()) ? new Date() : parsedTimestamp;

  this._onTranscript({
    speaker: caption.speaker,
    text: caption.text,
    timestamp,
    isFinal: true,
  });
}
|
|
|
|
/**
 * Stop watching for captions.
 *
 * Clears the subscribed flag first, then asks the page to disconnect the
 * observer stored on `window.__captionsObserver`, if one exists. Failures of
 * the page call are ignored on purpose.
 */
async unsubscribe(): Promise<void> {
  this._isSubscribed = false;

  // Runs inside the page: disconnect the active observer when there is one.
  const disconnectInPage = (): void => {
    const activeObserver = (window as any).__captionsObserver;
    if (!activeObserver) {
      return;
    }
    activeObserver.disconnect();
  };

  try {
    await this._page.evaluate(disconnectInPage);
  } catch {
    // Page might already be closed
  }

  this._logger.info('Unsubscribed from captions');
}
|
|
}
|