diff --git a/src/bot/captionsProcedure.ts b/src/bot/captionsProcedure.ts index edb0913..b0799fb 100644 --- a/src/bot/captionsProcedure.ts +++ b/src/bot/captionsProcedure.ts @@ -107,10 +107,10 @@ export class CaptionsProcedure { * * Strategies in priority order: * 1. Direct captions button (anonymous / light-meetings UI) - * 2. "Record and transcribe" → "Start transcription" (authenticated Teams 2025+) - * → triggers spoken-language-selection-dialog handled by _handleLanguageDialog() + * 2. "Language and speech" → live captions toggle (authenticated, no panel needed) * 3. "Captions & transcripts" submenu (older authenticated Teams) - * 4. "Language and speech" panel toggle (alternative path) + * 4. "Record and transcribe" → "Start transcription" (authenticated, fallback with panel) + * → triggers spoken-language-selection-dialog handled by _handleLanguageDialog() * 5. Generic text / DOM scan fallback */ private async _clickEnableCaptions(): Promise { @@ -137,9 +137,135 @@ export class CaptionsProcedure { } } - // ── Strategy 2: "Record and transcribe" → "Start transcription" ── - // Authenticated Teams 2025+: More → Record and transcribe → Start transcription - // After clicking, a spoken-language-selection-dialog appears (handled later). 
+ // ── Strategy 2: "Language and speech" → live captions toggle (no panel) ── + // Preferred for authenticated joins: enables caption overlay at bottom (same as anonymous) + const langSpeechSelectors = [ + '[data-tid="LanguageSpeechMenuControl-id"]', + 'div[role="menuitem"]:has-text("Language and speech")', + 'div[role="menuitem"]:has-text("Sprache und Spracheingabe")', + ]; + + for (const selector of langSpeechSelectors) { + try { + const item = await this._page.$(selector); + if (item) { + await item.click(); + this._logger.info(`Clicked "Language and speech": ${selector}`); + await this._page.waitForTimeout(2000); + + const panelToggles = await this._page.evaluate(() => { + const switches = document.querySelectorAll( + 'input[role="switch"], [role="switch"], input[type="checkbox"]' + ); + return Array.from(switches).map(s => ({ + tid: s.getAttribute('data-tid') || '', + label: s.getAttribute('aria-label') || '', + checked: (s as HTMLInputElement).checked, + nearText: ((s.closest('div, label') as HTMLElement)?.textContent || '') + .trim().substring(0, 80), + })); + }); + this._logger.info(`Panel toggles: ${JSON.stringify(panelToggles)}`); + + const toggleResult = await this._page.evaluate(() => { + const switches = document.querySelectorAll( + 'input[role="switch"], [role="switch"], input[type="checkbox"]' + ); + for (const sw of Array.from(switches)) { + const label = (sw.getAttribute('aria-label') || '').toLowerCase(); + const tid = (sw.getAttribute('data-tid') || '').toLowerCase(); + const parentEl = sw.closest('div, label, span') as HTMLElement; + const nearText = (parentEl?.textContent || '').toLowerCase(); + const isCaptions = + label.includes('caption') || label.includes('untertitel') || + tid.includes('caption') || tid.includes('subtitle') || + nearText.includes('live caption') || nearText.includes('liveuntertitel'); + if (isCaptions) { + if (!(sw as HTMLInputElement).checked) { + (sw as HTMLElement).click(); + return { found: true, clicked: true, info: 
label || tid || nearText.substring(0, 60) }; + } + return { found: true, clicked: false, info: `already on: ${label || tid}` }; + } + } + return { found: false, clicked: false, info: '' }; + }); + + this._logger.info(`Captions toggle result: ${JSON.stringify(toggleResult)}`); + if (toggleResult.found && toggleResult.clicked) { + await this._page.waitForTimeout(1500); + } + await this._page.keyboard.press('Escape'); + if (toggleResult.found) return; + + this._logger.warn('Language panel opened but no captions toggle found — trying next strategy'); + break; + } + } catch { + // Continue + } + } + + // ── Strategy 3: "Captions & transcripts" submenu (older Teams) ── + const submenuSelectors = [ + '[data-tid="captions-and-transcripts-button"]', + '[role="menuitem"]:has-text("Captions & transcripts")', + '[role="menuitem"]:has-text("Captions and transcripts")', + '[role="menuitem"]:has-text("Untertitel und Transkripte")', + '[role="menuitem"]:has-text("Untertitel")', + ]; + + for (const selector of submenuSelectors) { + try { + const item = await this._page.$(selector); + if (item) { + await item.click(); + this._logger.info(`Clicked captions submenu: ${selector}`); + await this._page.waitForTimeout(1500); + + const enableSelectors = [ + 'button:has-text("Turn on live captions")', + 'button:has-text("Live captions")', + 'button:has-text("Live-Untertitel aktivieren")', + '[role="menuitem"]:has-text("Turn on live captions")', + '[role="menuitem"]:has-text("Live captions")', + '[role="menuitemcheckbox"]:has-text("captions")', + '[data-tid="toggle-captions"]', + ]; + + for (const enableSel of enableSelectors) { + try { + const enableBtn = await this._page.$(enableSel); + if (enableBtn) { + await enableBtn.click(); + this._logger.info(`Clicked enable captions: ${enableSel}`); + await this._page.waitForTimeout(1000); + return; + } + } catch { + // Continue + } + } + + this._logger.info('Opened captions submenu but could not find enable button'); + break; + } + } catch { + // 
Continue + } + } + + // ── Strategy 4 (fallback): "Record and transcribe" → "Start transcription" ── + // Requires transcript panel to be visible for scraping. Only used if live captions failed. + this._logger.info('Live captions not available, trying transcription fallback...'); + + // Re-open More menu (previous strategies may have closed it) + try { + await this._openMoreMenu(); + } catch { + this._logger.warn('Could not re-open More menu for transcription fallback'); + } + const recordMenuSelectors = [ '[data-tid="RecordingMenuControl-id"]', 'div[role="menuitem"]:has-text("Record and transcribe")', @@ -239,123 +365,6 @@ export class CaptionsProcedure { } } - // ── Strategy 3: "Captions & transcripts" submenu (older Teams) ── - const submenuSelectors = [ - '[data-tid="captions-and-transcripts-button"]', - '[role="menuitem"]:has-text("Captions & transcripts")', - '[role="menuitem"]:has-text("Captions and transcripts")', - '[role="menuitem"]:has-text("Untertitel und Transkripte")', - '[role="menuitem"]:has-text("Untertitel")', - ]; - - for (const selector of submenuSelectors) { - try { - const item = await this._page.$(selector); - if (item) { - await item.click(); - this._logger.info(`Clicked captions submenu: ${selector}`); - await this._page.waitForTimeout(1500); - - const enableSelectors = [ - 'button:has-text("Turn on live captions")', - 'button:has-text("Live captions")', - 'button:has-text("Live-Untertitel aktivieren")', - '[role="menuitem"]:has-text("Turn on live captions")', - '[role="menuitem"]:has-text("Live captions")', - '[role="menuitemcheckbox"]:has-text("captions")', - '[data-tid="toggle-captions"]', - ]; - - for (const enableSel of enableSelectors) { - try { - const enableBtn = await this._page.$(enableSel); - if (enableBtn) { - await enableBtn.click(); - this._logger.info(`Clicked enable captions: ${enableSel}`); - await this._page.waitForTimeout(1000); - return; - } - } catch { - // Continue - } - } - - this._logger.info('Opened captions submenu 
but could not find enable button'); - break; - } - } catch { - // Continue - } - } - - // ── Strategy 4: "Language and speech" panel toggle ── - const langSpeechSelectors = [ - '[data-tid="LanguageSpeechMenuControl-id"]', - 'div[role="menuitem"]:has-text("Language and speech")', - 'div[role="menuitem"]:has-text("Sprache und Spracheingabe")', - ]; - - for (const selector of langSpeechSelectors) { - try { - const item = await this._page.$(selector); - if (item) { - await item.click(); - this._logger.info(`Clicked "Language and speech": ${selector}`); - await this._page.waitForTimeout(2000); - - const panelToggles = await this._page.evaluate(() => { - const switches = document.querySelectorAll( - 'input[role="switch"], [role="switch"], input[type="checkbox"]' - ); - return Array.from(switches).map(s => ({ - tid: s.getAttribute('data-tid') || '', - label: s.getAttribute('aria-label') || '', - checked: (s as HTMLInputElement).checked, - nearText: ((s.closest('div, label') as HTMLElement)?.textContent || '') - .trim().substring(0, 80), - })); - }); - this._logger.info(`Panel toggles: ${JSON.stringify(panelToggles)}`); - - const toggleResult = await this._page.evaluate(() => { - const switches = document.querySelectorAll( - 'input[role="switch"], [role="switch"], input[type="checkbox"]' - ); - for (const sw of Array.from(switches)) { - const label = (sw.getAttribute('aria-label') || '').toLowerCase(); - const tid = (sw.getAttribute('data-tid') || '').toLowerCase(); - const parentEl = sw.closest('div, label, span') as HTMLElement; - const nearText = (parentEl?.textContent || '').toLowerCase(); - const isCaptions = - label.includes('caption') || label.includes('untertitel') || - tid.includes('caption') || tid.includes('subtitle') || - nearText.includes('live caption') || nearText.includes('liveuntertitel'); - if (isCaptions) { - if (!(sw as HTMLInputElement).checked) { - (sw as HTMLElement).click(); - return { found: true, clicked: true, info: label || tid || 
/**
 * Generate a solid-white Y4M video file for use as fake camera input
 * (passed to the browser via `--use-file-for-fake-video-capture`).
 *
 * The file holds a single 1280x720 4:2:0 frame and its header declares 30 fps.
 * It is cached in the OS temp directory and reused across launches, but only
 * when its size matches the expected byte count exactly — a truncated file
 * left behind by an interrupted earlier run is regenerated instead of reused.
 *
 * The file is written to a per-process temporary name and renamed into place,
 * so a concurrent process never observes a partially written file.
 *
 * Later this can be replaced with a custom image (avatar/background).
 *
 * @returns Absolute path of the Y4M file.
 * @throws Propagates any filesystem error raised while writing or renaming.
 */
function _generateFakeVideoFile(): string {
  const width = 1280;
  const height = 720;
  const filePath = path.join(os.tmpdir(), 'bot-video-white.y4m');

  const streamHeader = Buffer.from(
    `YUV4MPEG2 W${width} H${height} F30:1 Ip A0:0 C420jpeg\n`,
    'ascii',
  );
  const frameHeader = Buffer.from('FRAME\n', 'ascii');

  // 4:2:0 layout: one full-resolution luma plane plus two quarter-size chroma planes.
  const lumaSize = width * height;
  const chromaSize = (width / 2) * (height / 2);
  const expectedSize =
    streamHeader.length + frameHeader.length + lumaSize + 2 * chromaSize;

  // Reuse the cached file only if it is complete; existence alone is not enough
  // because a previous run may have died mid-write.
  try {
    const stat = fs.statSync(filePath);
    if (stat.isFile() && stat.size === expectedSize) return filePath;
  } catch {
    // Not there (or not readable) — fall through and (re)create it.
  }

  // White in YUV: Y=235, U=128, V=128
  const payload = Buffer.concat(
    [
      streamHeader,
      frameHeader,
      Buffer.alloc(lumaSize, 235),
      Buffer.alloc(chromaSize, 128),
      Buffer.alloc(chromaSize, 128),
    ],
    expectedSize,
  );

  // Write under a unique temporary name, then rename: the final path either
  // does not exist or holds a complete file, never a partial one.
  const tmpPath = `${filePath}.${process.pid}.${Date.now()}.tmp`;
  try {
    fs.writeFileSync(tmpPath, payload);
    fs.renameSync(tmpPath, filePath);
  } catch (err) {
    try {
      fs.unlinkSync(tmpPath);
    } catch {
      // Best-effort cleanup; the original error is the one worth reporting.
    }
    throw err;
  }

  return filePath;
}