From 252775a4b40006d7c8f7aadaaed825abccdd55cb Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 17 Feb 2026 21:10:00 +0100 Subject: [PATCH] fix: enable captions via Language and speech panel, add body fallback observers, add keepalive Co-authored-by: Cursor --- src/bot/captionsProcedure.ts | 230 ++++++++++++++++++++++++++++------- src/bot/chatProcedure.ts | 122 +++++++++---------- src/bot/orchestrator.ts | 54 ++++++++ 3 files changed, 301 insertions(+), 105 deletions(-) diff --git a/src/bot/captionsProcedure.ts b/src/bot/captionsProcedure.ts index 5f60c6a..8a10ad5 100644 --- a/src/bot/captionsProcedure.ts +++ b/src/bot/captionsProcedure.ts @@ -181,6 +181,94 @@ export class CaptionsProcedure { } } + // Strategy 2b: "Language and speech" submenu (authenticated Teams 2025+) + // In the new Teams, captions are under "Language and speech" → toggle inside panel + const langSpeechSelectors = [ + '[data-tid="LanguageSpeechMenuControl-id"]', + 'div[role="menuitem"]:has-text("Language and speech")', + 'div[role="menuitem"]:has-text("Sprache und Spracheingabe")', + ]; + + for (const selector of langSpeechSelectors) { + try { + const item = await this._page.$(selector); + if (item) { + await item.click(); + this._logger.info(`Clicked "Language and speech": ${selector}`); + await this._page.waitForTimeout(2000); + + // Log panel toggles for debugging + const panelToggles = await this._page.evaluate(() => { + const switches = document.querySelectorAll( + 'input[role="switch"], [role="switch"], input[type="checkbox"]' + ); + return Array.from(switches).map(s => ({ + tid: s.getAttribute('data-tid') || '', + label: s.getAttribute('aria-label') || '', + checked: (s as HTMLInputElement).checked, + nearText: ((s.closest('div, label') as HTMLElement)?.textContent || '') + .trim().substring(0, 80), + })); + }); + this._logger.info(`Panel toggles: ${JSON.stringify(panelToggles)}`); + + // Find and click the live captions toggle + const toggleResult = await this._page.evaluate(() => { + const switches = document.querySelectorAll( + 'input[role="switch"], [role="switch"], input[type="checkbox"]' + ); + for (const sw of Array.from(switches)) { + const label = (sw.getAttribute('aria-label') || '').toLowerCase(); + const tid = (sw.getAttribute('data-tid') || '').toLowerCase(); + const parentEl = sw.closest('div, label, span') as HTMLElement; + const nearText = (parentEl?.textContent || '').toLowerCase(); + const isCaptions = + label.includes('caption') || label.includes('untertitel') || + tid.includes('caption') || tid.includes('subtitle') || + nearText.includes('live caption') || nearText.includes('liveuntertitel'); + if (isCaptions) { + if (!(sw as HTMLInputElement).checked) { + (sw as HTMLElement).click(); + return { found: true, clicked: true, info: label || tid || nearText.substring(0, 60) }; + } + return { found: true, clicked: false, info: `already on: ${label || tid}` }; + } + } + // Fallback: any button/link mentioning captions + const btns = document.querySelectorAll('button, [role="menuitem"], [role="option"], a'); + for (const btn of Array.from(btns)) { + const text = ((btn as HTMLElement).textContent || '').toLowerCase(); + if ( + text.includes('turn on live caption') || + text.includes('liveuntertitel aktivieren') || + text.includes('liveuntertitel einschalten') + ) { + (btn as HTMLElement).click(); + return { found: true, clicked: true, info: text.substring(0, 60) }; + } + } + return { found: false, clicked: false, info: '' }; + }); + + this._logger.info(`Captions toggle result: ${JSON.stringify(toggleResult)}`); + + if (toggleResult.found && toggleResult.clicked) { + await this._page.waitForTimeout(1500); + } + + // Close the panel + await this._page.keyboard.press('Escape'); + + if (toggleResult.found) return; + + this._logger.warn('Language panel opened but no captions toggle found'); + break; + } + } catch { + // Continue + } + } + // Strategy 3: Generic text-based fallbacks const textFallbacks = [ 'button:has-text("Turn on live captions")', @@ -702,60 +790,114 @@ export class CaptionsProcedure { this._logger.info('Setting up MutationObserver for captions...'); // Set up MutationObserver in the browser (Recall.ai approach) - await this._page.evaluate(() => { - const targetNode = document.querySelector('div[data-tid="closed-caption-renderer-wrapper"]'); - if (!targetNode) { - return; - } + // Falls back to document.body when the specific container is not found + // (authenticated Teams may use different container selectors) + const observerTarget = await this._page.evaluate(() => { + // Helper: extract caption data from an element tree + function _extractCaption(element: HTMLElement): void { + const captionMessage = element.querySelector('.fui-ChatMessageCompact') + || (element.classList?.contains('fui-ChatMessageCompact') ? element : null); - const observer = new MutationObserver((mutationsList) => { - for (const mutation of mutationsList) { - if (mutation.type === 'childList') { - mutation.addedNodes.forEach((node) => { - if (node.nodeType === Node.ELEMENT_NODE) { - const element = node as HTMLElement; + if (captionMessage) { + const authorElement = captionMessage.querySelector('span[data-tid="author"]'); + const contentElement = captionMessage.querySelector('span[data-tid="closed-caption-text"]'); - // Check if this is a caption element (.fui-ChatMessageCompact) - const captionMessage = element.querySelector('.fui-ChatMessageCompact'); - if (!captionMessage) { - return; - } + if (authorElement && contentElement) { + // Watch for real-time updates in the caption text + const textObserver = new MutationObserver(() => { + const speaker = authorElement.textContent?.trim() ?? 'Unknown'; + const text = (contentElement as any).innerText?.trim() ?? ''; + (window as any).__onCaptionEvent({ + speaker, + text, + timestamp: new Date().toISOString(), + }); + }); - const authorElement = captionMessage.querySelector('span[data-tid="author"]'); - const contentElement = captionMessage.querySelector('span[data-tid="closed-caption-text"]'); - - if (authorElement && contentElement) { - // Watch for real-time updates in the caption text - const textObserver = new MutationObserver(() => { - const speaker = authorElement.textContent?.trim() ?? 'Unknown'; - const text = (contentElement as any).innerText?.trim() ?? ''; - - (window as any).__onCaptionEvent({ - speaker, - text, - timestamp: new Date().toISOString(), - }); - }); - - textObserver.observe(contentElement, { - childList: true, - subtree: true, - characterData: true, - }); - } - } + textObserver.observe(contentElement, { + childList: true, + subtree: true, + characterData: true, }); } } + } + + // Try specific caption container selectors first + const containerSelectors = [ + 'div[data-tid="closed-caption-renderer-wrapper"]', + 'div[data-tid="live-captions-renderer"]', + '[data-tid="caption-area"]', + ]; + + let targetNode: Element | null = null; + for (const sel of containerSelectors) { + targetNode = document.querySelector(sel); + if (targetNode) break; + } + + if (targetNode) { + // Targeted observer on the specific caption container + const observer = new MutationObserver((mutationsList) => { + for (const mutation of mutationsList) { + if (mutation.type === 'childList') { + mutation.addedNodes.forEach((node) => { + if (node.nodeType === Node.ELEMENT_NODE) { + _extractCaption(node as HTMLElement); + } + }); + } + } + }); + observer.observe(targetNode, { childList: true, subtree: true }); + (window as any).__captionsObserver = observer; + return 'container'; + } + + // Fallback: observe document.body and look for caption elements anywhere + // Also watches for the caption container to appear later (e.g. after enabling) + const bodyObserver = new MutationObserver((mutationsList) => { + for (const mutation of mutationsList) { + if (mutation.type !== 'childList') continue; + mutation.addedNodes.forEach((node) => { + if (node.nodeType !== Node.ELEMENT_NODE) return; + const el = node as HTMLElement; + + // Check if a specific caption container just appeared + for (const sel of containerSelectors) { + const container = el.matches?.(sel) ? el : el.querySelector?.(sel); + if (container) { + // Switch to targeted observation + bodyObserver.disconnect(); + const targeted = new MutationObserver((muts) => { + for (const m of muts) { + if (m.type === 'childList') { + m.addedNodes.forEach((n) => { + if (n.nodeType === Node.ELEMENT_NODE) { + _extractCaption(n as HTMLElement); + } + }); + } + } + }); + targeted.observe(container, { childList: true, subtree: true }); + (window as any).__captionsObserver = targeted; + return; + } + } + + // Direct caption element detection (body-level) + _extractCaption(el); + }); + } }); - observer.observe(targetNode, { childList: true, subtree: true }); - - // Store observer reference for cleanup - (window as any).__captionsObserver = observer; + bodyObserver.observe(document.body, { childList: true, subtree: true }); + (window as any).__captionsObserver = bodyObserver; + return 'body-fallback'; }); - this._logger.info('MutationObserver set up for captions'); + this._logger.info(`MutationObserver set up for captions (target: ${observerTarget})`); } /** diff --git a/src/bot/chatProcedure.ts b/src/bot/chatProcedure.ts index 0de16c8..23620dc 100644 --- a/src/bot/chatProcedure.ts +++ b/src/bot/chatProcedure.ts @@ -101,7 +101,60 @@ export class ChatProcedure { } // Find chat container and set up observer - await this._page.evaluate(() => { + const chatObserverTarget = await this._page.evaluate(() => { + function _extractChatMessage(el: HTMLElement): void { + const messageSelectors = [ + '[data-tid="chat-message"]', + '.fui-ChatMessage', + '[data-tid*="message-body"]', + ]; + + let messageEl: HTMLElement | null = null; + for (const sel of messageSelectors) { + messageEl = el.matches?.(sel) ? el : el.querySelector(sel); + if (messageEl) break; + } + if (!messageEl) return; + + // Extract author + const authorSelectors = [ + '[data-tid="message-author"]', + '[data-tid="message-author-name"]', + '.fui-ChatMessage__author', + ]; + let author = 'Unknown'; + for (const sel of authorSelectors) { + const authorEl = messageEl.querySelector(sel) || el.querySelector(sel); + if (authorEl?.textContent) { + author = authorEl.textContent.trim(); + break; + } + } + + // Extract text + const bodySelectors = [ + '[data-tid="message-body"]', + '.fui-ChatMessage__body', + '[data-tid="chat-message-text"]', + ]; + let text = ''; + for (const sel of bodySelectors) { + const bodyEl = messageEl.querySelector(sel) || el.querySelector(sel); + if (bodyEl) { + text = (bodyEl as HTMLElement).innerText?.trim() || ''; + break; + } + } + + if (text && text.length > 0) { + (window as any).__onChatMessageEvent({ + speaker: author, + text, + timestamp: new Date().toISOString(), + }); + } + } + // Teams chat containers - try multiple selectors const chatContainerSelectors = [ '[data-tid="message-pane-list"]', @@ -118,86 +171,33 @@ export class ChatProcedure { } if (!chatContainer) { - // Fallback: find any scrollable container that looks like a chat const candidates = document.querySelectorAll('[data-tid*="chat"], [data-tid*="message"]'); if (candidates.length > 0) { chatContainer = candidates[0]; } } - if (!chatContainer) { - return; - } + // Use found container or fall back to document.body + const target = chatContainer || document.body; const observer = new MutationObserver((mutations) => { for (const mutation of mutations) { if (mutation.type === 'childList') { mutation.addedNodes.forEach((node) => { if (node.nodeType !== Node.ELEMENT_NODE) return; - const el = node as HTMLElement; - - // Look for message elements - const messageSelectors = [ - '[data-tid="chat-message"]', - '.fui-ChatMessage', - '[data-tid*="message-body"]', - ]; - - let messageEl: HTMLElement | null = null; - for (const sel of messageSelectors) { - messageEl = el.matches(sel) ? el : el.querySelector(sel); - if (messageEl) break; - } - - if (!messageEl) return; - - // Extract author - const authorSelectors = [ - '[data-tid="message-author"]', - '[data-tid="message-author-name"]', - '.fui-ChatMessage__author', - ]; - let author = 'Unknown'; - for (const sel of authorSelectors) { - const authorEl = messageEl.querySelector(sel) || el.querySelector(sel); - if (authorEl?.textContent) { - author = authorEl.textContent.trim(); - break; - } - } - - // Extract text - const bodySelectors = [ - '[data-tid="message-body"]', - '.fui-ChatMessage__body', - '[data-tid="chat-message-text"]', - ]; - let text = ''; - for (const sel of bodySelectors) { - const bodyEl = messageEl.querySelector(sel) || el.querySelector(sel); - if (bodyEl) { - text = (bodyEl as HTMLElement).innerText?.trim() || ''; - break; - } - } - - if (text && text.length > 0) { - (window as any).__onChatMessageEvent({ - speaker: author, - text, - timestamp: new Date().toISOString(), - }); - } + _extractChatMessage(node as HTMLElement); }); } } }); - observer.observe(chatContainer, { childList: true, subtree: true }); + observer.observe(target, { childList: true, subtree: true }); (window as any).__chatObserver = observer; + + return chatContainer ? 'container' : 'body-fallback'; }); - this._logger.info('Chat MutationObserver set up'); + this._logger.info(`Chat MutationObserver set up (target: ${chatObserverTarget})`); } /** diff --git a/src/bot/orchestrator.ts b/src/bot/orchestrator.ts index 4cb3813..1b3467c 100644 --- a/src/bot/orchestrator.ts +++ b/src/bot/orchestrator.ts @@ -64,6 +64,7 @@ export class BotOrchestrator { private _state: BotState = 'idle'; private _isShuttingDown: boolean = false; + private _keepAliveInterval: NodeJS.Timeout | null = null; constructor( sessionId: string, @@ -149,6 +150,9 @@ export class BotOrchestrator { this._setState('in_meeting'); this._logger.info(`Bot joined the meeting as "${this._botName}"`); + // Start keepalive to prevent idle disconnect + this._startKeepAlive(); + // Dismiss any post-join permission modals (e.g. "Manage windows on all displays") await this._joinProcedure!.dismissBrowserPermissionModals(); @@ -304,6 +308,9 @@ export class BotOrchestrator { this._setState('in_meeting'); this._logger.info(`Bot joined the meeting (authenticated as ${this._options.botAccountEmail})`); + // Start keepalive to prevent idle disconnect + this._startKeepAlive(); + // Initialize audio playback await this._audioProcedure!.initialize(); @@ -377,6 +384,50 @@ export class BotOrchestrator { } } + /** + * Start a keepalive timer that periodically moves the mouse and sends + * a WebSocket ping. Prevents Teams from detecting the bot as idle + * and kicking it from the meeting. + */ + private _startKeepAlive(): void { + if (this._keepAliveInterval) return; + + this._keepAliveInterval = setInterval(async () => { + if (this._isShuttingDown || !this._page) return; + + try { + // Small random mouse movement to simulate user activity + const x = 640 + Math.floor(Math.random() * 20 - 10); + const y = 360 + Math.floor(Math.random() * 20 - 10); + await this._page.mouse.move(x, y); + } catch { + // Page might be closed + } + + // WebSocket heartbeat + if (this._gatewayWs && this._gatewayWs.readyState === WebSocket.OPEN) { + try { + this._gatewayWs.send(JSON.stringify({ type: 'ping', sessionId: this._sessionId })); + } catch { + // Connection might be closing + } + } + }, 30000); + + this._logger.info('Keepalive started (30s interval)'); + } + + /** + * Stop the keepalive timer. + */ + private _stopKeepAlive(): void { + if (this._keepAliveInterval) { + clearInterval(this._keepAliveInterval); + this._keepAliveInterval = null; + this._logger.info('Keepalive stopped'); + } + } + /** * Connect to the Gateway WebSocket for this session. */ @@ -568,6 +619,9 @@ export class BotOrchestrator { this._isShuttingDown = true; this._logger.info('Stopping bot...'); + // Stop keepalive first + this._stopKeepAlive(); + try { this._setState('leaving');