From 252775a4b40006d7c8f7aadaaed825abccdd55cb Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Tue, 17 Feb 2026 21:10:00 +0100
Subject: [PATCH] fix: enable captions via Language and speech panel, add body
fallback observers, add keepalive
Co-authored-by: Cursor
---
src/bot/captionsProcedure.ts | 230 ++++++++++++++++++++++++++++-------
src/bot/chatProcedure.ts | 122 +++++++++----------
src/bot/orchestrator.ts | 54 ++++++++
3 files changed, 301 insertions(+), 105 deletions(-)
diff --git a/src/bot/captionsProcedure.ts b/src/bot/captionsProcedure.ts
index 5f60c6a..8a10ad5 100644
--- a/src/bot/captionsProcedure.ts
+++ b/src/bot/captionsProcedure.ts
@@ -181,6 +181,94 @@ export class CaptionsProcedure {
}
}
+    // Strategy 2b: "Language and speech" submenu (authenticated Teams 2025+)
+    // In the new Teams, captions are under "Language and speech" → toggle inside panel
+    const langSpeechSelectors = [
+      '[data-tid="LanguageSpeechMenuControl-id"]',
+      'div[role="menuitem"]:has-text("Language and speech")',
+      'div[role="menuitem"]:has-text("Sprache und Spracheingabe")',
+    ];
+
+    for (const selector of langSpeechSelectors) {
+      try {
+        const item = await this._page.$(selector);
+        if (item) {
+          await item.click();
+          this._logger.info(`Clicked "Language and speech": ${selector}`);
+          await this._page.waitForTimeout(2000);
+
+          // Log panel toggles for debugging (state falls back to aria-checked for non-inputs)
+          const panelToggles = await this._page.evaluate(() => {
+            const switches = document.querySelectorAll(
+              'input[role="switch"], [role="switch"], input[type="checkbox"]'
+            );
+            return Array.from(switches).map(s => ({
+              tid: s.getAttribute('data-tid') || '',
+              label: s.getAttribute('aria-label') || '',
+              checked: (s as HTMLInputElement).checked ?? (s.getAttribute('aria-checked') === 'true'),
+              nearText: ((s.closest('div, label') as HTMLElement)?.textContent || '')
+                .trim().substring(0, 80),
+            }));
+          });
+          this._logger.info(`Panel toggles: ${JSON.stringify(panelToggles)}`);
+
+          // Find the live captions toggle and switch it on unless it is already on
+          const toggleResult = await this._page.evaluate(() => {
+            const switches = document.querySelectorAll(
+              'input[role="switch"], [role="switch"], input[type="checkbox"]'
+            );
+            for (const sw of Array.from(switches)) {
+              const label = (sw.getAttribute('aria-label') || '').toLowerCase();
+              const tid = (sw.getAttribute('data-tid') || '').toLowerCase();
+              const parentEl = sw.closest('div, label, span') as HTMLElement;
+              const nearText = (parentEl?.textContent || '').toLowerCase();
+              const isCaptions =
+                label.includes('caption') || label.includes('untertitel') ||
+                tid.includes('caption') || tid.includes('subtitle') ||
+                nearText.includes('live caption') || nearText.includes('liveuntertitel');
+              if (isCaptions) {
+                // Non-<input> switches have no `.checked`; clicking one that is on would turn it off
+                const isOn = (sw as HTMLInputElement).checked === true ||
+                  sw.getAttribute('aria-checked') === 'true';
+                if (!isOn) {
+                  (sw as HTMLElement).click();
+                  return { found: true, clicked: true, info: label || tid || nearText.substring(0, 60) };
+                }
+                return { found: true, clicked: false, info: `already on: ${label || tid}` };
+              }
+            }
+            // Fallback: any button/link mentioning captions
+            const btns = document.querySelectorAll('button, [role="menuitem"], [role="option"], a');
+            for (const btn of Array.from(btns)) {
+              const text = ((btn as HTMLElement).textContent || '').toLowerCase();
+              if (
+                text.includes('turn on live caption') ||
+                text.includes('liveuntertitel aktivieren') ||
+                text.includes('liveuntertitel einschalten')
+              ) {
+                (btn as HTMLElement).click();
+                return { found: true, clicked: true, info: text.substring(0, 60) };
+              }
+            }
+            return { found: false, clicked: false, info: '' };
+          });
+
+          this._logger.info(`Captions toggle result: ${JSON.stringify(toggleResult)}`);
+          if (toggleResult.found && toggleResult.clicked) {
+            await this._page.waitForTimeout(1500);
+          }
+          // Close the panel
+          await this._page.keyboard.press('Escape');
+          if (toggleResult.found) return;
+
+          this._logger.warn('Language panel opened but no captions toggle found');
+          break;
+        }
+      } catch (err) {
+        this._logger.warn(`"Language and speech" strategy failed for ${selector}: ${String(err)}`);
+      }
+    }
+
// Strategy 3: Generic text-based fallbacks
const textFallbacks = [
'button:has-text("Turn on live captions")',
@@ -702,60 +790,114 @@ export class CaptionsProcedure {
this._logger.info('Setting up MutationObserver for captions...');
// Set up MutationObserver in the browser (Recall.ai approach)
- await this._page.evaluate(() => {
- const targetNode = document.querySelector('div[data-tid="closed-caption-renderer-wrapper"]');
- if (!targetNode) {
- return;
- }
+ // Falls back to document.body when the specific container is not found
+ // (authenticated Teams may use different container selectors)
+ const observerTarget = await this._page.evaluate(() => {
+ // Helper: extract caption data from an element tree
+ function _extractCaption(element: HTMLElement): void {
+ const captionMessage = element.querySelector('.fui-ChatMessageCompact')
+ || (element.classList?.contains('fui-ChatMessageCompact') ? element : null);
- const observer = new MutationObserver((mutationsList) => {
- for (const mutation of mutationsList) {
- if (mutation.type === 'childList') {
- mutation.addedNodes.forEach((node) => {
- if (node.nodeType === Node.ELEMENT_NODE) {
- const element = node as HTMLElement;
+ if (captionMessage) {
+ const authorElement = captionMessage.querySelector('span[data-tid="author"]');
+ const contentElement = captionMessage.querySelector('span[data-tid="closed-caption-text"]');
- // Check if this is a caption element (.fui-ChatMessageCompact)
- const captionMessage = element.querySelector('.fui-ChatMessageCompact');
- if (!captionMessage) {
- return;
- }
+ if (authorElement && contentElement) {
+ // Watch for real-time updates in the caption text
+ const textObserver = new MutationObserver(() => {
+ const speaker = authorElement.textContent?.trim() ?? 'Unknown';
+ const text = (contentElement as any).innerText?.trim() ?? '';
+ (window as any).__onCaptionEvent({
+ speaker,
+ text,
+ timestamp: new Date().toISOString(),
+ });
+ });
- const authorElement = captionMessage.querySelector('span[data-tid="author"]');
- const contentElement = captionMessage.querySelector('span[data-tid="closed-caption-text"]');
-
- if (authorElement && contentElement) {
- // Watch for real-time updates in the caption text
- const textObserver = new MutationObserver(() => {
- const speaker = authorElement.textContent?.trim() ?? 'Unknown';
- const text = (contentElement as any).innerText?.trim() ?? '';
-
- (window as any).__onCaptionEvent({
- speaker,
- text,
- timestamp: new Date().toISOString(),
- });
- });
-
- textObserver.observe(contentElement, {
- childList: true,
- subtree: true,
- characterData: true,
- });
- }
- }
+ textObserver.observe(contentElement, {
+ childList: true,
+ subtree: true,
+ characterData: true,
});
}
}
+ }
+
+ // Try specific caption container selectors first
+ const containerSelectors = [
+ 'div[data-tid="closed-caption-renderer-wrapper"]',
+ 'div[data-tid="live-captions-renderer"]',
+ '[data-tid="caption-area"]',
+ ];
+
+ let targetNode: Element | null = null;
+ for (const sel of containerSelectors) {
+ targetNode = document.querySelector(sel);
+ if (targetNode) break;
+ }
+
+ if (targetNode) {
+ // Targeted observer on the specific caption container
+ const observer = new MutationObserver((mutationsList) => {
+ for (const mutation of mutationsList) {
+ if (mutation.type === 'childList') {
+ mutation.addedNodes.forEach((node) => {
+ if (node.nodeType === Node.ELEMENT_NODE) {
+ _extractCaption(node as HTMLElement);
+ }
+ });
+ }
+ }
+ });
+ observer.observe(targetNode, { childList: true, subtree: true });
+ (window as any).__captionsObserver = observer;
+ return 'container';
+ }
+
+ // Fallback: observe document.body and look for caption elements anywhere
+ // Also watches for the caption container to appear later (e.g. after enabling)
+      // Set once a targeted container observer has taken over from this body-level observer.
+      let handedOver = false;
+      const bodyObserver = new MutationObserver((mutationsList) => {
+        for (const mutation of mutationsList) {
+          if (mutation.type !== 'childList') continue;
+          mutation.addedNodes.forEach((node) => {
+            if (node.nodeType !== Node.ELEMENT_NODE) return;
+            const el = node as HTMLElement;
+
+            // disconnect() does not cancel records already delivered to this callback, so the
+            // container search is limited to one hit; otherwise a second observer could attach.
+            for (const sel of handedOver ? [] : containerSelectors) {
+              const container = el.matches?.(sel) ? el : el.querySelector?.(sel);
+              if (!container) continue;
+              handedOver = true;
+              bodyObserver.disconnect();
+              const targeted = new MutationObserver((muts) => {
+                for (const m of muts) {
+                  if (m.type !== 'childList') continue;
+                  m.addedNodes.forEach((n) => {
+                    if (n.nodeType === Node.ELEMENT_NODE) _extractCaption(n as HTMLElement);
+                  });
+                }
+              });
+              targeted.observe(container, { childList: true, subtree: true });
+              (window as any).__captionsObserver = targeted;
+              break;
+            }
+
+            // Always run extraction: covers a caption that arrived together with its container
+            // and the remaining nodes of the batch in which the hand-over happened.
+            _extractCaption(el);
+          });
+        }
+      });
- observer.observe(targetNode, { childList: true, subtree: true });
-
- // Store observer reference for cleanup
- (window as any).__captionsObserver = observer;
+ bodyObserver.observe(document.body, { childList: true, subtree: true });
+ (window as any).__captionsObserver = bodyObserver;
+ return 'body-fallback';
});
- this._logger.info('MutationObserver set up for captions');
+ this._logger.info(`MutationObserver set up for captions (target: ${observerTarget})`);
}
/**
diff --git a/src/bot/chatProcedure.ts b/src/bot/chatProcedure.ts
index 0de16c8..23620dc 100644
--- a/src/bot/chatProcedure.ts
+++ b/src/bot/chatProcedure.ts
@@ -101,7 +101,60 @@ export class ChatProcedure {
}
// Find chat container and set up observer
- await this._page.evaluate(() => {
+ const chatObserverTarget = await this._page.evaluate(() => {
+      // Parse one added DOM node as a chat message and forward it to
+      // window.__onChatMessageEvent as { speaker, text, timestamp }. Does nothing when no
+      // message element is found or the body text is empty.
+      function _extractChatMessage(el: HTMLElement): void {
+        // Locate the message root: the added node itself or its first matching descendant,
+        // trying the selectors in priority order.
+        const rootCandidates = [
+          '[data-tid="chat-message"]',
+          '.fui-ChatMessage',
+          '[data-tid*="message-body"]',
+        ];
+        let found: HTMLElement | null = null;
+        for (const candidate of rootCandidates) {
+          found = el.matches?.(candidate) ? el : el.querySelector(candidate);
+          if (found) break;
+        }
+        if (!found) return;
+        const root = found;
+
+        // Search inside the message first, then anywhere under the added node.
+        const lookup = (sel: string): Element | null =>
+          root.querySelector(sel) || el.querySelector(sel);
+
+        // Author: first selector whose element carries non-empty textContent.
+        const authorEl = [
+          '[data-tid="message-author"]',
+          '[data-tid="message-author-name"]',
+          '.fui-ChatMessage__author',
+        ]
+          .map(lookup)
+          .find((candidate) => candidate?.textContent);
+        const author = authorEl?.textContent ? authorEl.textContent.trim() : 'Unknown';
+
+        // Body: first selector that resolves to an element, even if that element is empty.
+        const bodyEl = [
+          '[data-tid="message-body"]',
+          '.fui-ChatMessage__body',
+          '[data-tid="chat-message-text"]',
+        ]
+          .map(lookup)
+          .find((candidate) => candidate !== null);
+        const text = bodyEl ? (bodyEl as HTMLElement).innerText?.trim() || '' : '';
+
+        // Nothing to report for empty messages.
+        if (text.length === 0) return;
+
+        (window as any).__onChatMessageEvent({
+          speaker: author,
+          text,
+          timestamp: new Date().toISOString(),
+        });
+      }
+
// Teams chat containers - try multiple selectors
const chatContainerSelectors = [
'[data-tid="message-pane-list"]',
@@ -118,86 +171,33 @@ export class ChatProcedure {
}
if (!chatContainer) {
- // Fallback: find any scrollable container that looks like a chat
const candidates = document.querySelectorAll('[data-tid*="chat"], [data-tid*="message"]');
if (candidates.length > 0) {
chatContainer = candidates[0];
}
}
- if (!chatContainer) {
- return;
- }
+ // Use found container or fall back to document.body
+ const target = chatContainer || document.body;
const observer = new MutationObserver((mutations) => {
for (const mutation of mutations) {
if (mutation.type === 'childList') {
mutation.addedNodes.forEach((node) => {
if (node.nodeType !== Node.ELEMENT_NODE) return;
- const el = node as HTMLElement;
-
- // Look for message elements
- const messageSelectors = [
- '[data-tid="chat-message"]',
- '.fui-ChatMessage',
- '[data-tid*="message-body"]',
- ];
-
- let messageEl: HTMLElement | null = null;
- for (const sel of messageSelectors) {
- messageEl = el.matches(sel) ? el : el.querySelector(sel);
- if (messageEl) break;
- }
-
- if (!messageEl) return;
-
- // Extract author
- const authorSelectors = [
- '[data-tid="message-author"]',
- '[data-tid="message-author-name"]',
- '.fui-ChatMessage__author',
- ];
- let author = 'Unknown';
- for (const sel of authorSelectors) {
- const authorEl = messageEl.querySelector(sel) || el.querySelector(sel);
- if (authorEl?.textContent) {
- author = authorEl.textContent.trim();
- break;
- }
- }
-
- // Extract text
- const bodySelectors = [
- '[data-tid="message-body"]',
- '.fui-ChatMessage__body',
- '[data-tid="chat-message-text"]',
- ];
- let text = '';
- for (const sel of bodySelectors) {
- const bodyEl = messageEl.querySelector(sel) || el.querySelector(sel);
- if (bodyEl) {
- text = (bodyEl as HTMLElement).innerText?.trim() || '';
- break;
- }
- }
-
- if (text && text.length > 0) {
- (window as any).__onChatMessageEvent({
- speaker: author,
- text,
- timestamp: new Date().toISOString(),
- });
- }
+ _extractChatMessage(node as HTMLElement);
});
}
}
});
- observer.observe(chatContainer, { childList: true, subtree: true });
+ observer.observe(target, { childList: true, subtree: true });
(window as any).__chatObserver = observer;
+
+ return chatContainer ? 'container' : 'body-fallback';
});
- this._logger.info('Chat MutationObserver set up');
+ this._logger.info(`Chat MutationObserver set up (target: ${chatObserverTarget})`);
}
/**
diff --git a/src/bot/orchestrator.ts b/src/bot/orchestrator.ts
index 4cb3813..1b3467c 100644
--- a/src/bot/orchestrator.ts
+++ b/src/bot/orchestrator.ts
@@ -64,6 +64,7 @@ export class BotOrchestrator {
private _state: BotState = 'idle';
private _isShuttingDown: boolean = false;
+ private _keepAliveInterval: NodeJS.Timeout | null = null;
constructor(
sessionId: string,
@@ -149,6 +150,9 @@ export class BotOrchestrator {
this._setState('in_meeting');
this._logger.info(`Bot joined the meeting as "${this._botName}"`);
+ // Start keepalive to prevent idle disconnect
+ this._startKeepAlive();
+
// Dismiss any post-join permission modals (e.g. "Manage windows on all displays")
await this._joinProcedure!.dismissBrowserPermissionModals();
@@ -304,6 +308,9 @@ export class BotOrchestrator {
this._setState('in_meeting');
this._logger.info(`Bot joined the meeting (authenticated as ${this._options.botAccountEmail})`);
+ // Start keepalive to prevent idle disconnect
+ this._startKeepAlive();
+
// Initialize audio playback
await this._audioProcedure!.initialize();
@@ -377,6 +384,50 @@ export class BotOrchestrator {
}
}
+  /**
+   * Start a keepalive timer that every 30s moves the mouse slightly and sends an
+   * application-level `{ type: 'ping' }` JSON message (not a WebSocket ping frame) over
+   * the gateway socket. Meant to keep Teams from flagging the bot as idle. No-op if running.
+   */
+  private _startKeepAlive(): void {
+    if (this._keepAliveInterval) return;
+    let tickInFlight = false; // setInterval does not wait for an async callback to settle
+    this._keepAliveInterval = setInterval(async () => {
+      if (tickInFlight || this._isShuttingDown || !this._page) return;
+      tickInFlight = true;
+      try {
+        // Small random mouse movement to simulate user activity
+        const x = 640 + Math.floor(Math.random() * 20 - 10);
+        const y = 360 + Math.floor(Math.random() * 20 - 10);
+        await this._page.mouse.move(x, y);
+      } catch {
+        // Page might be closed
+      }
+      try {
+        // Heartbeat; shutdown is re-checked because it may have started during the await above
+        if (!this._isShuttingDown && this._gatewayWs && this._gatewayWs.readyState === WebSocket.OPEN) {
+          this._gatewayWs.send(JSON.stringify({ type: 'ping', sessionId: this._sessionId }));
+        }
+      } catch {
+        // Connection might be closing
+      } finally {
+        tickInFlight = false;
+      }
+    }, 30000);
+    this._logger.info('Keepalive started (30s interval)');
+  }
+
+  /**
+   * Cancel the keepalive interval, if one is active, and clear the stored handle.
+   */
+  private _stopKeepAlive(): void {
+    const timer = this._keepAliveInterval;
+    if (!timer) return;
+    clearInterval(timer);
+    this._keepAliveInterval = null;
+    this._logger.info('Keepalive stopped');
+  }
+
/**
* Connect to the Gateway WebSocket for this session.
*/
@@ -568,6 +619,9 @@ export class BotOrchestrator {
this._isShuttingDown = true;
this._logger.info('Stopping bot...');
+ // Stop keepalive first
+ this._stopKeepAlive();
+
try {
this._setState('leaving');