From b0dffb49dda91b0496c8e6138eee13e9a64c25d8 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Tue, 17 Feb 2026 23:01:00 +0100
Subject: [PATCH] fix: revert to Record+transcribe as primary, add noise filter
for body fallback, fix chat debug logging
Co-authored-by: Cursor
---
src/bot/captionsProcedure.ts | 277 ++++++++++++++++++-----------------
src/bot/chatProcedure.ts | 175 +++++++++++++++++-----
2 files changed, 279 insertions(+), 173 deletions(-)
diff --git a/src/bot/captionsProcedure.ts b/src/bot/captionsProcedure.ts
index b0799fb..b98d34a 100644
--- a/src/bot/captionsProcedure.ts
+++ b/src/bot/captionsProcedure.ts
@@ -107,10 +107,11 @@ export class CaptionsProcedure {
*
* Strategies in priority order:
* 1. Direct captions button (anonymous / light-meetings UI)
- * 2. "Language and speech" → live captions toggle (authenticated, no panel needed)
- * 3. "Captions & transcripts" submenu (older authenticated Teams)
- * 4. "Record and transcribe" → "Start transcription" (authenticated, fallback with panel)
+ * 2. "Record and transcribe" → "Start transcription" (authenticated Teams 2025+)
* → triggers spoken-language-selection-dialog handled by _handleLanguageDialog()
+ * → then "Show transcript" to open scraping panel
+ * 3. "Captions & transcripts" submenu (older authenticated Teams)
+ * 4. "Language and speech" panel toggle (fallback)
* 5. Generic text / DOM scan fallback
*/
private async _clickEnableCaptions(): Promise<void> {
@@ -137,135 +138,7 @@ export class CaptionsProcedure {
}
}
- // ── Strategy 2: "Language and speech" → live captions toggle (no panel) ──
- // Preferred for authenticated joins: enables caption overlay at bottom (same as anonymous)
- const langSpeechSelectors = [
- '[data-tid="LanguageSpeechMenuControl-id"]',
- 'div[role="menuitem"]:has-text("Language and speech")',
- 'div[role="menuitem"]:has-text("Sprache und Spracheingabe")',
- ];
-
- for (const selector of langSpeechSelectors) {
- try {
- const item = await this._page.$(selector);
- if (item) {
- await item.click();
- this._logger.info(`Clicked "Language and speech": ${selector}`);
- await this._page.waitForTimeout(2000);
-
- const panelToggles = await this._page.evaluate(() => {
- const switches = document.querySelectorAll(
- 'input[role="switch"], [role="switch"], input[type="checkbox"]'
- );
- return Array.from(switches).map(s => ({
- tid: s.getAttribute('data-tid') || '',
- label: s.getAttribute('aria-label') || '',
- checked: (s as HTMLInputElement).checked,
- nearText: ((s.closest('div, label') as HTMLElement)?.textContent || '')
- .trim().substring(0, 80),
- }));
- });
- this._logger.info(`Panel toggles: ${JSON.stringify(panelToggles)}`);
-
- const toggleResult = await this._page.evaluate(() => {
- const switches = document.querySelectorAll(
- 'input[role="switch"], [role="switch"], input[type="checkbox"]'
- );
- for (const sw of Array.from(switches)) {
- const label = (sw.getAttribute('aria-label') || '').toLowerCase();
- const tid = (sw.getAttribute('data-tid') || '').toLowerCase();
- const parentEl = sw.closest('div, label, span') as HTMLElement;
- const nearText = (parentEl?.textContent || '').toLowerCase();
- const isCaptions =
- label.includes('caption') || label.includes('untertitel') ||
- tid.includes('caption') || tid.includes('subtitle') ||
- nearText.includes('live caption') || nearText.includes('liveuntertitel');
- if (isCaptions) {
- if (!(sw as HTMLInputElement).checked) {
- (sw as HTMLElement).click();
- return { found: true, clicked: true, info: label || tid || nearText.substring(0, 60) };
- }
- return { found: true, clicked: false, info: `already on: ${label || tid}` };
- }
- }
- return { found: false, clicked: false, info: '' };
- });
-
- this._logger.info(`Captions toggle result: ${JSON.stringify(toggleResult)}`);
- if (toggleResult.found && toggleResult.clicked) {
- await this._page.waitForTimeout(1500);
- }
- await this._page.keyboard.press('Escape');
- if (toggleResult.found) return;
-
- this._logger.warn('Language panel opened but no captions toggle found — trying next strategy');
- break;
- }
- } catch {
- // Continue
- }
- }
-
- // ── Strategy 3: "Captions & transcripts" submenu (older Teams) ──
- const submenuSelectors = [
- '[data-tid="captions-and-transcripts-button"]',
- '[role="menuitem"]:has-text("Captions & transcripts")',
- '[role="menuitem"]:has-text("Captions and transcripts")',
- '[role="menuitem"]:has-text("Untertitel und Transkripte")',
- '[role="menuitem"]:has-text("Untertitel")',
- ];
-
- for (const selector of submenuSelectors) {
- try {
- const item = await this._page.$(selector);
- if (item) {
- await item.click();
- this._logger.info(`Clicked captions submenu: ${selector}`);
- await this._page.waitForTimeout(1500);
-
- const enableSelectors = [
- 'button:has-text("Turn on live captions")',
- 'button:has-text("Live captions")',
- 'button:has-text("Live-Untertitel aktivieren")',
- '[role="menuitem"]:has-text("Turn on live captions")',
- '[role="menuitem"]:has-text("Live captions")',
- '[role="menuitemcheckbox"]:has-text("captions")',
- '[data-tid="toggle-captions"]',
- ];
-
- for (const enableSel of enableSelectors) {
- try {
- const enableBtn = await this._page.$(enableSel);
- if (enableBtn) {
- await enableBtn.click();
- this._logger.info(`Clicked enable captions: ${enableSel}`);
- await this._page.waitForTimeout(1000);
- return;
- }
- } catch {
- // Continue
- }
- }
-
- this._logger.info('Opened captions submenu but could not find enable button');
- break;
- }
- } catch {
- // Continue
- }
- }
-
- // ── Strategy 4 (fallback): "Record and transcribe" → "Start transcription" ──
- // Requires transcript panel to be visible for scraping. Only used if live captions failed.
- this._logger.info('Live captions not available, trying transcription fallback...');
-
- // Re-open More menu (previous strategies may have closed it)
- try {
- await this._openMoreMenu();
- } catch {
- this._logger.warn('Could not re-open More menu for transcription fallback');
- }
-
+ // ── Strategy 2: "Record and transcribe" → "Start transcription" + "Show transcript" ──
const recordMenuSelectors = [
'[data-tid="RecordingMenuControl-id"]',
'div[role="menuitem"]:has-text("Record and transcribe")',
@@ -341,7 +214,6 @@ export class CaptionsProcedure {
'[data-tid="transcript-panel-button"]',
'[role="menuitem"]:has-text("Show transcript")',
'[role="menuitem"]:has-text("Transkript anzeigen")',
- '[role="menuitem"]:has-text("Transkript")',
];
for (const showSel of showTranscriptSelectors) {
@@ -358,6 +230,9 @@ export class CaptionsProcedure {
}
}
+ // Close any remaining menu overlay
+ await this._page.keyboard.press('Escape');
+ await this._page.waitForTimeout(500);
return;
}
} catch {
@@ -365,6 +240,123 @@ export class CaptionsProcedure {
}
}
+ // ── Strategy 3: "Captions & transcripts" submenu (older Teams) ──
+ const submenuSelectors = [
+ '[data-tid="captions-and-transcripts-button"]',
+ '[role="menuitem"]:has-text("Captions & transcripts")',
+ '[role="menuitem"]:has-text("Captions and transcripts")',
+ '[role="menuitem"]:has-text("Untertitel und Transkripte")',
+ '[role="menuitem"]:has-text("Untertitel")',
+ ];
+
+ for (const selector of submenuSelectors) {
+ try {
+ const item = await this._page.$(selector);
+ if (item) {
+ await item.click();
+ this._logger.info(`Clicked captions submenu: ${selector}`);
+ await this._page.waitForTimeout(1500);
+
+ const enableSelectors = [
+ 'button:has-text("Turn on live captions")',
+ 'button:has-text("Live captions")',
+ 'button:has-text("Live-Untertitel aktivieren")',
+ '[role="menuitem"]:has-text("Turn on live captions")',
+ '[role="menuitem"]:has-text("Live captions")',
+ '[role="menuitemcheckbox"]:has-text("captions")',
+ '[data-tid="toggle-captions"]',
+ ];
+
+ for (const enableSel of enableSelectors) {
+ try {
+ const enableBtn = await this._page.$(enableSel);
+ if (enableBtn) {
+ await enableBtn.click();
+ this._logger.info(`Clicked enable captions: ${enableSel}`);
+ await this._page.waitForTimeout(1000);
+ return;
+ }
+ } catch {
+ // Continue
+ }
+ }
+
+ this._logger.info('Opened captions submenu but could not find enable button');
+ break;
+ }
+ } catch {
+ // Continue
+ }
+ }
+
+ // ── Strategy 4 (fallback): "Language and speech" panel toggle ──
+ this._logger.info('Trying "Language and speech" as fallback...');
+
+ // Ensure clean menu state: close any open panels/menus first
+ await this._page.keyboard.press('Escape');
+ await this._page.waitForTimeout(500);
+ await this._page.keyboard.press('Escape');
+ await this._page.waitForTimeout(500);
+
+ try {
+ await this._openMoreMenu();
+ } catch {
+ this._logger.warn('Could not re-open More menu for Language and speech fallback');
+ }
+
+ const langSpeechSelectors = [
+ '[data-tid="LanguageSpeechMenuControl-id"]',
+ 'div[role="menuitem"]:has-text("Language and speech")',
+ 'div[role="menuitem"]:has-text("Sprache und Spracheingabe")',
+ ];
+
+ for (const selector of langSpeechSelectors) {
+ try {
+ const item = await this._page.$(selector);
+ if (item) {
+ await item.click();
+ this._logger.info(`Clicked "Language and speech": ${selector}`);
+ await this._page.waitForTimeout(2000);
+
+ const toggleResult = await this._page.evaluate(() => {
+ const switches = document.querySelectorAll(
+ 'input[role="switch"], [role="switch"], input[type="checkbox"]'
+ );
+ for (const sw of Array.from(switches)) {
+ const label = (sw.getAttribute('aria-label') || '').toLowerCase();
+ const tid = (sw.getAttribute('data-tid') || '').toLowerCase();
+ const parentEl = sw.closest('div, label, span') as HTMLElement;
+ const nearText = (parentEl?.textContent || '').toLowerCase();
+ const isCaptions =
+ label.includes('caption') || label.includes('untertitel') ||
+ tid.includes('caption') || tid.includes('subtitle') ||
+ nearText.includes('live caption') || nearText.includes('liveuntertitel');
+ if (isCaptions) {
+ if (!(sw as HTMLInputElement).checked) {
+ (sw as HTMLElement).click();
+ return { found: true, clicked: true, info: label || tid || nearText.substring(0, 60) };
+ }
+ return { found: true, clicked: false, info: `already on: ${label || tid}` };
+ }
+ }
+ return { found: false, clicked: false, info: '' };
+ });
+
+ this._logger.info(`Captions toggle result: ${JSON.stringify(toggleResult)}`);
+ if (toggleResult.found && toggleResult.clicked) {
+ await this._page.waitForTimeout(1500);
+ }
+ await this._page.keyboard.press('Escape');
+ if (toggleResult.found) return;
+
+ this._logger.warn('Language panel opened but no captions toggle found');
+ break;
+ }
+ } catch {
+ // Continue
+ }
+ }
+
// ── Strategy 5: DOM scan for anything containing "caption" / "transcri" ──
const found = await this._page.evaluate(() => {
const keywords = ['caption', 'captions', 'untertitel', 'live caption', 'transcri', 'transkri'];
@@ -1122,6 +1114,18 @@ export class CaptionsProcedure {
return false;
}
+ // ── Noise filter: skip elements that are clearly NOT captions/transcript ──
+ const _noisePatterns = [
+ 'meeting ended', 'meeting started', 'was invited', 'left the chat',
+ 'doesn\'t have a teams account', 'new notification', 'is typing',
+ 'last read', 'verify their identity', 'left the meeting',
+ 'joined the meeting', 'apply and restart',
+ ];
+ function _isNoise(text: string): boolean {
+ const lower = text.toLowerCase();
+ return _noisePatterns.some(p => lower.includes(p));
+ }
+
// ── Combined handler for mutation observer ──
function _handleAddedNode(node: Node): void {
if (node.nodeType !== Node.ELEMENT_NODE) return;
@@ -1131,6 +1135,13 @@ export class CaptionsProcedure {
const text = el.innerText?.trim();
if (!text || text.length < 2) return;
+ // Skip noise (chat history, notifications, system messages)
+ if (_isNoise(text)) return;
+
+ // Skip the chat typing indicator (exact match on data-tid="typing-indicator")
+ const tid = el.getAttribute('data-tid') || '';
+ if (tid === 'typing-indicator') return;
+
// Try caption extraction first (anonymous UI)
if (_extractCaption(el)) return;
diff --git a/src/bot/chatProcedure.ts b/src/bot/chatProcedure.ts
index 23620dc..2710ea1 100644
--- a/src/bot/chatProcedure.ts
+++ b/src/bot/chatProcedure.ts
@@ -86,7 +86,7 @@ export class ChatProcedure {
this._isSubscribed = true;
this._logger.info('Subscribing to chat messages...');
- // Expose callback from Node.js to browser
+ // Expose callbacks from Node.js to browser
try {
await this._page.exposeFunction('__onChatMessageEvent', (msg: {
speaker: string;
@@ -97,16 +97,42 @@ export class ChatProcedure {
});
} catch {
// Function may already be exposed from a previous subscription
- this._logger.debug('__onChatMessageEvent already exposed');
+ }
+
+ try {
+ await this._page.exposeFunction('__onChatDebug', (info: {
+ tag: string;
+ tid: string;
+ text: string;
+ children: number;
+ html: string;
+ }) => {
+ this._logger.info(`ChatDOM: <${info.tag} data-tid="${info.tid}"> children=${info.children}, text="${info.text.substring(0, 120)}"`);
+ });
+ } catch {
+ // Already exposed
}
// Find chat container and set up observer
const chatObserverTarget = await this._page.evaluate(() => {
- function _extractChatMessage(el: HTMLElement): void {
+ // Noise patterns: system messages, not actual chat
+ const noisePatterns = [
+ 'meeting ended', 'meeting started', 'was invited', 'left the chat',
+ 'joined the meeting', 'left the meeting', 'doesn\'t have a teams account',
+ 'verify their identity', 'new notification', 'last read',
+ ];
+ function _isNoise(text: string): boolean {
+ const lower = text.toLowerCase();
+ return noisePatterns.some(p => lower.includes(p));
+ }
+
+ function _extractChatMessage(el: HTMLElement): boolean {
+ // Strategy 1: Standard selectors
const messageSelectors = [
'[data-tid="chat-message"]',
'.fui-ChatMessage',
'[data-tid*="message-body"]',
+ '[data-tid*="chat-pane-message"]',
];
let messageEl: HTMLElement | null = null;
@@ -114,45 +140,86 @@ export class ChatProcedure {
messageEl = el.matches?.(sel) ? el : el.querySelector(sel);
if (messageEl) break;
}
- if (!messageEl) return;
- // Extract author
- const authorSelectors = [
- '[data-tid="message-author"]',
- '[data-tid="message-author-name"]',
- '.fui-ChatMessage__author',
- ];
- let author = 'Unknown';
- for (const sel of authorSelectors) {
- const authorEl = messageEl.querySelector(sel) || el.querySelector(sel);
- if (authorEl?.textContent) {
- author = authorEl.textContent.trim();
- break;
+ if (messageEl) {
+ const authorSelectors = [
+ '[data-tid="message-author"]',
+ '[data-tid="message-author-name"]',
+ '.fui-ChatMessage__author',
+ '[data-tid*="author"]',
+ ];
+ let author = 'Unknown';
+ for (const sel of authorSelectors) {
+ const authorEl = messageEl.querySelector(sel) || el.querySelector(sel);
+ if (authorEl?.textContent) {
+ author = authorEl.textContent.trim();
+ break;
+ }
+ }
+
+ const bodySelectors = [
+ '[data-tid="message-body"]',
+ '.fui-ChatMessage__body',
+ '[data-tid="chat-message-text"]',
+ '[data-tid*="message-body"]',
+ ];
+ let text = '';
+ for (const sel of bodySelectors) {
+ const bodyEl = messageEl.querySelector(sel) || el.querySelector(sel);
+ if (bodyEl) {
+ text = (bodyEl as HTMLElement).innerText?.trim() || '';
+ break;
+ }
+ }
+
+ if (text && text.length > 0) {
+ (window as any).__onChatMessageEvent({
+ speaker: author,
+ text,
+ timestamp: new Date().toISOString(),
+ });
+ return true;
}
}
- // Extract text
- const bodySelectors = [
- '[data-tid="message-body"]',
- '.fui-ChatMessage__body',
- '[data-tid="chat-message-text"]',
- ];
- let text = '';
- for (const sel of bodySelectors) {
- const bodyEl = messageEl.querySelector(sel) || el.querySelector(sel);
- if (bodyEl) {
- text = (bodyEl as HTMLElement).innerText?.trim() || '';
- break;
+ // Strategy 2: Structural fallback for authenticated Teams chat
+ // Chat messages typically have: author element + body element as children
+ const fullText = el.innerText?.trim() || '';
+ if (!fullText || fullText.length < 2 || _isNoise(fullText)) return false;
+
+ // Skip the typing indicator (system messages were already rejected by _isNoise above)
+ const tid = el.getAttribute('data-tid') || '';
+ if (tid === 'typing-indicator') return false;
+
+ // Look for elements that look like user messages (have author-like + body-like children)
+ const children = Array.from(el.children) as HTMLElement[];
+ if (children.length >= 2) {
+ // Find a child that looks like a name: 2-59 characters of text that does not contain "meeting"/"Meeting"
+ for (let i = 0; i < children.length - 1; i++) {
+ const candidateName = children[i].innerText?.trim() || '';
+ const candidateBody = children.slice(i + 1).map(c => c.innerText?.trim()).filter(Boolean).join(' ').trim();
+
+ if (
+ candidateName.length > 1 && candidateName.length < 60 &&
+ candidateBody.length > 1 &&
+ !_isNoise(candidateBody) &&
+ !candidateName.includes('meeting') && !candidateName.includes('Meeting')
+ ) {
+ // Only accept when the name candidate's data-tid contains "author", "name" or "sender" (not just any two children)
+ const hasTid = children[i].getAttribute('data-tid') || '';
+ if (hasTid.includes('author') || hasTid.includes('name') || hasTid.includes('sender')) {
+ (window as any).__onChatMessageEvent({
+ speaker: candidateName,
+ text: candidateBody,
+ timestamp: new Date().toISOString(),
+ });
+ return true;
+ }
+ }
}
}
- if (text && text.length > 0) {
- (window as any).__onChatMessageEvent({
- speaker: author,
- text,
- timestamp: new Date().toISOString(),
- });
- }
+ return false;
}
// Teams chat containers - try multiple selectors
@@ -165,15 +232,25 @@ export class ChatProcedure {
];
let chatContainer: Element | null = null;
+ let matchedSelector = '';
for (const sel of chatContainerSelectors) {
chatContainer = document.querySelector(sel);
- if (chatContainer) break;
+ if (chatContainer) {
+ matchedSelector = sel;
+ break;
+ }
}
if (!chatContainer) {
const candidates = document.querySelectorAll('[data-tid*="chat"], [data-tid*="message"]');
- if (candidates.length > 0) {
- chatContainer = candidates[0];
+ for (const c of Array.from(candidates)) {
+ const cTid = c.getAttribute('data-tid') || '';
+ // Take the first candidate taller than 50px that is not a BUTTON (skips buttons and small elements)
+ if ((c as HTMLElement).offsetHeight > 50 && c.tagName !== 'BUTTON') {
+ chatContainer = c;
+ matchedSelector = `[data-tid="${cTid}"]`;
+ break;
+ }
}
}
@@ -185,7 +262,25 @@ export class ChatProcedure {
if (mutation.type === 'childList') {
mutation.addedNodes.forEach((node) => {
if (node.nodeType !== Node.ELEMENT_NODE) return;
- _extractChatMessage(node as HTMLElement);
+ const el = node as HTMLElement;
+ const text = el.innerText?.trim() || '';
+ if (!text || text.length < 2) return;
+
+ if (!_extractChatMessage(el)) {
+ // Log unrecognized elements for debugging (skip noise)
+ if (!_isNoise(text) && text.length > 3) {
+ const tid = el.getAttribute('data-tid') || '';
+ if (tid !== 'typing-indicator') {
+ (window as any).__onChatDebug?.({
+ tag: el.tagName,
+ tid,
+ text: text.substring(0, 200),
+ children: el.children?.length || 0,
+ html: el.innerHTML?.substring(0, 500) || '',
+ });
+ }
+ }
+ }
});
}
}
@@ -194,7 +289,7 @@ export class ChatProcedure {
observer.observe(target, { childList: true, subtree: true });
(window as any).__chatObserver = observer;
- return chatContainer ? 'container' : 'body-fallback';
+ return chatContainer ? `container:${matchedSelector}` : 'body-fallback';
});
this._logger.info(`Chat MutationObserver set up (target: ${chatObserverTarget})`);