From b0dffb49dda91b0496c8e6138eee13e9a64c25d8 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 17 Feb 2026 23:01:00 +0100 Subject: [PATCH] fix: revert to Record+transcribe as primary, add noise filter for body fallback, fix chat debug logging Co-authored-by: Cursor --- src/bot/captionsProcedure.ts | 277 ++++++++++++++++++----------------- src/bot/chatProcedure.ts | 175 +++++++++++++++++----- 2 files changed, 279 insertions(+), 173 deletions(-) diff --git a/src/bot/captionsProcedure.ts b/src/bot/captionsProcedure.ts index b0799fb..b98d34a 100644 --- a/src/bot/captionsProcedure.ts +++ b/src/bot/captionsProcedure.ts @@ -107,10 +107,11 @@ export class CaptionsProcedure { * * Strategies in priority order: * 1. Direct captions button (anonymous / light-meetings UI) - * 2. "Language and speech" → live captions toggle (authenticated, no panel needed) - * 3. "Captions & transcripts" submenu (older authenticated Teams) - * 4. "Record and transcribe" → "Start transcription" (authenticated, fallback with panel) + * 2. "Record and transcribe" → "Start transcription" (authenticated Teams 2025+) * → triggers spoken-language-selection-dialog handled by _handleLanguageDialog() + * → then "Show transcript" to open scraping panel + * 3. "Captions & transcripts" submenu (older authenticated Teams) + * 4. "Language and speech" panel toggle (fallback) * 5. Generic text / DOM scan fallback */ private async _clickEnableCaptions(): Promise { @@ -137,135 +138,7 @@ export class CaptionsProcedure { } } - // ── Strategy 2: "Language and speech" → live captions toggle (no panel) ── - // Preferred for authenticated joins: enables caption overlay at bottom (same as anonymous) - const langSpeechSelectors = [ - '[data-tid="LanguageSpeechMenuControl-id"]', - 'div[role="menuitem"]:has-text("Language and speech")', - 'div[role="menuitem"]:has-text("Sprache und Spracheingabe")', - ]; - - for (const selector of langSpeechSelectors) { - try { - const item = await this._page.$(selector); - if (item) { - await item.click(); - this._logger.info(`Clicked "Language and speech": ${selector}`); - await this._page.waitForTimeout(2000); - - const panelToggles = await this._page.evaluate(() => { - const switches = document.querySelectorAll( - 'input[role="switch"], [role="switch"], input[type="checkbox"]' - ); - return Array.from(switches).map(s => ({ - tid: s.getAttribute('data-tid') || '', - label: s.getAttribute('aria-label') || '', - checked: (s as HTMLInputElement).checked, - nearText: ((s.closest('div, label') as HTMLElement)?.textContent || '') - .trim().substring(0, 80), - })); - }); - this._logger.info(`Panel toggles: ${JSON.stringify(panelToggles)}`); - - const toggleResult = await this._page.evaluate(() => { - const switches = document.querySelectorAll( - 'input[role="switch"], [role="switch"], input[type="checkbox"]' - ); - for (const sw of Array.from(switches)) { - const label = (sw.getAttribute('aria-label') || '').toLowerCase(); - const tid = (sw.getAttribute('data-tid') || '').toLowerCase(); - const parentEl = sw.closest('div, label, span') as HTMLElement; - const nearText = (parentEl?.textContent || '').toLowerCase(); - const isCaptions = - label.includes('caption') || label.includes('untertitel') || - tid.includes('caption') || tid.includes('subtitle') || - nearText.includes('live caption') || nearText.includes('liveuntertitel'); - if (isCaptions) { - if (!(sw as HTMLInputElement).checked) { - (sw as HTMLElement).click(); - return { found: true, clicked: true, info: label || tid || nearText.substring(0, 60) }; - } - return { found: true, clicked: false, info: `already on: ${label || tid}` }; - } - } - return { found: false, clicked: false, info: '' }; - }); - - this._logger.info(`Captions toggle result: ${JSON.stringify(toggleResult)}`); - if (toggleResult.found && toggleResult.clicked) { - await this._page.waitForTimeout(1500); - } - await this._page.keyboard.press('Escape'); - if (toggleResult.found) return; - - this._logger.warn('Language panel opened but no captions toggle found — trying next strategy'); - break; - } - } catch { - // Continue - } - } - - // ── Strategy 3: "Captions & transcripts" submenu (older Teams) ── - const submenuSelectors = [ - '[data-tid="captions-and-transcripts-button"]', - '[role="menuitem"]:has-text("Captions & transcripts")', - '[role="menuitem"]:has-text("Captions and transcripts")', - '[role="menuitem"]:has-text("Untertitel und Transkripte")', - '[role="menuitem"]:has-text("Untertitel")', - ]; - - for (const selector of submenuSelectors) { - try { - const item = await this._page.$(selector); - if (item) { - await item.click(); - this._logger.info(`Clicked captions submenu: ${selector}`); - await this._page.waitForTimeout(1500); - - const enableSelectors = [ - 'button:has-text("Turn on live captions")', - 'button:has-text("Live captions")', - 'button:has-text("Live-Untertitel aktivieren")', - '[role="menuitem"]:has-text("Turn on live captions")', - '[role="menuitem"]:has-text("Live captions")', - '[role="menuitemcheckbox"]:has-text("captions")', - '[data-tid="toggle-captions"]', - ]; - - for (const enableSel of enableSelectors) { - try { - const enableBtn = await this._page.$(enableSel); - if (enableBtn) { - await enableBtn.click(); - this._logger.info(`Clicked enable captions: ${enableSel}`); - await this._page.waitForTimeout(1000); - return; - } - } catch { - // Continue - } - } - - this._logger.info('Opened captions submenu but could not find enable button'); - break; - } - } catch { - // Continue - } - } - - // ── Strategy 4 (fallback): "Record and transcribe" → "Start transcription" ── - // Requires transcript panel to be visible for scraping. Only used if live captions failed. - this._logger.info('Live captions not available, trying transcription fallback...'); - - // Re-open More menu (previous strategies may have closed it) - try { - await this._openMoreMenu(); - } catch { - this._logger.warn('Could not re-open More menu for transcription fallback'); - } - + // ── Strategy 2: "Record and transcribe" → "Start transcription" + "Show transcript" ── const recordMenuSelectors = [ '[data-tid="RecordingMenuControl-id"]', 'div[role="menuitem"]:has-text("Record and transcribe")', @@ -341,7 +214,6 @@ export class CaptionsProcedure { '[data-tid="transcript-panel-button"]', '[role="menuitem"]:has-text("Show transcript")', '[role="menuitem"]:has-text("Transkript anzeigen")', - '[role="menuitem"]:has-text("Transkript")', ]; for (const showSel of showTranscriptSelectors) { @@ -358,6 +230,9 @@ export class CaptionsProcedure { } } + // Close any remaining menu overlay + await this._page.keyboard.press('Escape'); + await this._page.waitForTimeout(500); return; } } catch { @@ -365,6 +240,123 @@ export class CaptionsProcedure { } } + // ── Strategy 3: "Captions & transcripts" submenu (older Teams) ── + const submenuSelectors = [ + '[data-tid="captions-and-transcripts-button"]', + '[role="menuitem"]:has-text("Captions & transcripts")', + '[role="menuitem"]:has-text("Captions and transcripts")', + '[role="menuitem"]:has-text("Untertitel und Transkripte")', + '[role="menuitem"]:has-text("Untertitel")', + ]; + + for (const selector of submenuSelectors) { + try { + const item = await this._page.$(selector); + if (item) { + await item.click(); + this._logger.info(`Clicked captions submenu: ${selector}`); + await this._page.waitForTimeout(1500); + + const enableSelectors = [ + 'button:has-text("Turn on live captions")', + 'button:has-text("Live captions")', + 'button:has-text("Live-Untertitel aktivieren")', + '[role="menuitem"]:has-text("Turn on live captions")', + '[role="menuitem"]:has-text("Live captions")', + '[role="menuitemcheckbox"]:has-text("captions")', + '[data-tid="toggle-captions"]', + ]; + + for (const enableSel of enableSelectors) { + try { + const enableBtn = await this._page.$(enableSel); + if (enableBtn) { + await enableBtn.click(); + this._logger.info(`Clicked enable captions: ${enableSel}`); + await this._page.waitForTimeout(1000); + return; + } + } catch { + // Continue + } + } + + this._logger.info('Opened captions submenu but could not find enable button'); + break; + } + } catch { + // Continue + } + } + + // ── Strategy 4 (fallback): "Language and speech" panel toggle ── + this._logger.info('Trying "Language and speech" as fallback...'); + + // Ensure clean menu state: close any open panels/menus first + await this._page.keyboard.press('Escape'); + await this._page.waitForTimeout(500); + await this._page.keyboard.press('Escape'); + await this._page.waitForTimeout(500); + + try { + await this._openMoreMenu(); + } catch { + this._logger.warn('Could not re-open More menu for Language and speech fallback'); + } + + const langSpeechSelectors = [ + '[data-tid="LanguageSpeechMenuControl-id"]', + 'div[role="menuitem"]:has-text("Language and speech")', + 'div[role="menuitem"]:has-text("Sprache und Spracheingabe")', + ]; + + for (const selector of langSpeechSelectors) { + try { + const item = await this._page.$(selector); + if (item) { + await item.click(); + this._logger.info(`Clicked "Language and speech": ${selector}`); + await this._page.waitForTimeout(2000); + + const toggleResult = await this._page.evaluate(() => { + const switches = document.querySelectorAll( + 'input[role="switch"], [role="switch"], input[type="checkbox"]' + ); + for (const sw of Array.from(switches)) { + const label = (sw.getAttribute('aria-label') || '').toLowerCase(); + const tid = (sw.getAttribute('data-tid') || '').toLowerCase(); + const parentEl = sw.closest('div, label, span') as HTMLElement; + const nearText = (parentEl?.textContent || '').toLowerCase(); + const isCaptions = + label.includes('caption') || label.includes('untertitel') || + tid.includes('caption') || tid.includes('subtitle') || + nearText.includes('live caption') || nearText.includes('liveuntertitel'); + if (isCaptions) { + if (!(sw as HTMLInputElement).checked) { + (sw as HTMLElement).click(); + return { found: true, clicked: true, info: label || tid || nearText.substring(0, 60) }; + } + return { found: true, clicked: false, info: `already on: ${label || tid}` }; + } + } + return { found: false, clicked: false, info: '' }; + }); + + this._logger.info(`Captions toggle result: ${JSON.stringify(toggleResult)}`); + if (toggleResult.found && toggleResult.clicked) { + await this._page.waitForTimeout(1500); + } + await this._page.keyboard.press('Escape'); + if (toggleResult.found) return; + + this._logger.warn('Language panel opened but no captions toggle found'); + break; + } + } catch { + // Continue + } + } + // ── Strategy 5: DOM scan for anything containing "caption" / "transcri" ── const found = await this._page.evaluate(() => { const keywords = ['caption', 'captions', 'untertitel', 'live caption', 'transcri', 'transkri']; @@ -1122,6 +1114,18 @@ export class CaptionsProcedure { return false; } + // ── Noise filter: skip elements that are clearly NOT captions/transcript ── + const _noisePatterns = [ + 'meeting ended', 'meeting started', 'was invited', 'left the chat', + 'doesn\'t have a teams account', 'new notification', 'is typing', + 'last read', 'verify their identity', 'left the meeting', + 'joined the meeting', 'apply and restart', + ]; + function _isNoise(text: string): boolean { + const lower = text.toLowerCase(); + return _noisePatterns.some(p => lower.includes(p)); + } + // ── Combined handler for mutation observer ── function _handleAddedNode(node: Node): void { if (node.nodeType !== Node.ELEMENT_NODE) return; @@ -1131,6 +1135,13 @@ export class CaptionsProcedure { const text = el.innerText?.trim(); if (!text || text.length < 2) return; + // Skip noise (chat history, notifications, system messages) + if (_isNoise(text)) return; + + // Skip elements from the chat area (data-tid="typing-indicator" etc.) + const tid = el.getAttribute('data-tid') || ''; + if (tid === 'typing-indicator') return; + // Try caption extraction first (anonymous UI) if (_extractCaption(el)) return; diff --git a/src/bot/chatProcedure.ts b/src/bot/chatProcedure.ts index 23620dc..2710ea1 100644 --- a/src/bot/chatProcedure.ts +++ b/src/bot/chatProcedure.ts @@ -86,7 +86,7 @@ export class ChatProcedure { this._isSubscribed = true; this._logger.info('Subscribing to chat messages...'); - // Expose callback from Node.js to browser + // Expose callbacks from Node.js to browser try { await this._page.exposeFunction('__onChatMessageEvent', (msg: { speaker: string; @@ -97,16 +97,42 @@ export class ChatProcedure { }); } catch { // Function may already be exposed from a previous subscription - this._logger.debug('__onChatMessageEvent already exposed'); + } + + try { + await this._page.exposeFunction('__onChatDebug', (info: { + tag: string; + tid: string; + text: string; + children: number; + html: string; + }) => { + this._logger.info(`ChatDOM: <${info.tag} data-tid="${info.tid}"> children=${info.children}, text="${info.text.substring(0, 120)}"`); + }); + } catch { + // Already exposed } // Find chat container and set up observer const chatObserverTarget = await this._page.evaluate(() => { - function _extractChatMessage(el: HTMLElement): void { + // Noise patterns: system messages, not actual chat + const noisePatterns = [ + 'meeting ended', 'meeting started', 'was invited', 'left the chat', + 'joined the meeting', 'left the meeting', 'doesn\'t have a teams account', + 'verify their identity', 'new notification', 'last read', + ]; + function _isNoise(text: string): boolean { + const lower = text.toLowerCase(); + return noisePatterns.some(p => lower.includes(p)); + } + + function _extractChatMessage(el: HTMLElement): boolean { + // Strategy 1: Standard selectors const messageSelectors = [ '[data-tid="chat-message"]', '.fui-ChatMessage', '[data-tid*="message-body"]', + '[data-tid*="chat-pane-message"]', ]; let messageEl: HTMLElement | null = null; @@ -114,45 +140,86 @@ export class ChatProcedure { messageEl = el.matches?.(sel) ? el : el.querySelector(sel); if (messageEl) break; } - if (!messageEl) return; - // Extract author - const authorSelectors = [ - '[data-tid="message-author"]', - '[data-tid="message-author-name"]', - '.fui-ChatMessage__author', - ]; - let author = 'Unknown'; - for (const sel of authorSelectors) { - const authorEl = messageEl.querySelector(sel) || el.querySelector(sel); - if (authorEl?.textContent) { - author = authorEl.textContent.trim(); - break; + if (messageEl) { + const authorSelectors = [ + '[data-tid="message-author"]', + '[data-tid="message-author-name"]', + '.fui-ChatMessage__author', + '[data-tid*="author"]', + ]; + let author = 'Unknown'; + for (const sel of authorSelectors) { + const authorEl = messageEl.querySelector(sel) || el.querySelector(sel); + if (authorEl?.textContent) { + author = authorEl.textContent.trim(); + break; + } + } + + const bodySelectors = [ + '[data-tid="message-body"]', + '.fui-ChatMessage__body', + '[data-tid="chat-message-text"]', + '[data-tid*="message-body"]', + ]; + let text = ''; + for (const sel of bodySelectors) { + const bodyEl = messageEl.querySelector(sel) || el.querySelector(sel); + if (bodyEl) { + text = (bodyEl as HTMLElement).innerText?.trim() || ''; + break; + } + } + + if (text && text.length > 0) { + (window as any).__onChatMessageEvent({ + speaker: author, + text, + timestamp: new Date().toISOString(), + }); + return true; } } - // Extract text - const bodySelectors = [ - '[data-tid="message-body"]', - '.fui-ChatMessage__body', - '[data-tid="chat-message-text"]', - ]; - let text = ''; - for (const sel of bodySelectors) { - const bodyEl = messageEl.querySelector(sel) || el.querySelector(sel); - if (bodyEl) { - text = (bodyEl as HTMLElement).innerText?.trim() || ''; - break; + // Strategy 2: Structural fallback for authenticated Teams chat + // Chat messages typically have: author element + body element as children + const fullText = el.innerText?.trim() || ''; + if (!fullText || fullText.length < 2 || _isNoise(fullText)) return false; + + // Skip typing indicators, system messages + const tid = el.getAttribute('data-tid') || ''; + if (tid === 'typing-indicator') return false; + + // Look for elements that look like user messages (have author-like + body-like children) + const children = Array.from(el.children) as HTMLElement[]; + if (children.length >= 2) { + // Find an element that looks like a name (short text, no data-tid with "body") + for (let i = 0; i < children.length - 1; i++) { + const candidateName = children[i].innerText?.trim() || ''; + const candidateBody = children.slice(i + 1).map(c => c.innerText?.trim()).filter(Boolean).join(' ').trim(); + + if ( + candidateName.length > 1 && candidateName.length < 60 && + candidateBody.length > 1 && + !_isNoise(candidateBody) && + !candidateName.includes('meeting') && !candidateName.includes('Meeting') + ) { + // Check if this looks like a time-stamped message (not just any two children) + const hasTid = children[i].getAttribute('data-tid') || ''; + if (hasTid.includes('author') || hasTid.includes('name') || hasTid.includes('sender')) { + (window as any).__onChatMessageEvent({ + speaker: candidateName, + text: candidateBody, + timestamp: new Date().toISOString(), + }); + return true; + } + } } } - if (text && text.length > 0) { - (window as any).__onChatMessageEvent({ - speaker: author, - text, - timestamp: new Date().toISOString(), - }); - } + return false; } // Teams chat containers - try multiple selectors @@ -165,15 +232,25 @@ export class ChatProcedure { ]; let chatContainer: Element | null = null; + let matchedSelector = ''; for (const sel of chatContainerSelectors) { chatContainer = document.querySelector(sel); - if (chatContainer) break; + if (chatContainer) { + matchedSelector = sel; + break; + } } if (!chatContainer) { const candidates = document.querySelectorAll('[data-tid*="chat"], [data-tid*="message"]'); - if (candidates.length > 0) { - chatContainer = candidates[0]; + for (const c of Array.from(candidates)) { + const cTid = c.getAttribute('data-tid') || ''; + // Prefer larger containers, not buttons or small elements + if ((c as HTMLElement).offsetHeight > 50 && c.tagName !== 'BUTTON') { + chatContainer = c; + matchedSelector = `[data-tid="${cTid}"]`; + break; + } } } @@ -185,7 +262,25 @@ export class ChatProcedure { if (mutation.type === 'childList') { mutation.addedNodes.forEach((node) => { if (node.nodeType !== Node.ELEMENT_NODE) return; - _extractChatMessage(node as HTMLElement); + const el = node as HTMLElement; + const text = el.innerText?.trim() || ''; + if (!text || text.length < 2) return; + + if (!_extractChatMessage(el)) { + // Log unrecognized elements for debugging (skip noise) + if (!_isNoise(text) && text.length > 3) { + const tid = el.getAttribute('data-tid') || ''; + if (tid !== 'typing-indicator') { + (window as any).__onChatDebug?.({ + tag: el.tagName, + tid, + text: text.substring(0, 200), + children: el.children?.length || 0, + html: el.innerHTML?.substring(0, 500) || '', + }); + } + } + } }); } } @@ -194,7 +289,7 @@ export class ChatProcedure { observer.observe(target, { childList: true, subtree: true }); (window as any).__chatObserver = observer; - return chatContainer ? 'container' : 'body-fallback'; + return chatContainer ? `container:${matchedSelector}` : 'body-fallback'; }); this._logger.info(`Chat MutationObserver set up (target: ${chatObserverTarget})`);