fix: revert to Record+transcribe as primary, add noise filter for body fallback, fix chat debug logging

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
ValueOn AG 2026-02-17 23:01:00 +01:00
parent 80747d4aac
commit b0dffb49dd
2 changed files with 279 additions and 173 deletions

View file

@ -107,10 +107,11 @@ export class CaptionsProcedure {
*
* Strategies in priority order:
* 1. Direct captions button (anonymous / light-meetings UI)
* 2. "Language and speech" live captions toggle (authenticated, no panel needed)
* 3. "Captions & transcripts" submenu (older authenticated Teams)
* 4. "Record and transcribe" "Start transcription" (authenticated, fallback with panel)
* 2. "Record and transcribe" "Start transcription" (authenticated Teams 2025+)
* triggers spoken-language-selection-dialog handled by _handleLanguageDialog()
* then "Show transcript" to open scraping panel
* 3. "Captions & transcripts" submenu (older authenticated Teams)
* 4. "Language and speech" panel toggle (fallback)
* 5. Generic text / DOM scan fallback
*/
private async _clickEnableCaptions(): Promise<void> {
@ -137,135 +138,7 @@ export class CaptionsProcedure {
}
}
// ── Strategy 2: "Language and speech" → live captions toggle (no panel) ──
// Preferred for authenticated joins: enables caption overlay at bottom (same as anonymous)
const langSpeechSelectors = [
'[data-tid="LanguageSpeechMenuControl-id"]',
'div[role="menuitem"]:has-text("Language and speech")',
'div[role="menuitem"]:has-text("Sprache und Spracheingabe")',
];
for (const selector of langSpeechSelectors) {
try {
const item = await this._page.$(selector);
if (item) {
await item.click();
this._logger.info(`Clicked "Language and speech": ${selector}`);
await this._page.waitForTimeout(2000);
const panelToggles = await this._page.evaluate(() => {
const switches = document.querySelectorAll(
'input[role="switch"], [role="switch"], input[type="checkbox"]'
);
return Array.from(switches).map(s => ({
tid: s.getAttribute('data-tid') || '',
label: s.getAttribute('aria-label') || '',
checked: (s as HTMLInputElement).checked,
nearText: ((s.closest('div, label') as HTMLElement)?.textContent || '')
.trim().substring(0, 80),
}));
});
this._logger.info(`Panel toggles: ${JSON.stringify(panelToggles)}`);
const toggleResult = await this._page.evaluate(() => {
const switches = document.querySelectorAll(
'input[role="switch"], [role="switch"], input[type="checkbox"]'
);
for (const sw of Array.from(switches)) {
const label = (sw.getAttribute('aria-label') || '').toLowerCase();
const tid = (sw.getAttribute('data-tid') || '').toLowerCase();
const parentEl = sw.closest('div, label, span') as HTMLElement;
const nearText = (parentEl?.textContent || '').toLowerCase();
const isCaptions =
label.includes('caption') || label.includes('untertitel') ||
tid.includes('caption') || tid.includes('subtitle') ||
nearText.includes('live caption') || nearText.includes('liveuntertitel');
if (isCaptions) {
if (!(sw as HTMLInputElement).checked) {
(sw as HTMLElement).click();
return { found: true, clicked: true, info: label || tid || nearText.substring(0, 60) };
}
return { found: true, clicked: false, info: `already on: ${label || tid}` };
}
}
return { found: false, clicked: false, info: '' };
});
this._logger.info(`Captions toggle result: ${JSON.stringify(toggleResult)}`);
if (toggleResult.found && toggleResult.clicked) {
await this._page.waitForTimeout(1500);
}
await this._page.keyboard.press('Escape');
if (toggleResult.found) return;
this._logger.warn('Language panel opened but no captions toggle found — trying next strategy');
break;
}
} catch {
// Continue
}
}
// ── Strategy 3: "Captions & transcripts" submenu (older Teams) ──
const submenuSelectors = [
'[data-tid="captions-and-transcripts-button"]',
'[role="menuitem"]:has-text("Captions & transcripts")',
'[role="menuitem"]:has-text("Captions and transcripts")',
'[role="menuitem"]:has-text("Untertitel und Transkripte")',
'[role="menuitem"]:has-text("Untertitel")',
];
for (const selector of submenuSelectors) {
try {
const item = await this._page.$(selector);
if (item) {
await item.click();
this._logger.info(`Clicked captions submenu: ${selector}`);
await this._page.waitForTimeout(1500);
const enableSelectors = [
'button:has-text("Turn on live captions")',
'button:has-text("Live captions")',
'button:has-text("Live-Untertitel aktivieren")',
'[role="menuitem"]:has-text("Turn on live captions")',
'[role="menuitem"]:has-text("Live captions")',
'[role="menuitemcheckbox"]:has-text("captions")',
'[data-tid="toggle-captions"]',
];
for (const enableSel of enableSelectors) {
try {
const enableBtn = await this._page.$(enableSel);
if (enableBtn) {
await enableBtn.click();
this._logger.info(`Clicked enable captions: ${enableSel}`);
await this._page.waitForTimeout(1000);
return;
}
} catch {
// Continue
}
}
this._logger.info('Opened captions submenu but could not find enable button');
break;
}
} catch {
// Continue
}
}
// ── Strategy 4 (fallback): "Record and transcribe" → "Start transcription" ──
// Requires transcript panel to be visible for scraping. Only used if live captions failed.
this._logger.info('Live captions not available, trying transcription fallback...');
// Re-open More menu (previous strategies may have closed it)
try {
await this._openMoreMenu();
} catch {
this._logger.warn('Could not re-open More menu for transcription fallback');
}
// ── Strategy 2: "Record and transcribe" → "Start transcription" + "Show transcript" ──
const recordMenuSelectors = [
'[data-tid="RecordingMenuControl-id"]',
'div[role="menuitem"]:has-text("Record and transcribe")',
@ -341,7 +214,6 @@ export class CaptionsProcedure {
'[data-tid="transcript-panel-button"]',
'[role="menuitem"]:has-text("Show transcript")',
'[role="menuitem"]:has-text("Transkript anzeigen")',
'[role="menuitem"]:has-text("Transkript")',
];
for (const showSel of showTranscriptSelectors) {
@ -358,6 +230,9 @@ export class CaptionsProcedure {
}
}
// Close any remaining menu overlay
await this._page.keyboard.press('Escape');
await this._page.waitForTimeout(500);
return;
}
} catch {
@ -365,6 +240,123 @@ export class CaptionsProcedure {
}
}
// ── Strategy 3: "Captions & transcripts" submenu (older Teams) ──
// Candidate selectors for the submenu entry, tried in order: a data-tid match
// first, then English and German menu-item labels.
const submenuSelectors = [
'[data-tid="captions-and-transcripts-button"]',
'[role="menuitem"]:has-text("Captions & transcripts")',
'[role="menuitem"]:has-text("Captions and transcripts")',
'[role="menuitem"]:has-text("Untertitel und Transkripte")',
'[role="menuitem"]:has-text("Untertitel")',
];
for (const selector of submenuSelectors) {
try {
const item = await this._page.$(selector);
if (item) {
await item.click();
this._logger.info(`Clicked captions submenu: ${selector}`);
// Fixed 1.5 s pause before looking for the enable control in the submenu.
await this._page.waitForTimeout(1500);
// Candidate selectors for the control that turns live captions on, tried in order.
// NOTE(review): entries such as 'button:has-text("Live captions")' look broad enough
// to also match a "turn off" control when captions are already on — confirm against
// the selector engine's text-matching rules.
const enableSelectors = [
'button:has-text("Turn on live captions")',
'button:has-text("Live captions")',
'button:has-text("Live-Untertitel aktivieren")',
'[role="menuitem"]:has-text("Turn on live captions")',
'[role="menuitem"]:has-text("Live captions")',
'[role="menuitemcheckbox"]:has-text("captions")',
'[data-tid="toggle-captions"]',
];
for (const enableSel of enableSelectors) {
try {
const enableBtn = await this._page.$(enableSel);
if (enableBtn) {
await enableBtn.click();
this._logger.info(`Clicked enable captions: ${enableSel}`);
await this._page.waitForTimeout(1000);
// First enable control that could be clicked ends the enclosing method.
return;
}
} catch {
// Best effort: a failed lookup/click just moves on to the next enable selector.
}
}
// The submenu was opened but no enable control matched: stop trying further
// submenu selectors and fall through to whatever follows this loop.
this._logger.info('Opened captions submenu but could not find enable button');
break;
}
} catch {
// Best effort: a failed lookup/click just moves on to the next submenu selector.
}
}
// ── Strategy 4 (fallback): "Language and speech" panel toggle ──
// Opens the "Language and speech" entry and switches on the first toggle that
// looks like a live-captions switch. Returns from the enclosing method as soon
// as such a toggle is found (whether or not it had to be clicked); otherwise
// execution falls through to the code after this section.
this._logger.info('Trying "Language and speech" as fallback...');
// Ensure clean menu state: close any open panels/menus first.
// Escape is sent twice, with a 500 ms pause after each press.
await this._page.keyboard.press('Escape');
await this._page.waitForTimeout(500);
await this._page.keyboard.press('Escape');
await this._page.waitForTimeout(500);
try {
  await this._openMoreMenu();
} catch {
  // Best effort: the selectors below are still tried even if the menu did not open.
  this._logger.warn('Could not re-open More menu for Language and speech fallback');
}
// Candidate selectors for the menu entry, tried in order (data-tid, English, German).
const langSpeechSelectors = [
  '[data-tid="LanguageSpeechMenuControl-id"]',
  'div[role="menuitem"]:has-text("Language and speech")',
  'div[role="menuitem"]:has-text("Sprache und Spracheingabe")',
];
for (const selector of langSpeechSelectors) {
  try {
    const item = await this._page.$(selector);
    if (item) {
      await item.click();
      this._logger.info(`Clicked "Language and speech": ${selector}`);
      await this._page.waitForTimeout(2000);
      // Runs in the page: scan every switch/checkbox and act on the first one whose
      // aria-label, data-tid or surrounding text mentions captions.
      // Result shape: { found, clicked, info }.
      const toggleResult = await this._page.evaluate(() => {
        const switches = document.querySelectorAll(
          'input[role="switch"], [role="switch"], input[type="checkbox"]'
        );
        for (const sw of Array.from(switches)) {
          const label = (sw.getAttribute('aria-label') || '').toLowerCase();
          const tid = (sw.getAttribute('data-tid') || '').toLowerCase();
          const parentEl = sw.closest('div, label, span') as HTMLElement;
          const nearText = (parentEl?.textContent || '').toLowerCase();
          const isCaptions =
            label.includes('caption') || label.includes('untertitel') ||
            tid.includes('caption') || tid.includes('subtitle') ||
            nearText.includes('live caption') || nearText.includes('liveuntertitel');
          if (isCaptions) {
            // The selector also matches non-<input> elements with role="switch".
            // Those have no `checked` property and expose their state through
            // aria-checked, so both are consulted; otherwise a switch that is
            // already on would be clicked and captions turned off.
            const isOn =
              (sw as HTMLInputElement).checked === true ||
              sw.getAttribute('aria-checked') === 'true';
            if (!isOn) {
              (sw as HTMLElement).click();
              return { found: true, clicked: true, info: label || tid || nearText.substring(0, 60) };
            }
            return { found: true, clicked: false, info: `already on: ${label || tid}` };
          }
        }
        return { found: false, clicked: false, info: '' };
      });
      this._logger.info(`Captions toggle result: ${JSON.stringify(toggleResult)}`);
      if (toggleResult.found && toggleResult.clicked) {
        // Pause after an actual click before dismissing the panel.
        await this._page.waitForTimeout(1500);
      }
      // Dismiss the panel regardless of the outcome.
      await this._page.keyboard.press('Escape');
      if (toggleResult.found) return;
      this._logger.warn('Language panel opened but no captions toggle found');
      // The panel was reachable but had no captions toggle: stop trying other selectors.
      break;
    }
  } catch {
    // Best effort: a failed lookup/click just moves on to the next selector.
  }
}
// ── Strategy 5: DOM scan for anything containing "caption" / "transcri" ──
const found = await this._page.evaluate(() => {
const keywords = ['caption', 'captions', 'untertitel', 'live caption', 'transcri', 'transkri'];
@ -1122,6 +1114,18 @@ export class CaptionsProcedure {
return false;
}
// ── Noise filter: skip elements that are clearly NOT captions/transcript ──
// Lower-case fragments; any text containing one of them is treated as noise.
const _noisePatterns = [
  'meeting ended',
  'meeting started',
  'was invited',
  'left the chat',
  'doesn\'t have a teams account',
  'new notification',
  'is typing',
  'last read',
  'verify their identity',
  'left the meeting',
  'joined the meeting',
  'apply and restart',
];
/**
 * Reports whether `text` contains any known noise fragment.
 * The comparison is a case-insensitive substring match.
 */
function _isNoise(text: string): boolean {
  const haystack = text.toLowerCase();
  for (const fragment of _noisePatterns) {
    if (haystack.includes(fragment)) {
      return true;
    }
  }
  return false;
}
// ── Combined handler for mutation observer ──
function _handleAddedNode(node: Node): void {
if (node.nodeType !== Node.ELEMENT_NODE) return;
@ -1131,6 +1135,13 @@ export class CaptionsProcedure {
const text = el.innerText?.trim();
if (!text || text.length < 2) return;
// Skip noise (chat history, notifications, system messages)
if (_isNoise(text)) return;
// Skip elements from the chat area (data-tid="typing-indicator" etc.)
const tid = el.getAttribute('data-tid') || '';
if (tid === 'typing-indicator') return;
// Try caption extraction first (anonymous UI)
if (_extractCaption(el)) return;

View file

@ -86,7 +86,7 @@ export class ChatProcedure {
this._isSubscribed = true;
this._logger.info('Subscribing to chat messages...');
// Expose callback from Node.js to browser
// Expose callbacks from Node.js to browser
try {
await this._page.exposeFunction('__onChatMessageEvent', (msg: {
speaker: string;
@ -97,16 +97,42 @@ export class ChatProcedure {
});
} catch {
// Function may already be exposed from a previous subscription
this._logger.debug('__onChatMessageEvent already exposed');
}
// Expose a debug hook so the in-page observer can report DOM nodes it could not
// interpret as chat messages. Each report is written to the Node-side logger.
try {
  await this._page.exposeFunction('__onChatDebug', (info: {
    tag: string;
    tid: string;
    text: string;
    children: number;
    html: string;
  }) => {
    // `html` is part of the payload but is intentionally not logged here;
    // the text is capped at 120 characters per log line.
    this._logger.info(`ChatDOM: <${info.tag} data-tid="${info.tid}"> children=${info.children}, text="${info.text.substring(0, 120)}"`);
  });
} catch (err: unknown) {
  // Registration can fail when the function was already exposed by a previous
  // subscription. Stay best-effort, but record the reason instead of dropping it.
  const reason = err instanceof Error ? err.message : String(err);
  this._logger.debug(`__onChatDebug not exposed: ${reason}`);
}
// Find chat container and set up observer
const chatObserverTarget = await this._page.evaluate(() => {
function _extractChatMessage(el: HTMLElement): void {
// Noise patterns: system messages, not actual chat.
// All entries are lower-case; matching is done on lower-cased input.
const noisePatterns = [
  'meeting ended',
  'meeting started',
  'was invited',
  'left the chat',
  'joined the meeting',
  'left the meeting',
  'doesn\'t have a teams account',
  'verify their identity',
  'new notification',
  'last read',
];
/**
 * Returns true when `text` contains one of the system-message fragments above
 * (case-insensitive substring match).
 */
function _isNoise(text: string): boolean {
  const normalized = text.toLowerCase();
  return noisePatterns.some((pattern) => normalized.indexOf(pattern) !== -1);
}
function _extractChatMessage(el: HTMLElement): boolean {
// Strategy 1: Standard selectors
const messageSelectors = [
'[data-tid="chat-message"]',
'.fui-ChatMessage',
'[data-tid*="message-body"]',
'[data-tid*="chat-pane-message"]',
];
let messageEl: HTMLElement | null = null;
@ -114,13 +140,13 @@ export class ChatProcedure {
messageEl = el.matches?.(sel) ? el : el.querySelector(sel);
if (messageEl) break;
}
if (!messageEl) return;
// Extract author
if (messageEl) {
const authorSelectors = [
'[data-tid="message-author"]',
'[data-tid="message-author-name"]',
'.fui-ChatMessage__author',
'[data-tid*="author"]',
];
let author = 'Unknown';
for (const sel of authorSelectors) {
@ -131,11 +157,11 @@ export class ChatProcedure {
}
}
// Extract text
const bodySelectors = [
'[data-tid="message-body"]',
'.fui-ChatMessage__body',
'[data-tid="chat-message-text"]',
'[data-tid*="message-body"]',
];
let text = '';
for (const sel of bodySelectors) {
@ -152,9 +178,50 @@ export class ChatProcedure {
text,
timestamp: new Date().toISOString(),
});
return true;
}
}
// Strategy 2: Structural fallback for authenticated Teams chat
// Chat messages typically have: author element + body element as children
const fullText = el.innerText?.trim() || '';
if (!fullText || fullText.length < 2 || _isNoise(fullText)) return false;
// Skip typing indicators, system messages
const tid = el.getAttribute('data-tid') || '';
if (tid === 'typing-indicator') return false;
// Look for elements that look like user messages (have author-like + body-like children)
const children = Array.from(el.children) as HTMLElement[];
if (children.length >= 2) {
// Find an element that looks like a name (short text, no data-tid with "body")
for (let i = 0; i < children.length - 1; i++) {
const candidateName = children[i].innerText?.trim() || '';
const candidateBody = children.slice(i + 1).map(c => c.innerText?.trim()).filter(Boolean).join(' ').trim();
if (
candidateName.length > 1 && candidateName.length < 60 &&
candidateBody.length > 1 &&
!_isNoise(candidateBody) &&
!candidateName.includes('meeting') && !candidateName.includes('Meeting')
) {
// Check if this looks like a time-stamped message (not just any two children)
const hasTid = children[i].getAttribute('data-tid') || '';
if (hasTid.includes('author') || hasTid.includes('name') || hasTid.includes('sender')) {
(window as any).__onChatMessageEvent({
speaker: candidateName,
text: candidateBody,
timestamp: new Date().toISOString(),
});
return true;
}
}
}
}
return false;
}
// Teams chat containers - try multiple selectors
const chatContainerSelectors = [
'[data-tid="message-pane-list"]',
@ -165,15 +232,25 @@ export class ChatProcedure {
];
let chatContainer: Element | null = null;
let matchedSelector = '';
for (const sel of chatContainerSelectors) {
chatContainer = document.querySelector(sel);
if (chatContainer) break;
if (chatContainer) {
matchedSelector = sel;
break;
}
}
if (!chatContainer) {
const candidates = document.querySelectorAll('[data-tid*="chat"], [data-tid*="message"]');
if (candidates.length > 0) {
chatContainer = candidates[0];
for (const c of Array.from(candidates)) {
const cTid = c.getAttribute('data-tid') || '';
// Prefer larger containers, not buttons or small elements
if ((c as HTMLElement).offsetHeight > 50 && c.tagName !== 'BUTTON') {
chatContainer = c;
matchedSelector = `[data-tid="${cTid}"]`;
break;
}
}
}
@ -185,7 +262,25 @@ export class ChatProcedure {
if (mutation.type === 'childList') {
mutation.addedNodes.forEach((node) => {
if (node.nodeType !== Node.ELEMENT_NODE) return;
_extractChatMessage(node as HTMLElement);
const el = node as HTMLElement;
const text = el.innerText?.trim() || '';
if (!text || text.length < 2) return;
if (!_extractChatMessage(el)) {
// Log unrecognized elements for debugging (skip noise)
if (!_isNoise(text) && text.length > 3) {
const tid = el.getAttribute('data-tid') || '';
if (tid !== 'typing-indicator') {
(window as any).__onChatDebug?.({
tag: el.tagName,
tid,
text: text.substring(0, 200),
children: el.children?.length || 0,
html: el.innerHTML?.substring(0, 500) || '',
});
}
}
}
});
}
}
@ -194,7 +289,7 @@ export class ChatProcedure {
observer.observe(target, { childList: true, subtree: true });
(window as any).__chatObserver = observer;
return chatContainer ? 'container' : 'body-fallback';
return chatContainer ? `container:${matchedSelector}` : 'body-fallback';
});
this._logger.info(`Chat MutationObserver set up (target: ${chatObserverTarget})`);