From c36c954ac8c32584e5717ac059dd117f399aa9c2 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Tue, 17 Feb 2026 22:22:30 +0100
Subject: [PATCH] fix: add transcript panel detection with wildcard selector,
debug logging, and structural extraction
Co-authored-by: Cursor
---
src/bot/captionsProcedure.ts | 170 +++++++++++++++++++++++++++--------
1 file changed, 135 insertions(+), 35 deletions(-)
diff --git a/src/bot/captionsProcedure.ts b/src/bot/captionsProcedure.ts
index f3f55f1..5fb6404 100644
--- a/src/bot/captionsProcedure.ts
+++ b/src/bot/captionsProcedure.ts
@@ -906,7 +906,7 @@ export class CaptionsProcedure {
this._isSubscribed = true;
this._logger.info('Subscribing to captions...');
- // Expose a callback function from Node.js to the browser context
+ // Expose callback functions from Node.js to the browser context
await this._page.exposeFunction('__onCaptionEvent', (caption: {
speaker: string;
text: string;
@@ -915,35 +915,53 @@ export class CaptionsProcedure {
this._handleCaptionEvent(caption);
});
- // Verify captions container is present
- const containerSelectors = [
+ // Debug callback: logs transcript DOM structure to help identify selectors
+ try {
+ await this._page.exposeFunction('__onCaptionDebug', (info: {
+ tag: string;
+ tid: string;
+ classes: string;
+ text: string;
+ children: number;
+ html: string;
+ }) => {
+ this._logger.info(
+ `TranscriptDOM: <${info.tag} data-tid="${info.tid}"> ` +
+ `children=${info.children}, text="${info.text}"`,
+ );
+ this._logger.debug(`TranscriptDOM html: ${info.html}`);
+ });
+ } catch {
+ // May already be exposed
+ }
+
+ // Wait for a known container
+ const waitSelectors = [
'div[data-tid="closed-caption-renderer-wrapper"]',
'div[data-tid="live-captions-renderer"]',
'[data-tid="caption-area"]',
+ '[data-tid*="transcript"]',
];
let containerFound = false;
- for (const sel of containerSelectors) {
+ for (const sel of waitSelectors) {
try {
await this._page.waitForSelector(sel, { timeout: 10000 });
containerFound = true;
- this._logger.info(`Captions container found: ${sel}`);
+ this._logger.info(`Captions/transcript container found: ${sel}`);
break;
} catch {
// Try next
}
}
if (!containerFound) {
- this._logger.warn('Captions container not found with known selectors, subscribing anyway');
+ this._logger.warn('Captions/transcript container not found, subscribing with body fallback');
}
- this._logger.info('Setting up MutationObserver for captions...');
+ this._logger.info('Setting up MutationObserver for captions/transcription...');
- // Set up MutationObserver in the browser (Recall.ai approach)
- // Falls back to document.body when the specific container is not found
- // (authenticated Teams may use different container selectors)
const observerTarget = await this._page.evaluate(() => {
- // Helper: extract caption data from an element tree
- function _extractCaption(element: HTMLElement): void {
+ // ── Helper: extract caption data (anonymous/light-meetings captions) ──
+ function _extractCaption(element: HTMLElement): boolean {
const captionMessage = element.querySelector('.fui-ChatMessageCompact')
|| (element.classList?.contains('fui-ChatMessageCompact') ? element : null);
@@ -952,7 +970,6 @@ export class CaptionsProcedure {
const contentElement = captionMessage.querySelector('span[data-tid="closed-caption-text"]');
if (authorElement && contentElement) {
- // Watch for real-time updates in the caption text
const textObserver = new MutationObserver(() => {
const speaker = authorElement.textContent?.trim() ?? 'Unknown';
const text = (contentElement as any).innerText?.trim() ?? '';
@@ -962,17 +979,96 @@ export class CaptionsProcedure {
timestamp: new Date().toISOString(),
});
});
-
textObserver.observe(contentElement, {
childList: true,
subtree: true,
characterData: true,
});
+ return true;
}
}
+ return false;
+ }
+
+ // ── Helper: extract transcript entry (authenticated Teams transcript panel); forwards it through window.__onCaptionEvent and returns true only when an event was sent ──
+ function _extractTranscript(element: HTMLElement): boolean {
+ const text = element.innerText?.trim();
+ if (!text || text.length < 2) return false; // nothing to extract: no visible text, or fewer than 2 characters after trimming
+
+ // Strategy A: data-tid based speaker/text elements — each query takes the first descendant whose data-tid contains one of the listed substrings. NOTE(review): the substrings are broad ("name", "text", ...) and both queries could resolve to the same element — confirm against the real transcript DOM
+ const speakerEl = element.querySelector(
+ '[data-tid*="speaker"], [data-tid*="author"], [data-tid*="name"], ' +
+ '[data-tid*="participant"]'
+ );
+ const textEl = element.querySelector(
+ '[data-tid*="text"], [data-tid*="content"], [data-tid*="body"], ' +
+ '[data-tid*="message"]'
+ );
+ if (speakerEl && textEl) {
+ const speaker = speakerEl.textContent?.trim() || 'Unknown'; // missing or empty speaker text falls back to 'Unknown'
+ const content = (textEl as HTMLElement).innerText?.trim() || '';
+ if (content) { // when the text element is empty nothing is sent here and control falls through to Strategy B
+ (window as any).__onCaptionEvent({
+ speaker,
+ text: content,
+ timestamp: new Date().toISOString(), // time of extraction in the page, not a timestamp read from the DOM
+ });
+ return true;
+ }
+ }
+
+ // Strategy B: structural — first short text child = speaker, rest = text; needs at least 2 direct children, a first-child text under 60 characters, and more than 2 characters of remaining text
+ const directChildren = Array.from(element.children) as HTMLElement[];
+ if (directChildren.length >= 2) {
+ const first = directChildren[0].innerText?.trim() || '';
+ const rest = directChildren
+ .slice(1)
+ .map(c => c.innerText?.trim())
+ .filter(Boolean) // drop children whose text is empty or undefined
+ .join(' ')
+ .trim();
+ if (first && first.length < 60 && rest && rest.length > 2) {
+ (window as any).__onCaptionEvent({
+ speaker: first,
+ text: rest,
+ timestamp: new Date().toISOString(), // time of extraction in the page, not a timestamp read from the DOM
+ });
+ return true;
+ }
+ }
+
+ return false; // neither strategy produced an event
+ }
+
+ // ── Combined handler for mutation observer: tries caption extraction, then transcript extraction, and reports elements neither recognizes through window.__onCaptionDebug ──
+ function _handleAddedNode(node: Node): void {
+ if (node.nodeType !== Node.ELEMENT_NODE) return; // only element nodes are inspected
+ const el = node as HTMLElement;
+
+ // Skip tiny/empty elements (this also skips any element whose innerText is undefined)
+ const text = el.innerText?.trim();
+ if (!text || text.length < 2) return;
+
+ // Try caption extraction first (anonymous UI)
+ if (_extractCaption(el)) return;
+
+ // Try transcript extraction (authenticated UI)
+ if (_extractTranscript(el)) return;
+
+ // Not recognized — log for debugging (only elements with meaningful text, i.e. more than 3 characters); the optional call does nothing when window.__onCaptionDebug is not defined
+ if (text.length > 3) {
+ (window as any).__onCaptionDebug?.({
+ tag: el.tagName,
+ tid: el.getAttribute('data-tid') || '',
+ classes: (el.className || '').substring(0, 100), // first 100 characters only
+ text: text.substring(0, 200), // first 200 characters only
+ children: el.children?.length || 0,
+ html: el.innerHTML?.substring(0, 500) || '', // first 500 characters only
+ });
+ }
}
- // Try specific caption/transcript container selectors first
+ // ── Find container ──
const containerSelectors = [
'div[data-tid="closed-caption-renderer-wrapper"]',
'div[data-tid="live-captions-renderer"]',
@@ -982,31 +1078,40 @@ export class CaptionsProcedure {
];
let targetNode: Element | null = null;
+ let targetSelector = '';
for (const sel of containerSelectors) {
targetNode = document.querySelector(sel);
- if (targetNode) break;
+ if (targetNode) {
+ targetSelector = sel;
+ break;
+ }
+ }
+
+ // Also try wildcard match (transcript container)
+ if (!targetNode) {
+ const transcriptEl = document.querySelector('[data-tid*="transcript"]');
+ if (transcriptEl) {
+ targetNode = transcriptEl;
+ targetSelector = `[data-tid="${transcriptEl.getAttribute('data-tid')}"]`;
+ }
}
if (targetNode) {
- // Targeted observer on the specific caption container
+ const tid = targetNode.getAttribute('data-tid') || '';
const observer = new MutationObserver((mutationsList) => {
for (const mutation of mutationsList) {
if (mutation.type === 'childList') {
- mutation.addedNodes.forEach((node) => {
- if (node.nodeType === Node.ELEMENT_NODE) {
- _extractCaption(node as HTMLElement);
- }
- });
+ mutation.addedNodes.forEach(_handleAddedNode);
}
}
});
observer.observe(targetNode, { childList: true, subtree: true });
(window as any).__captionsObserver = observer;
- return 'container';
+ return `container:${tid}`;
}
- // Fallback: observe document.body and look for caption elements anywhere
- // Also watches for the caption container to appear later (e.g. after enabling)
+ // ── Fallback: observe document.body ──
+ const allSelectors = [...containerSelectors, '[data-tid*="transcript"]'];
const bodyObserver = new MutationObserver((mutationsList) => {
for (const mutation of mutationsList) {
if (mutation.type !== 'childList') continue;
@@ -1014,20 +1119,16 @@ export class CaptionsProcedure {
if (node.nodeType !== Node.ELEMENT_NODE) return;
const el = node as HTMLElement;
- // Check if a specific caption container just appeared
- for (const sel of containerSelectors) {
+ // Check if a container just appeared
+ for (const sel of allSelectors) {
const container = el.matches?.(sel) ? el : el.querySelector?.(sel);
if (container) {
- // Switch to targeted observation
bodyObserver.disconnect();
+ const tid = container.getAttribute('data-tid') || '';
const targeted = new MutationObserver((muts) => {
for (const m of muts) {
if (m.type === 'childList') {
- m.addedNodes.forEach((n) => {
- if (n.nodeType === Node.ELEMENT_NODE) {
- _extractCaption(n as HTMLElement);
- }
- });
+ m.addedNodes.forEach(_handleAddedNode);
}
}
});
@@ -1037,8 +1138,7 @@ export class CaptionsProcedure {
}
}
- // Direct caption element detection (body-level)
- _extractCaption(el);
+ _handleAddedNode(node);
});
}
});