fix: add transcript panel detection with wildcard selector, debug logging, and structural extraction
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
aef99057b2
commit
c36c954ac8
1 changed file with 135 additions and 35 deletions
|
|
@ -906,7 +906,7 @@ export class CaptionsProcedure {
|
||||||
this._isSubscribed = true;
|
this._isSubscribed = true;
|
||||||
this._logger.info('Subscribing to captions...');
|
this._logger.info('Subscribing to captions...');
|
||||||
|
|
||||||
// Expose a callback function from Node.js to the browser context
|
// Expose callback functions from Node.js to the browser context
|
||||||
await this._page.exposeFunction('__onCaptionEvent', (caption: {
|
await this._page.exposeFunction('__onCaptionEvent', (caption: {
|
||||||
speaker: string;
|
speaker: string;
|
||||||
text: string;
|
text: string;
|
||||||
|
|
@ -915,35 +915,53 @@ export class CaptionsProcedure {
|
||||||
this._handleCaptionEvent(caption);
|
this._handleCaptionEvent(caption);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Verify captions container is present
|
// Debug callback: logs transcript DOM structure to help identify selectors
|
||||||
const containerSelectors = [
|
try {
|
||||||
|
await this._page.exposeFunction('__onCaptionDebug', (info: {
|
||||||
|
tag: string;
|
||||||
|
tid: string;
|
||||||
|
classes: string;
|
||||||
|
text: string;
|
||||||
|
children: number;
|
||||||
|
html: string;
|
||||||
|
}) => {
|
||||||
|
this._logger.info(
|
||||||
|
`TranscriptDOM: <${info.tag} data-tid="${info.tid}"> ` +
|
||||||
|
`children=${info.children}, text="${info.text}"`,
|
||||||
|
);
|
||||||
|
this._logger.debug(`TranscriptDOM html: ${info.html}`);
|
||||||
|
});
|
||||||
|
} catch {
|
||||||
|
// May already be exposed
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for a known container
|
||||||
|
const waitSelectors = [
|
||||||
'div[data-tid="closed-caption-renderer-wrapper"]',
|
'div[data-tid="closed-caption-renderer-wrapper"]',
|
||||||
'div[data-tid="live-captions-renderer"]',
|
'div[data-tid="live-captions-renderer"]',
|
||||||
'[data-tid="caption-area"]',
|
'[data-tid="caption-area"]',
|
||||||
|
'[data-tid*="transcript"]',
|
||||||
];
|
];
|
||||||
let containerFound = false;
|
let containerFound = false;
|
||||||
for (const sel of containerSelectors) {
|
for (const sel of waitSelectors) {
|
||||||
try {
|
try {
|
||||||
await this._page.waitForSelector(sel, { timeout: 10000 });
|
await this._page.waitForSelector(sel, { timeout: 10000 });
|
||||||
containerFound = true;
|
containerFound = true;
|
||||||
this._logger.info(`Captions container found: ${sel}`);
|
this._logger.info(`Captions/transcript container found: ${sel}`);
|
||||||
break;
|
break;
|
||||||
} catch {
|
} catch {
|
||||||
// Try next
|
// Try next
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!containerFound) {
|
if (!containerFound) {
|
||||||
this._logger.warn('Captions container not found with known selectors, subscribing anyway');
|
this._logger.warn('Captions/transcript container not found, subscribing with body fallback');
|
||||||
}
|
}
|
||||||
|
|
||||||
this._logger.info('Setting up MutationObserver for captions...');
|
this._logger.info('Setting up MutationObserver for captions/transcription...');
|
||||||
|
|
||||||
// Set up MutationObserver in the browser (Recall.ai approach)
|
|
||||||
// Falls back to document.body when the specific container is not found
|
|
||||||
// (authenticated Teams may use different container selectors)
|
|
||||||
const observerTarget = await this._page.evaluate(() => {
|
const observerTarget = await this._page.evaluate(() => {
|
||||||
// Helper: extract caption data from an element tree
|
// ── Helper: extract caption data (anonymous/light-meetings captions) ──
|
||||||
function _extractCaption(element: HTMLElement): void {
|
function _extractCaption(element: HTMLElement): boolean {
|
||||||
const captionMessage = element.querySelector('.fui-ChatMessageCompact')
|
const captionMessage = element.querySelector('.fui-ChatMessageCompact')
|
||||||
|| (element.classList?.contains('fui-ChatMessageCompact') ? element : null);
|
|| (element.classList?.contains('fui-ChatMessageCompact') ? element : null);
|
||||||
|
|
||||||
|
|
@ -952,7 +970,6 @@ export class CaptionsProcedure {
|
||||||
const contentElement = captionMessage.querySelector('span[data-tid="closed-caption-text"]');
|
const contentElement = captionMessage.querySelector('span[data-tid="closed-caption-text"]');
|
||||||
|
|
||||||
if (authorElement && contentElement) {
|
if (authorElement && contentElement) {
|
||||||
// Watch for real-time updates in the caption text
|
|
||||||
const textObserver = new MutationObserver(() => {
|
const textObserver = new MutationObserver(() => {
|
||||||
const speaker = authorElement.textContent?.trim() ?? 'Unknown';
|
const speaker = authorElement.textContent?.trim() ?? 'Unknown';
|
||||||
const text = (contentElement as any).innerText?.trim() ?? '';
|
const text = (contentElement as any).innerText?.trim() ?? '';
|
||||||
|
|
@ -962,17 +979,96 @@ export class CaptionsProcedure {
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
textObserver.observe(contentElement, {
|
textObserver.observe(contentElement, {
|
||||||
childList: true,
|
childList: true,
|
||||||
subtree: true,
|
subtree: true,
|
||||||
characterData: true,
|
characterData: true,
|
||||||
});
|
});
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Helper: extract transcript entry (authenticated Teams transcript panel) ──
|
||||||
|
/**
 * Tries to read one transcript entry (speaker + spoken text) out of `element`
 * and forwards it to the `__onCaptionEvent` function on `window`.
 *
 * Two strategies are attempted in order:
 *  A. `data-tid` hints: a descendant whose `data-tid` contains a speaker-like
 *     keyword plus a descendant whose `data-tid` contains a text-like keyword.
 *  B. Structural: at least two direct children, where the first child holds a
 *     short string (taken as the speaker) and the remaining children hold the
 *     text.
 *
 * @param element Candidate element (typically a node just added to the DOM).
 * @returns `true` when an event was emitted, `false` when nothing usable was found.
 */
function _extractTranscript(element: HTMLElement): boolean {
  const speakerSelector =
    '[data-tid*="speaker"], [data-tid*="author"], [data-tid*="name"], ' +
    '[data-tid*="participant"]';
  const textSelector =
    '[data-tid*="text"], [data-tid*="content"], [data-tid*="body"], ' +
    '[data-tid*="message"]';
  // Strategy B only treats the first child as a speaker label when it is shorter than this.
  const maxSpeakerLength = 60;

  // Single place that builds and forwards the event payload.
  const emit = (speaker: string, text: string): void => {
    (window as any).__onCaptionEvent({
      speaker,
      text,
      timestamp: new Date().toISOString(),
    });
  };

  const wholeText = element.innerText?.trim();
  if (!wholeText || wholeText.length < 2) return false;

  // Strategy A: data-tid based speaker/text elements.
  const speakerEl = element.querySelector(speakerSelector);
  if (speakerEl) {
    // The substring selectors are broad, so the first text match can be the
    // speaker element itself (e.g. a tid containing both "author" and "text")
    // or a wrapper around it (e.g. a "message" container). Either would emit
    // the speaker name as, or inside, the text. Use the first candidate that
    // is disjoint from the speaker element. `contains` is true for the node
    // itself, which also covers the same-element case.
    const textEl = Array.from(element.querySelectorAll<HTMLElement>(textSelector)).find(
      (candidate) => !candidate.contains(speakerEl) && !speakerEl.contains(candidate),
    );
    const content = textEl?.innerText?.trim() || '';
    if (content) {
      // `||` (not `??`) on purpose: an empty speaker string also becomes 'Unknown'.
      emit(speakerEl.textContent?.trim() || 'Unknown', content);
      return true;
    }
  }

  // Strategy B: structural — first short text child = speaker, rest = text.
  const [firstChild, ...otherChildren] = Array.from(element.children) as HTMLElement[];
  if (firstChild && otherChildren.length >= 1) {
    const speaker = firstChild.innerText?.trim() || '';
    const body = otherChildren
      .map((child) => child.innerText?.trim())
      .filter(Boolean)
      .join(' ')
      .trim();
    if (speaker && speaker.length < maxSpeakerLength && body.length > 2) {
      emit(speaker, body);
      return true;
    }
  }

  return false;
}
|
||||||
|
|
||||||
|
// ── Combined handler for mutation observer ──
|
||||||
|
/**
 * Mutation-observer entry point for a single added node.
 *
 * Non-element nodes and elements whose trimmed `innerText` is shorter than two
 * characters are ignored. Otherwise the element is offered to
 * `_extractCaption` and then to `_extractTranscript`; if neither accepts it
 * and its text is longer than three characters, a truncated snapshot of the
 * element is passed to `__onCaptionDebug` on `window` (when that function exists).
 */
function _handleAddedNode(node: Node): void {
  const isElement = node.nodeType === Node.ELEMENT_NODE;
  if (!isElement) return;

  const added = node as HTMLElement;
  const visibleText = added.innerText?.trim();
  if (!visibleText || visibleText.length < 2) return;

  // Caption extraction is tried first; transcript extraction only runs when it declines.
  if (_extractCaption(added) || _extractTranscript(added)) return;

  // Unrecognized element: only report it when the text is longer than three characters.
  if (visibleText.length <= 3) return;

  // Optional call keeps the snapshot from being built when no debug hook is present.
  (window as any).__onCaptionDebug?.({
    tag: added.tagName,
    tid: added.getAttribute('data-tid') || '',
    classes: (added.className || '').substring(0, 100),
    text: visibleText.substring(0, 200),
    children: added.children?.length || 0,
    html: added.innerHTML?.substring(0, 500) || '',
  });
}
|
||||||
|
|
||||||
|
// ── Find container ──
|
||||||
const containerSelectors = [
|
const containerSelectors = [
|
||||||
'div[data-tid="closed-caption-renderer-wrapper"]',
|
'div[data-tid="closed-caption-renderer-wrapper"]',
|
||||||
'div[data-tid="live-captions-renderer"]',
|
'div[data-tid="live-captions-renderer"]',
|
||||||
|
|
@ -982,31 +1078,40 @@ export class CaptionsProcedure {
|
||||||
];
|
];
|
||||||
|
|
||||||
let targetNode: Element | null = null;
|
let targetNode: Element | null = null;
|
||||||
|
let targetSelector = '';
|
||||||
for (const sel of containerSelectors) {
|
for (const sel of containerSelectors) {
|
||||||
targetNode = document.querySelector(sel);
|
targetNode = document.querySelector(sel);
|
||||||
if (targetNode) break;
|
if (targetNode) {
|
||||||
|
targetSelector = sel;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Also try wildcard match (transcript container)
|
||||||
|
if (!targetNode) {
|
||||||
|
const transcriptEl = document.querySelector('[data-tid*="transcript"]');
|
||||||
|
if (transcriptEl) {
|
||||||
|
targetNode = transcriptEl;
|
||||||
|
targetSelector = `[data-tid="${transcriptEl.getAttribute('data-tid')}"]`;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (targetNode) {
|
if (targetNode) {
|
||||||
// Targeted observer on the specific caption container
|
const tid = targetNode.getAttribute('data-tid') || '';
|
||||||
const observer = new MutationObserver((mutationsList) => {
|
const observer = new MutationObserver((mutationsList) => {
|
||||||
for (const mutation of mutationsList) {
|
for (const mutation of mutationsList) {
|
||||||
if (mutation.type === 'childList') {
|
if (mutation.type === 'childList') {
|
||||||
mutation.addedNodes.forEach((node) => {
|
mutation.addedNodes.forEach(_handleAddedNode);
|
||||||
if (node.nodeType === Node.ELEMENT_NODE) {
|
|
||||||
_extractCaption(node as HTMLElement);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
observer.observe(targetNode, { childList: true, subtree: true });
|
observer.observe(targetNode, { childList: true, subtree: true });
|
||||||
(window as any).__captionsObserver = observer;
|
(window as any).__captionsObserver = observer;
|
||||||
return 'container';
|
return `container:${tid}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback: observe document.body and look for caption elements anywhere
|
// ── Fallback: observe document.body ──
|
||||||
// Also watches for the caption container to appear later (e.g. after enabling)
|
const allSelectors = [...containerSelectors, '[data-tid*="transcript"]'];
|
||||||
const bodyObserver = new MutationObserver((mutationsList) => {
|
const bodyObserver = new MutationObserver((mutationsList) => {
|
||||||
for (const mutation of mutationsList) {
|
for (const mutation of mutationsList) {
|
||||||
if (mutation.type !== 'childList') continue;
|
if (mutation.type !== 'childList') continue;
|
||||||
|
|
@ -1014,20 +1119,16 @@ export class CaptionsProcedure {
|
||||||
if (node.nodeType !== Node.ELEMENT_NODE) return;
|
if (node.nodeType !== Node.ELEMENT_NODE) return;
|
||||||
const el = node as HTMLElement;
|
const el = node as HTMLElement;
|
||||||
|
|
||||||
// Check if a specific caption container just appeared
|
// Check if a container just appeared
|
||||||
for (const sel of containerSelectors) {
|
for (const sel of allSelectors) {
|
||||||
const container = el.matches?.(sel) ? el : el.querySelector?.(sel);
|
const container = el.matches?.(sel) ? el : el.querySelector?.(sel);
|
||||||
if (container) {
|
if (container) {
|
||||||
// Switch to targeted observation
|
|
||||||
bodyObserver.disconnect();
|
bodyObserver.disconnect();
|
||||||
|
const tid = container.getAttribute('data-tid') || '';
|
||||||
const targeted = new MutationObserver((muts) => {
|
const targeted = new MutationObserver((muts) => {
|
||||||
for (const m of muts) {
|
for (const m of muts) {
|
||||||
if (m.type === 'childList') {
|
if (m.type === 'childList') {
|
||||||
m.addedNodes.forEach((n) => {
|
m.addedNodes.forEach(_handleAddedNode);
|
||||||
if (n.nodeType === Node.ELEMENT_NODE) {
|
|
||||||
_extractCaption(n as HTMLElement);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
@ -1037,8 +1138,7 @@ export class CaptionsProcedure {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Direct caption element detection (body-level)
|
_handleAddedNode(node);
|
||||||
_extractCaption(el);
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue