fix: auth join detection, caption language dropdown, audio injection via getUserMedia override
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
13bf75bea7
commit
bd63dfc40a
4 changed files with 242 additions and 47 deletions
|
|
@ -3,12 +3,19 @@ import { Logger } from 'winston';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handles audio playback in the Teams meeting.
|
* Handles audio playback in the Teams meeting.
|
||||||
* Injects TTS audio into the browser to be played through the meeting.
|
*
|
||||||
|
* Architecture:
|
||||||
|
* - Before any page loads, we inject an init script that overrides getUserMedia
|
||||||
|
* to return a MediaStream from a MediaStreamDestination we control.
|
||||||
|
* - When Teams calls getUserMedia({audio: true}), it gets our custom stream.
|
||||||
|
* - When TTS audio is played, it's piped into the same MediaStreamDestination,
|
||||||
|
* so Teams picks it up as microphone input and sends it via WebRTC.
|
||||||
*/
|
*/
|
||||||
export class AudioProcedure {
|
export class AudioProcedure {
|
||||||
private _page: Page;
|
private _page: Page;
|
||||||
private _logger: Logger;
|
private _logger: Logger;
|
||||||
private _audioContext: boolean = false;
|
private _audioContext: boolean = false;
|
||||||
|
private _initScriptInjected: boolean = false;
|
||||||
|
|
||||||
constructor(page: Page, logger: Logger) {
|
constructor(page: Page, logger: Logger) {
|
||||||
this._page = page;
|
this._page = page;
|
||||||
|
|
@ -16,8 +23,62 @@ export class AudioProcedure {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize the audio context in the browser.
|
* Inject the getUserMedia override BEFORE any page navigation.
|
||||||
* Must be called after user interaction (joining meeting counts).
|
* This MUST be called before navigating to Teams.
|
||||||
|
* Uses page.addInitScript so it runs in every new document context.
|
||||||
|
*/
|
||||||
|
async injectAudioOverride(): Promise<void> {
  // Registers (once per AudioProcedure instance) an init script that:
  //   1. creates an AudioContext and a MediaStreamDestination in the page,
  //   2. exposes them on window as __ttsAudioContext / __ttsStreamDest /
  //      __ttsAudioStream so later page.evaluate() calls can pipe TTS into them,
  //   3. replaces navigator.mediaDevices.getUserMedia so that any request that
  //      includes audio receives the audio produced by that destination node.
  // Resolves when the script has been registered with the page; rejects with
  // whatever page.addInitScript rejects with (the injected flag is then left
  // unset so a later call can retry).
  if (this._initScriptInjected) {
    return;
  }

  this._logger.info('Injecting audio getUserMedia override...');

  await this._page.addInitScript(() => {
    // NOTE: this callback is serialized and executed inside the browser, so it
    // must not reference anything from the enclosing Node.js scope.
    const globals = window as any;
    const AudioContextClass = window.AudioContext || globals.webkitAudioContext;
    const mediaDevices = navigator.mediaDevices;

    // navigator.mediaDevices is only defined in secure contexts. Bail out
    // instead of throwing in documents/frames where there is nothing to patch.
    if (!AudioContextClass || !mediaDevices || typeof mediaDevices.getUserMedia !== 'function') {
      return;
    }

    // Shared audio graph; lives for the lifetime of this document.
    const ctx = new AudioContextClass();
    const streamDest = ctx.createMediaStreamDestination();

    // Stored globally for later TTS injection.
    globals.__ttsAudioContext = ctx;
    globals.__ttsStreamDest = streamDest;
    globals.__ttsAudioStream = streamDest.stream;

    // Build a fresh MediaStream per request. The audio tracks are CLONES of the
    // destination's track: if the page calls track.stop() on what it received,
    // only the clone ends and the shared source keeps producing audio for
    // subsequent getUserMedia calls.
    const buildInjectedStream = (videoTracks: MediaStreamTrack[] = []): MediaStream => {
      const stream = new MediaStream();
      streamDest.stream.getAudioTracks().forEach((track) => stream.addTrack(track.clone()));
      videoTracks.forEach((track) => stream.addTrack(track));
      return stream;
    };

    const originalGetUserMedia = mediaDevices.getUserMedia.bind(mediaDevices);

    mediaDevices.getUserMedia = async (constraints?: MediaStreamConstraints) => {
      // Requests without audio are passed through untouched.
      if (!constraints || !constraints.audio) {
        return originalGetUserMedia(constraints);
      }

      // Audio only: hand out our TTS-injectable audio.
      if (!constraints.video) {
        return buildInjectedStream();
      }

      // Audio + video: combine our audio with the browser's real/fake video.
      try {
        const videoStream = await originalGetUserMedia({ video: constraints.video });
        return buildInjectedStream(videoStream.getVideoTracks());
      } catch (error) {
        // Best effort: a missing camera must not prevent joining with audio,
        // but leave a trace in the page console for debugging.
        console.warn('[tts-audio] video capture failed; returning audio-only stream', error);
        return buildInjectedStream();
      }
    };
  });

  this._initScriptInjected = true;
  this._logger.info('Audio getUserMedia override injected');
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize the audio context in the browser for TTS playback.
|
||||||
|
* Must be called after joining the meeting (user gesture context).
|
||||||
*/
|
*/
|
||||||
async initialize(): Promise<void> {
|
async initialize(): Promise<void> {
|
||||||
if (this._audioContext) {
|
if (this._audioContext) {
|
||||||
|
|
@ -27,30 +88,23 @@ export class AudioProcedure {
|
||||||
this._logger.info('Initializing audio context...');
|
this._logger.info('Initializing audio context...');
|
||||||
|
|
||||||
await this._page.evaluate(() => {
|
await this._page.evaluate(() => {
|
||||||
// Create a global audio context
|
// The __ttsAudioContext was created by the init script.
|
||||||
const AudioContext = window.AudioContext || (window as any).webkitAudioContext;
|
// Resume it now (requires user gesture - joining meeting counts).
|
||||||
const ctx = new AudioContext();
|
const ctx = (window as any).__ttsAudioContext as AudioContext;
|
||||||
(window as any).__audioContext = ctx;
|
if (ctx && ctx.state === 'suspended') {
|
||||||
(window as any).__audioQueue = [];
|
ctx.resume();
|
||||||
(window as any).__isPlaying = false;
|
}
|
||||||
|
|
||||||
// Create a MediaStream destination so audio is routed into the
|
// If init script didn't run (e.g. page navigated before injection),
|
||||||
// browser's virtual microphone (picked up by Teams) instead of
|
// create fallback audio infrastructure
|
||||||
// the default speaker output (ctx.destination).
|
if (!ctx) {
|
||||||
const streamDest = ctx.createMediaStreamDestination();
|
const AudioContextClass = window.AudioContext || (window as any).webkitAudioContext;
|
||||||
(window as any).__audioStreamDest = streamDest;
|
const newCtx = new AudioContextClass();
|
||||||
|
const streamDest = newCtx.createMediaStreamDestination();
|
||||||
// Expose the stream so headless Chromium can pipe it as mic input.
|
(window as any).__ttsAudioContext = newCtx;
|
||||||
// navigator.mediaDevices.getUserMedia will be overridden to return this stream.
|
(window as any).__ttsStreamDest = streamDest;
|
||||||
const audioStream = streamDest.stream;
|
(window as any).__ttsAudioStream = streamDest.stream;
|
||||||
const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
|
}
|
||||||
navigator.mediaDevices.getUserMedia = async (constraints?: MediaStreamConstraints) => {
|
|
||||||
// If requesting audio only, return our TTS stream
|
|
||||||
if (constraints && constraints.audio && !constraints.video) {
|
|
||||||
return audioStream;
|
|
||||||
}
|
|
||||||
return originalGetUserMedia(constraints);
|
|
||||||
};
|
|
||||||
});
|
});
|
||||||
|
|
||||||
this._audioContext = true;
|
this._audioContext = true;
|
||||||
|
|
@ -59,7 +113,7 @@ export class AudioProcedure {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Play audio in the browser.
|
* Play audio in the browser.
|
||||||
* The audio will be heard by other meeting participants.
|
* Audio is piped into the MediaStreamDestination that Teams uses as mic input.
|
||||||
*
|
*
|
||||||
* @param audioData Base64 encoded audio data
|
* @param audioData Base64 encoded audio data
|
||||||
* @param format Audio format (mp3, wav, pcm)
|
* @param format Audio format (mp3, wav, pcm)
|
||||||
|
|
@ -73,7 +127,12 @@ export class AudioProcedure {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await this._page.evaluate(async ({ audioData, format }) => {
|
await this._page.evaluate(async ({ audioData, format }) => {
|
||||||
const ctx = (window as any).__audioContext as AudioContext;
|
const ctx = (window as any).__ttsAudioContext as AudioContext;
|
||||||
|
const streamDest = (window as any).__ttsStreamDest as MediaStreamAudioDestinationNode;
|
||||||
|
|
||||||
|
if (!ctx || !streamDest) {
|
||||||
|
throw new Error('Audio context not initialized');
|
||||||
|
}
|
||||||
|
|
||||||
// Resume context if suspended
|
// Resume context if suspended
|
||||||
if (ctx.state === 'suspended') {
|
if (ctx.state === 'suspended') {
|
||||||
|
|
@ -95,22 +154,19 @@ export class AudioProcedure {
|
||||||
audioBuffer = ctx.createBuffer(1, pcmData.length, 16000);
|
audioBuffer = ctx.createBuffer(1, pcmData.length, 16000);
|
||||||
const channelData = audioBuffer.getChannelData(0);
|
const channelData = audioBuffer.getChannelData(0);
|
||||||
for (let i = 0; i < pcmData.length; i++) {
|
for (let i = 0; i < pcmData.length; i++) {
|
||||||
channelData[i] = pcmData[i] / 32768; // Convert to float
|
channelData[i] = pcmData[i] / 32768;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// MP3/WAV: Use decodeAudioData
|
// MP3/WAV: Use decodeAudioData
|
||||||
audioBuffer = await ctx.decodeAudioData(bytes.buffer);
|
audioBuffer = await ctx.decodeAudioData(bytes.buffer.slice(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create source and play through the MediaStream destination
|
// Play through the MediaStreamDestination -> Teams mic input
|
||||||
// so audio is routed into the Teams microphone input, not speakers
|
|
||||||
const source = ctx.createBufferSource();
|
const source = ctx.createBufferSource();
|
||||||
source.buffer = audioBuffer;
|
source.buffer = audioBuffer;
|
||||||
const streamDest = (window as any).__audioStreamDest as MediaStreamAudioDestinationNode;
|
source.connect(streamDest);
|
||||||
source.connect(streamDest || ctx.destination);
|
|
||||||
source.start(0);
|
source.start(0);
|
||||||
|
|
||||||
// Return a promise that resolves when playback ends
|
|
||||||
return new Promise<void>((resolve) => {
|
return new Promise<void>((resolve) => {
|
||||||
source.onended = () => resolve();
|
source.onended = () => resolve();
|
||||||
});
|
});
|
||||||
|
|
@ -129,7 +185,7 @@ export class AudioProcedure {
|
||||||
async stopAudio(): Promise<void> {
|
async stopAudio(): Promise<void> {
|
||||||
try {
|
try {
|
||||||
await this._page.evaluate(() => {
|
await this._page.evaluate(() => {
|
||||||
const ctx = (window as any).__audioContext as AudioContext;
|
const ctx = (window as any).__ttsAudioContext as AudioContext;
|
||||||
if (ctx) {
|
if (ctx) {
|
||||||
ctx.suspend();
|
ctx.suspend();
|
||||||
}
|
}
|
||||||
|
|
@ -145,7 +201,7 @@ export class AudioProcedure {
|
||||||
async cleanup(): Promise<void> {
|
async cleanup(): Promise<void> {
|
||||||
try {
|
try {
|
||||||
await this._page.evaluate(() => {
|
await this._page.evaluate(() => {
|
||||||
const ctx = (window as any).__audioContext as AudioContext;
|
const ctx = (window as any).__ttsAudioContext as AudioContext;
|
||||||
if (ctx) {
|
if (ctx) {
|
||||||
ctx.close();
|
ctx.close();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -332,6 +332,32 @@ export class CaptionsProcedure {
|
||||||
|
|
||||||
// Look for the spoken language dropdown/combobox
|
// Look for the spoken language dropdown/combobox
|
||||||
let languageSet = false;
|
let languageSet = false;
|
||||||
|
|
||||||
|
// First, log what's visible in the settings panel for debugging
|
||||||
|
const panelInfo = await this._page.evaluate(() => {
|
||||||
|
const selects = document.querySelectorAll('select');
|
||||||
|
const comboboxes = document.querySelectorAll('[role="combobox"]');
|
||||||
|
const listboxes = document.querySelectorAll('[role="listbox"]');
|
||||||
|
const dropdowns = document.querySelectorAll('[class*="dropdown" i], [class*="Dropdown" i]');
|
||||||
|
const allButtons = document.querySelectorAll('button');
|
||||||
|
const buttonsWithText = Array.from(allButtons)
|
||||||
|
.map(b => `${b.tagName}[${b.getAttribute('aria-label') || b.textContent?.trim().substring(0, 40)}]`)
|
||||||
|
.filter(t => t.length > 10)
|
||||||
|
.slice(0, 10);
|
||||||
|
return {
|
||||||
|
selects: selects.length,
|
||||||
|
comboboxes: comboboxes.length,
|
||||||
|
listboxes: listboxes.length,
|
||||||
|
dropdowns: dropdowns.length,
|
||||||
|
buttons: buttonsWithText,
|
||||||
|
bodySnippet: document.body?.innerText?.substring(0, 800) || '',
|
||||||
|
};
|
||||||
|
});
|
||||||
|
this._logger.info(`Caption settings panel - selects: ${panelInfo.selects}, comboboxes: ${panelInfo.comboboxes}, listboxes: ${panelInfo.listboxes}, dropdowns: ${panelInfo.dropdowns}`);
|
||||||
|
this._logger.info(`Panel buttons: ${JSON.stringify(panelInfo.buttons)}`);
|
||||||
|
this._logger.debug(`Panel text: ${panelInfo.bodySnippet.substring(0, 300)}`);
|
||||||
|
|
||||||
|
// Strategy A: Standard selectors
|
||||||
const dropdownSelectors = [
|
const dropdownSelectors = [
|
||||||
'select[aria-label*="spoken language" i]',
|
'select[aria-label*="spoken language" i]',
|
||||||
'select[aria-label*="Meeting spoken language" i]',
|
'select[aria-label*="Meeting spoken language" i]',
|
||||||
|
|
@ -339,7 +365,7 @@ export class CaptionsProcedure {
|
||||||
'[data-tid="spoken-language-dropdown"]',
|
'[data-tid="spoken-language-dropdown"]',
|
||||||
'div[role="combobox"]',
|
'div[role="combobox"]',
|
||||||
'div[role="listbox"]',
|
'div[role="listbox"]',
|
||||||
'select', // Generic fallback
|
'select',
|
||||||
];
|
];
|
||||||
|
|
||||||
for (const selector of dropdownSelectors) {
|
for (const selector of dropdownSelectors) {
|
||||||
|
|
@ -350,7 +376,6 @@ export class CaptionsProcedure {
|
||||||
const tagName = await dropdown.evaluate(el => el.tagName.toLowerCase());
|
const tagName = await dropdown.evaluate(el => el.tagName.toLowerCase());
|
||||||
|
|
||||||
if (tagName === 'select') {
|
if (tagName === 'select') {
|
||||||
// Native select element
|
|
||||||
for (const name of targetNames) {
|
for (const name of targetNames) {
|
||||||
try {
|
try {
|
||||||
await this._page.selectOption(selector, { label: name });
|
await this._page.selectOption(selector, { label: name });
|
||||||
|
|
@ -368,7 +393,6 @@ export class CaptionsProcedure {
|
||||||
|
|
||||||
for (const name of targetNames) {
|
for (const name of targetNames) {
|
||||||
try {
|
try {
|
||||||
// Try role="option" first, then generic text search
|
|
||||||
const optionSelectors = [
|
const optionSelectors = [
|
||||||
`[role="option"]:has-text("${name}")`,
|
`[role="option"]:has-text("${name}")`,
|
||||||
`li:has-text("${name}")`,
|
`li:has-text("${name}")`,
|
||||||
|
|
@ -397,6 +421,69 @@ export class CaptionsProcedure {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Strategy B: DOM evaluation fallback - find any dropdown-like element and interact
|
||||||
|
if (!languageSet) {
|
||||||
|
this._logger.info('Standard dropdown selectors failed, trying DOM evaluation fallback...');
|
||||||
|
|
||||||
|
languageSet = await this._page.evaluate((names: string[]) => {
|
||||||
|
// Find all elements that could be dropdowns (Fluent UI uses various patterns)
|
||||||
|
const candidates = document.querySelectorAll(
|
||||||
|
'[role="combobox"], [role="listbox"], select, ' +
|
||||||
|
'[class*="dropdown" i], [class*="Dropdown"], ' +
|
||||||
|
'button[aria-haspopup="listbox"], button[aria-haspopup="true"], ' +
|
||||||
|
'[aria-expanded]'
|
||||||
|
);
|
||||||
|
|
||||||
|
for (let i = 0; i < candidates.length; i++) {
|
||||||
|
const el = candidates[i] as HTMLElement;
|
||||||
|
const label = el.getAttribute('aria-label') || '';
|
||||||
|
const nearbyText = el.parentElement?.innerText || '';
|
||||||
|
|
||||||
|
// Check if this dropdown is related to language
|
||||||
|
const isLanguageRelated =
|
||||||
|
label.toLowerCase().includes('language') ||
|
||||||
|
label.toLowerCase().includes('sprache') ||
|
||||||
|
nearbyText.toLowerCase().includes('spoken language') ||
|
||||||
|
nearbyText.toLowerCase().includes('gesprochene sprache');
|
||||||
|
|
||||||
|
if (isLanguageRelated || candidates.length === 1) {
|
||||||
|
// Click to open the dropdown
|
||||||
|
el.click();
|
||||||
|
|
||||||
|
// Wait a frame for options to render
|
||||||
|
return new Promise<boolean>((resolve) => {
|
||||||
|
requestAnimationFrame(() => {
|
||||||
|
requestAnimationFrame(() => {
|
||||||
|
// Look for options
|
||||||
|
const options = document.querySelectorAll(
|
||||||
|
'[role="option"], [role="menuitem"], li[class*="option" i]'
|
||||||
|
);
|
||||||
|
|
||||||
|
for (let j = 0; j < options.length; j++) {
|
||||||
|
const opt = options[j] as HTMLElement;
|
||||||
|
const optText = opt.innerText?.trim() || '';
|
||||||
|
|
||||||
|
if (names.some(n => optText.includes(n))) {
|
||||||
|
opt.click();
|
||||||
|
resolve(true);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resolve(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Promise.resolve(false);
|
||||||
|
}, targetNames);
|
||||||
|
|
||||||
|
if (languageSet) {
|
||||||
|
this._logger.info('Selected spoken language via DOM evaluation fallback');
|
||||||
|
await this._page.waitForTimeout(500);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!languageSet) {
|
if (!languageSet) {
|
||||||
this._logger.warn('Could not find/select spoken language in dropdown');
|
this._logger.warn('Could not find/select spoken language in dropdown');
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -264,12 +264,14 @@ export class JoinProcedure {
|
||||||
* Check if the bot is currently in the meeting (admitted from lobby).
|
* Check if the bot is currently in the meeting (admitted from lobby).
|
||||||
* Primary selector: button[id="hangup-button"] (confirmed by Recall.ai).
|
* Primary selector: button[id="hangup-button"] (confirmed by Recall.ai).
|
||||||
* Note: Teams uses `id` (not `data-tid`) for the hangup button since 2025 redesign.
|
* Note: Teams uses `id` (not `data-tid`) for the hangup button since 2025 redesign.
|
||||||
|
*
|
||||||
|
* For authenticated joins, Teams v2 sometimes renders differently.
|
||||||
|
* Additional fallback: check the URL for meeting patterns and DOM for call UI.
|
||||||
*/
|
*/
|
||||||
async isInMeeting(options: { waitForSeconds?: number } = {}): Promise<boolean> {
|
async isInMeeting(options: { waitForSeconds?: number } = {}): Promise<boolean> {
|
||||||
const timeout = (options.waitForSeconds || 5) * 1000;
|
const timeout = (options.waitForSeconds || 5) * 1000;
|
||||||
|
|
||||||
// Primary selector - confirmed by Recall.ai (Jan 2025)
|
// Primary selectors - known meeting UI elements
|
||||||
// Note: Teams now uses id="hangup-button" instead of data-tid="hangup-button"
|
|
||||||
const inMeetingSelectors = [
|
const inMeetingSelectors = [
|
||||||
'button[id="hangup-button"]',
|
'button[id="hangup-button"]',
|
||||||
'button[id="callingButtons-showMoreBtn"]',
|
'button[id="callingButtons-showMoreBtn"]',
|
||||||
|
|
@ -278,6 +280,16 @@ export class JoinProcedure {
|
||||||
'[data-tid="call-composite"]',
|
'[data-tid="call-composite"]',
|
||||||
'button[aria-label*="Leave"]',
|
'button[aria-label*="Leave"]',
|
||||||
'[data-tid="callingButtons-showMoreBtn"]',
|
'[data-tid="callingButtons-showMoreBtn"]',
|
||||||
|
// Teams v2 (2025+) additional selectors
|
||||||
|
'[data-tid="call-controls"]',
|
||||||
|
'[data-tid="meeting-composite"]',
|
||||||
|
'div[data-tid="video-gallery"]',
|
||||||
|
'button[aria-label*="Hang up"]',
|
||||||
|
'button[aria-label*="leave" i]',
|
||||||
|
// Mic/Camera toggle buttons are only visible in an active call
|
||||||
|
'button[id="microphone-button"]',
|
||||||
|
'button[data-tid="toggle-mute"]',
|
||||||
|
'[data-tid="microphone-button"]',
|
||||||
];
|
];
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
@ -287,8 +299,35 @@ export class JoinProcedure {
|
||||||
});
|
});
|
||||||
return true;
|
return true;
|
||||||
} catch {
|
} catch {
|
||||||
return false;
|
// Selector-based detection failed, try DOM evaluation as fallback
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fallback: evaluate the page for meeting indicators
|
||||||
|
try {
|
||||||
|
const inMeeting = await this._page.evaluate(() => {
|
||||||
|
// Check for call-related aria roles and meeting elements
|
||||||
|
const bodyText = document.body?.innerText || '';
|
||||||
|
const meetingIndicators = [
|
||||||
|
'Leave', // Leave button text
|
||||||
|
'Mute', // Mic mute button
|
||||||
|
'Unmute', // Mic unmute button
|
||||||
|
'Turn off camera', // Camera control
|
||||||
|
'Turn on camera',
|
||||||
|
'Share', // Share screen
|
||||||
|
];
|
||||||
|
const found = meetingIndicators.filter(ind => bodyText.includes(ind));
|
||||||
|
// Need at least 2 meeting indicators to confirm we're in a meeting
|
||||||
|
return found.length >= 2;
|
||||||
|
});
|
||||||
|
if (inMeeting) {
|
||||||
|
this._logger.info('Detected meeting via DOM text analysis (fallback)');
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Page may not be ready
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -437,7 +437,9 @@ export class BotOrchestrator {
|
||||||
headless: config.botHeadless,
|
headless: config.botHeadless,
|
||||||
args: [
|
args: [
|
||||||
'--use-fake-ui-for-media-stream', // Auto-accept media permissions
|
'--use-fake-ui-for-media-stream', // Auto-accept media permissions
|
||||||
'--use-fake-device-for-media-stream', // Use fake devices
|
// NOTE: --use-fake-device-for-media-stream is intentionally NOT used.
|
||||||
|
// We override getUserMedia via addInitScript to return a MediaStreamDestination
|
||||||
|
// that we control, so TTS audio can be injected into Teams' mic input.
|
||||||
'--disable-web-security',
|
'--disable-web-security',
|
||||||
'--disable-features=IsolateOrigins,site-per-process',
|
'--disable-features=IsolateOrigins,site-per-process',
|
||||||
'--autoplay-policy=no-user-gesture-required',
|
'--autoplay-policy=no-user-gesture-required',
|
||||||
|
|
@ -468,6 +470,10 @@ export class BotOrchestrator {
|
||||||
);
|
);
|
||||||
this._audioProcedure = new AudioProcedure(this._page, this._logger);
|
this._audioProcedure = new AudioProcedure(this._page, this._logger);
|
||||||
|
|
||||||
|
// Inject audio getUserMedia override BEFORE any navigation
|
||||||
|
// This ensures Teams gets our controlled audio stream when it calls getUserMedia
|
||||||
|
await this._audioProcedure.injectAudioOverride();
|
||||||
|
|
||||||
// Handle page errors
|
// Handle page errors
|
||||||
this._page.on('pageerror', (error) => {
|
this._page.on('pageerror', (error) => {
|
||||||
this._logger.error('Page error:', error);
|
this._logger.error('Page error:', error);
|
||||||
|
|
@ -537,11 +543,18 @@ export class BotOrchestrator {
|
||||||
// - Page is transitioning between states
|
// - Page is transitioning between states
|
||||||
// Only give up after several consecutive cycles with no signal
|
// Only give up after several consecutive cycles with no signal
|
||||||
consecutiveNoSignal++;
|
consecutiveNoSignal++;
|
||||||
this._logger.info(`No lobby/meeting signal detected (attempt ${consecutiveNoSignal}/${maxNoSignal}), waiting...`);
|
const currentUrl = this._page?.url() || 'unknown';
|
||||||
|
this._logger.info(`No lobby/meeting signal detected (attempt ${consecutiveNoSignal}/${maxNoSignal}), URL: ${currentUrl}`);
|
||||||
|
|
||||||
if (consecutiveNoSignal >= maxNoSignal) {
|
if (consecutiveNoSignal >= maxNoSignal) {
|
||||||
// Take a screenshot for debugging before giving up
|
// Take a screenshot and log page content for debugging before giving up
|
||||||
await this._takeScreenshot('no-meeting-signal');
|
await this._takeScreenshot('no-meeting-signal');
|
||||||
|
try {
|
||||||
|
const bodySnippet = await this._page?.evaluate(() =>
|
||||||
|
document.body?.innerText?.substring(0, 500) || '(empty)'
|
||||||
|
);
|
||||||
|
this._logger.warn(`Page content before giving up: ${bodySnippet}`);
|
||||||
|
} catch { /* ignore */ }
|
||||||
throw new Error('Bot was removed from lobby or meeting ended');
|
throw new Error('Bot was removed from lobby or meeting ended');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue