fix: audio routing via MediaStream, auth context-destroyed handling, session cleanup

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
ValueOn AG 2026-02-15 15:19:32 +01:00
parent c420987dcb
commit 31c196978b
3 changed files with 91 additions and 7 deletions

View file

@ -29,9 +29,28 @@ export class AudioProcedure {
await this._page.evaluate(() => { await this._page.evaluate(() => {
// Create a global audio context // Create a global audio context
const AudioContext = window.AudioContext || (window as any).webkitAudioContext; const AudioContext = window.AudioContext || (window as any).webkitAudioContext;
(window as any).__audioContext = new AudioContext(); const ctx = new AudioContext();
(window as any).__audioContext = ctx;
(window as any).__audioQueue = []; (window as any).__audioQueue = [];
(window as any).__isPlaying = false; (window as any).__isPlaying = false;
// Create a MediaStream destination so audio is routed into the
// browser's virtual microphone (picked up by Teams) instead of
// the default speaker output (ctx.destination).
const streamDest = ctx.createMediaStreamDestination();
(window as any).__audioStreamDest = streamDest;
// Expose the stream so it can be served as mic input:
// navigator.mediaDevices.getUserMedia is overridden below to return this stream
// for audio-only requests; all other requests go to the original implementation.
const audioStream = streamDest.stream;
const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
navigator.mediaDevices.getUserMedia = async (constraints?: MediaStreamConstraints) => {
// If requesting audio only, return our TTS stream
if (constraints && constraints.audio && !constraints.video) {
return audioStream;
}
return originalGetUserMedia(constraints);
};
}); });
this._audioContext = true; this._audioContext = true;
@ -83,10 +102,12 @@ export class AudioProcedure {
audioBuffer = await ctx.decodeAudioData(bytes.buffer); audioBuffer = await ctx.decodeAudioData(bytes.buffer);
} }
// Create source and play // Create source and play through the MediaStream destination
// so audio is routed into the Teams microphone input, not speakers
const source = ctx.createBufferSource(); const source = ctx.createBufferSource();
source.buffer = audioBuffer; source.buffer = audioBuffer;
source.connect(ctx.destination); const streamDest = (window as any).__audioStreamDest as MediaStreamAudioDestinationNode;
source.connect(streamDest || ctx.destination);
source.start(0); source.start(0);
// Return a promise that resolves when playback ends // Return a promise that resolves when playback ends

View file

@ -197,9 +197,43 @@ export class AuthProcedure {
/** /**
* Verify that authentication was successful. * Verify that authentication was successful.
* Checks if we're on a Microsoft/Teams page with an authenticated session. * Checks if we're on a Microsoft/Teams page with an authenticated session.
*
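* Note: After successful login, Microsoft often triggers navigation/redirects
* which can destroy the execution context. Any error whose message mentions
* "Execution context was destroyed" / "execution context" is therefore treated
* as a successful login (navigation = login worked). While waiting for the
* post-login navigation, an error mentioning "navigation" is treated the same way.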
*/ */
private async _verifyAuthentication(): Promise<boolean> { private async _verifyAuthentication(): Promise<boolean> {
try { try {
// Wait for navigation that indicates login succeeded (redirect to Teams/Office)
try {
await this._page.waitForNavigation({
url: (url) =>
url.href.includes('teams.microsoft.com') ||
url.href.includes('office.com') ||
url.href.includes('myapps.microsoft.com') ||
url.href.includes('microsoftonline.com/common/oauth2'),
timeout: 15000,
});
this._logger.info('Navigation detected after login - authentication succeeded');
return true;
} catch (navError) {
const errorMessage = String(navError);
// "Execution context was destroyed" means the page navigated away
// from the login page, which indicates a successful login redirect
if (errorMessage.includes('Execution context was destroyed') ||
errorMessage.includes('execution context') ||
errorMessage.includes('navigation')) {
this._logger.info('Execution context destroyed during verification - treating as successful login (page navigated)');
// Give the page a moment to settle after navigation
await this._page.waitForTimeout(2000);
return true;
}
// Timeout - check where we ended up
this._logger.debug(`waitForNavigation did not match expected URL: ${navError}`);
}
const url = this._page.url(); const url = this._page.url();
// If we're on Teams or Microsoft portal, we're authenticated // If we're on Teams or Microsoft portal, we're authenticated
@ -237,6 +271,13 @@ export class AuthProcedure {
// If we're somewhere else entirely, assume authenticated // If we're somewhere else entirely, assume authenticated
return true; return true;
} catch (error) { } catch (error) {
const errorMessage = String(error);
// Catch "Execution context was destroyed" at the top level too
if (errorMessage.includes('Execution context was destroyed') ||
errorMessage.includes('execution context')) {
this._logger.info('Execution context destroyed during verification (top-level) - treating as successful login');
return true;
}
this._logger.error(`Authentication verification error: ${error}`); this._logger.error(`Authentication verification error: ${error}`);
return false; return false;
} }

View file

@ -96,11 +96,13 @@ export class SessionManager {
/** /**
* End a bot session and leave the meeting. * End a bot session and leave the meeting.
* Robust: handles cases where the session was already cleaned up
* (e.g. the delayed auto-cleanup after an 'error' state removed it from the map).
*/ */
async endSession(sessionId: string): Promise<void> { async endSession(sessionId: string): Promise<void> {
const orchestrator = this._sessions.get(sessionId); const orchestrator = this._sessions.get(sessionId);
if (!orchestrator) { if (!orchestrator) {
logger.warn(`Session ${sessionId} not found`); logger.warn(`Session ${sessionId} not found for endSession - may have already been cleaned up`);
return; return;
} }
@ -108,7 +110,10 @@ export class SessionManager {
try { try {
await orchestrator.stop(); await orchestrator.stop();
} catch (error) {
logger.error(`Error stopping session ${sessionId}:`, error);
} finally { } finally {
// Always remove from map after explicit end
this._sessions.delete(sessionId); this._sessions.delete(sessionId);
} }
} }
@ -166,14 +171,31 @@ export class SessionManager {
/** /**
* Handle state changes from orchestrators. * Handle state changes from orchestrators.
*
* IMPORTANT: Do NOT delete from _sessions on 'disconnected' state.
* The orchestrator may enter 'disconnected' due to a transient WebSocket
* drop or browser crash. If we delete here, the Gateway's subsequent
* 'leave' command won't find the session in endSession().
* Cleanup is done explicitly in endSession() or shutdown().
* Only auto-remove on terminal 'error' state, and only after a 30s grace
* period (if the session is still in the map and still in 'error' state), so
* the Gateway still has time to call endSession() first.
*/ */
private _handleStateChange(sessionId: string, state: BotState, message?: string): void { private _handleStateChange(sessionId: string, state: BotState, message?: string): void {
logger.info(`Session ${sessionId} state: ${state}${message ? ` - ${message}` : ''}`); logger.info(`Session ${sessionId} state: ${state}${message ? ` - ${message}` : ''}`);
// Clean up if disconnected or error if (state === 'error') {
if (state === 'disconnected' || state === 'error') { // Give Gateway a grace period to call endSession(), then auto-cleanup
this._sessions.delete(sessionId); setTimeout(() => {
if (this._sessions.has(sessionId)) {
const orch = this._sessions.get(sessionId);
if (orch && orch.state === 'error') {
logger.info(`Auto-cleaning stale error session ${sessionId}`);
this._sessions.delete(sessionId);
}
}
}, 30000); // 30s grace period
} }
// 'disconnected' state: do NOT delete - let endSession() handle it
} }
/** /**