fixes
This commit is contained in:
parent
89e6d442ab
commit
2293ba9552
8 changed files with 1447 additions and 241 deletions
1
package-lock.json
generated
1
package-lock.json
generated
|
|
@ -7,6 +7,7 @@
|
||||||
"": {
|
"": {
|
||||||
"name": "service-teams-browser-bot",
|
"name": "service-teams-browser-bot",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
|
"hasInstallScript": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"dotenv": "^16.4.1",
|
"dotenv": "^16.4.1",
|
||||||
"express": "^4.18.2",
|
"express": "^4.18.2",
|
||||||
|
|
|
||||||
|
|
@ -154,9 +154,9 @@ export class AudioCaptureProcedure {
|
||||||
async injectCaptureOverride(): Promise<void> {
|
async injectCaptureOverride(): Promise<void> {
|
||||||
if (this._injected) return;
|
if (this._injected) return;
|
||||||
|
|
||||||
this._logger.info('[AudioCapture] Injecting RTCPeerConnection wrapper...');
|
this._logger.info('[AudioCapture] Injecting RTCPeerConnection wrapper (all frames)...');
|
||||||
|
|
||||||
await this._page.addInitScript((workletCode: string) => {
|
await this._page.context().addInitScript((workletCode: string) => {
|
||||||
(window as any).__audioCaptureChunks = [] as any[];
|
(window as any).__audioCaptureChunks = [] as any[];
|
||||||
(window as any).__audioCaptureProcessors = {} as Record<string, any>;
|
(window as any).__audioCaptureProcessors = {} as Record<string, any>;
|
||||||
(window as any).__audioCaptureContexts = {} as Record<string, AudioContext>;
|
(window as any).__audioCaptureContexts = {} as Record<string, AudioContext>;
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import { Page } from 'playwright';
|
import { Page } from 'playwright';
|
||||||
import { Logger } from 'winston';
|
import { Logger } from 'winston';
|
||||||
|
import { poweronMediaPatchInstall } from './mediaGetUserMediaPatch';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handles audio playback in the Teams meeting.
|
* Handles audio playback in the Teams meeting.
|
||||||
|
|
@ -11,139 +12,113 @@ import { Logger } from 'winston';
|
||||||
* - When Teams calls getUserMedia, the wrapper:
|
* - When Teams calls getUserMedia, the wrapper:
|
||||||
* 1. Calls the REAL getUserMedia (which returns Chromium's fake device stream)
|
* 1. Calls the REAL getUserMedia (which returns Chromium's fake device stream)
|
||||||
* 2. Replaces the audio track with one from our MediaStreamDestination
|
* 2. Replaces the audio track with one from our MediaStreamDestination
|
||||||
* 3. Returns the modified stream (our audio + Chromium's fake video)
|
* 3. Returns the modified stream; optional canvas video track instead of fake video
|
||||||
* - When TTS audio is played, it's piped into the MediaStreamDestination,
|
* - When TTS audio is played, it's piped into the MediaStreamDestination,
|
||||||
* and Teams sends it via WebRTC to other meeting participants.
|
* and Teams sends it via WebRTC to other meeting participants.
|
||||||
*/
|
*/
|
||||||
|
/**
 * Optional settings accepted by the AudioProcedure constructor.
 * Every field may be omitted; the constructor supplies the fallbacks.
 */
export type AudioProcedureOptions = {
  /** Shown in the center of the canvas (e.g. bot display name) */
  displayLabel?: string;
  /** When truthy, a canvas-generated video track is used instead of the fake camera video. */
  useCanvasVideo?: boolean;
};
|
||||||
|
|
||||||
export class AudioProcedure {
|
export class AudioProcedure {
|
||||||
private _page: Page;
|
private _page: Page;
|
||||||
private _logger: Logger;
|
private _logger: Logger;
|
||||||
|
private _useCanvasVideo: boolean;
|
||||||
|
private _displayLabel: string;
|
||||||
private _audioContext: boolean = false;
|
private _audioContext: boolean = false;
|
||||||
private _initScriptInjected: boolean = false;
|
private _initScriptInjected: boolean = false;
|
||||||
private _audioQueue: Array<{ audioData: string; format: 'mp3' | 'wav' | 'pcm' }> = [];
|
private _audioQueue: Array<{ audioData: string; format: 'mp3' | 'wav' | 'pcm' }> = [];
|
||||||
private _isPlaying: boolean = false;
|
private _isPlaying: boolean = false;
|
||||||
private _stopRequested: boolean = false;
|
private _stopRequested: boolean = false;
|
||||||
|
|
||||||
constructor(page: Page, logger: Logger) {
|
constructor(page: Page, logger: Logger, options?: AudioProcedureOptions) {
|
||||||
this._page = page;
|
this._page = page;
|
||||||
this._logger = logger;
|
this._logger = logger;
|
||||||
|
this._useCanvasVideo = !!options?.useCanvasVideo;
|
||||||
|
this._displayLabel = (options?.displayLabel || 'Bot').trim() || 'Bot';
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Inject the getUserMedia wrapper BEFORE any page navigation.
|
* Inject the getUserMedia wrapper BEFORE any page navigation.
|
||||||
* This MUST be called before navigating to Teams.
|
* This MUST be called before navigating to Teams.
|
||||||
* Uses page.addInitScript so it runs in every new document context.
|
* Uses browserContext.addInitScript so the hook runs in the main page and
|
||||||
|
* in embedded frames (Teams often runs media/WebRTC in an iframe; page-only
|
||||||
|
* injection would miss getUserMedia and you would only see the fake device).
|
||||||
*/
|
*/
|
||||||
async injectAudioOverride(): Promise<void> {
|
async injectAudioOverride(): Promise<void> {
|
||||||
if (this._initScriptInjected) {
|
if (this._initScriptInjected) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
this._logger.info('Injecting audio getUserMedia override...');
|
this._logger.info(
|
||||||
|
`Injecting audio getUserMedia override (canvasVideo=${this._useCanvasVideo}, label="${this._displayLabel}")...`,
|
||||||
await this._page.addInitScript(() => {
|
|
||||||
// Create a shared AudioContext and MediaStreamDestination for TTS injection
|
|
||||||
const AudioContextClass = window.AudioContext || (window as any).webkitAudioContext;
|
|
||||||
const ctx = new AudioContextClass();
|
|
||||||
const streamDest = ctx.createMediaStreamDestination();
|
|
||||||
|
|
||||||
// Store globally for later TTS injection
|
|
||||||
(window as any).__ttsAudioContext = ctx;
|
|
||||||
(window as any).__ttsStreamDest = streamDest;
|
|
||||||
(window as any).__ttsAudioStream = streamDest.stream;
|
|
||||||
|
|
||||||
// Wrap getUserMedia to replace audio tracks with our TTS-injectable stream
|
|
||||||
const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
|
|
||||||
navigator.mediaDevices.getUserMedia = async (constraints?: MediaStreamConstraints) => {
|
|
||||||
// Get the real stream (from Chromium's fake devices)
|
|
||||||
const realStream = await originalGetUserMedia(constraints);
|
|
||||||
|
|
||||||
if (constraints && constraints.audio) {
|
|
||||||
// Build a new stream: our TTS audio track + their video tracks
|
|
||||||
const combinedStream = new MediaStream();
|
|
||||||
|
|
||||||
// Clone the TTS track so Teams can't kill the original via track.stop()
|
|
||||||
streamDest.stream.getAudioTracks().forEach(t => combinedStream.addTrack(t.clone()));
|
|
||||||
|
|
||||||
// Keep the real video tracks (from fake camera)
|
|
||||||
realStream.getVideoTracks().forEach(t => combinedStream.addTrack(t));
|
|
||||||
|
|
||||||
// Diagnostic signal for production logs: confirms override really feeds Teams.
|
|
||||||
try {
|
|
||||||
const audioTracks = combinedStream.getAudioTracks();
|
|
||||||
const videoTracks = combinedStream.getVideoTracks();
|
|
||||||
console.log(
|
|
||||||
`[AudioPlayback] getUserMedia override active: audioTracks=${audioTracks.length}, videoTracks=${videoTracks.length}, audioLabel="${audioTracks[0]?.label || 'n/a'}"`,
|
|
||||||
);
|
);
|
||||||
} catch {
|
|
||||||
// ignore
|
|
||||||
}
|
|
||||||
|
|
||||||
return combinedStream;
|
await this._page.context().addInitScript(poweronMediaPatchInstall, {
|
||||||
}
|
useCanvasVideo: this._useCanvasVideo,
|
||||||
|
displayLabel: this._displayLabel,
|
||||||
// No audio requested - return the real stream as-is
|
|
||||||
return realStream;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Force all RTCPeerConnection audio senders to use our TTS track.
|
|
||||||
// This ensures Teams actually sends our audio even if getUserMedia
|
|
||||||
// override happened in a different context or was renegotiated.
|
|
||||||
(window as any).__forceTtsTrackToSenders = async () => {
|
|
||||||
const pcs = ((window as any).__audioCapturePeerConnections || []) as RTCPeerConnection[];
|
|
||||||
const ttsTrack = streamDest.stream.getAudioTracks()?.[0];
|
|
||||||
if (!ttsTrack) return { replaced: 0, pcs: pcs?.length || 0, reason: 'no-tts-track' };
|
|
||||||
|
|
||||||
// #region agent log
|
|
||||||
const diag: Record<string, any> = {
|
|
||||||
ttsTrackId: ttsTrack.id,
|
|
||||||
ttsTrackEnabled: ttsTrack.enabled,
|
|
||||||
ttsTrackReadyState: ttsTrack.readyState,
|
|
||||||
ttsTrackMuted: ttsTrack.muted,
|
|
||||||
beforeSenderTrackIds: [] as string[],
|
|
||||||
afterSenderTrackIds: [] as string[],
|
|
||||||
};
|
|
||||||
// #endregion
|
|
||||||
|
|
||||||
let replaced = 0;
|
|
||||||
for (const pc of pcs) {
|
|
||||||
try {
|
|
||||||
const senders = pc.getSenders?.() || [];
|
|
||||||
for (const sender of senders) {
|
|
||||||
if (sender?.track?.kind === 'audio') {
|
|
||||||
// #region agent log
|
|
||||||
diag.beforeSenderTrackIds.push(sender.track.id);
|
|
||||||
// #endregion
|
|
||||||
const freshClone = ttsTrack.clone();
|
|
||||||
await sender.replaceTrack(freshClone);
|
|
||||||
replaced++;
|
|
||||||
// #region agent log
|
|
||||||
const afterTrack = sender.track;
|
|
||||||
diag.afterSenderTrackIds.push(afterTrack?.id || 'null');
|
|
||||||
diag.afterSenderTrackEnabled = afterTrack?.enabled;
|
|
||||||
diag.afterSenderTrackReadyState = afterTrack?.readyState;
|
|
||||||
diag.originalTrackState = ttsTrack.readyState;
|
|
||||||
if (afterTrack && !afterTrack.enabled) {
|
|
||||||
afterTrack.enabled = true;
|
|
||||||
diag.forcedEnabled = true;
|
|
||||||
}
|
|
||||||
// #endregion
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (err: any) {
|
|
||||||
// #region agent log
|
|
||||||
diag.error = String(err?.message || err);
|
|
||||||
// #endregion
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return { replaced, pcs: pcs?.length || 0, reason: 'ok', diag };
|
|
||||||
};
|
|
||||||
});
|
});
|
||||||
|
|
||||||
this._initScriptInjected = true;
|
this._initScriptInjected = true;
|
||||||
this._logger.info('Audio getUserMedia override injected');
|
this._logger.info('Audio getUserMedia override injected');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Evaluate the media patch again inside every frame currently attached to the page,
 * then push the canvas video to the senders (phase label 'reinstall').
 *
 * Useful when Teams swaps the document of an iframe (the init script ran too early)
 * or overwrites getUserMedia. Frames are handled one after another; a frame whose
 * evaluation fails is logged and skipped so the remaining frames are still patched.
 */
async reinstallMediaPatchInAllFrames(): Promise<void> {
  const patchArgs = {
    useCanvasVideo: this._useCanvasVideo,
    displayLabel: this._displayLabel,
  };
  const attachedFrames = this._page.frames();

  for (const target of attachedFrames) {
    try {
      await target.evaluate(poweronMediaPatchInstall, patchArgs);
    } catch (skipReason) {
      // Best effort: evaluation can fail for individual frames; keep going.
      this._logger.info(`[mediaPatch] frame skipped: ${skipReason}`);
    }
  }

  await this._forceCanvasVideoInAllFrames('reinstall');
}
|
||||||
|
|
||||||
|
/**
 * Replace outbound video in every frame. Teams may run WebRTC in a subframe;
 * only touching the main window leaves Chromium's default fake (green) video.
 *
 * Does nothing unless canvas video is enabled. For each frame it starts the avatar
 * stream (if the in-page hook exists), calls the in-page `__forceVideoTrackToSenders`
 * hook, and collects a one-line summary. All summaries are written in a single
 * info log entry prefixed with the given phase.
 *
 * @param phase Free-form label included in the log line (callers pass e.g. 'init', 'tts', 'reinstall').
 */
private async _forceCanvasVideoInAllFrames(phase: string): Promise<void> {
  if (!this._useCanvasVideo) {
    return;
  }
  const parts: string[] = [];
  for (const frame of this._page.frames()) {
    // Resolve the URL up front so failures can also be attributed to a frame.
    let shortUrl = '(no-url)';
    try {
      shortUrl = frame.url().substring(0, 100);
    } catch {
      // keep the placeholder
    }
    try {
      const r = await frame.evaluate(async () => {
        const w = window as any;
        w.__startBotAvatarStream?.();
        return w.__forceVideoTrackToSenders?.();
      });
      // The hooks are absent in frames where the patch was never installed.
      const rr: any = r || {};
      const summary =
        `[${shortUrl}] r=${rr.replaced ?? 0} add=${rr.added ?? 0} pcs=${rr.pcs ?? 0} `
        + `tx=${rr.totalTransceivers ?? 0} vidTx=${rr.videoTransceivers ?? 0} `
        + `vidWith=${rr.videoSendersWithTrack ?? 0} vidNoTrack=${rr.videoSendersWithoutTrack ?? 0} `
        + `dirB=[${(rr.directionsBefore || []).join(',')}] dirA=[${(rr.directionsAfter || []).join(',')}] `
        + `${rr.reason || ''}`;
      // Trim the whole line (not just the last fragment) so an empty reason
      // does not leave a trailing space.
      parts.push(summary.trim());
    } catch (e: any) {
      parts.push(`[${shortUrl}] err=${String(e?.message || e).slice(0, 64)}`);
    }
  }
  this._logger.info(`Canvas video ${phase}: ${parts.join(' | ')}`);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize the audio context in the browser for TTS playback.
|
* Initialize the audio context in the browser for TTS playback.
|
||||||
* Must be called after joining the meeting (user gesture context).
|
* Must be called after joining the meeting (user gesture context).
|
||||||
|
|
@ -175,6 +150,10 @@ export class AudioProcedure {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (this._useCanvasVideo) {
|
||||||
|
await this._forceCanvasVideoInAllFrames('init');
|
||||||
|
}
|
||||||
|
|
||||||
this._audioContext = true;
|
this._audioContext = true;
|
||||||
this._logger.info('Audio context initialized');
|
this._logger.info('Audio context initialized');
|
||||||
}
|
}
|
||||||
|
|
@ -279,6 +258,10 @@ export class AudioProcedure {
|
||||||
);
|
);
|
||||||
// #endregion
|
// #endregion
|
||||||
|
|
||||||
|
if (this._useCanvasVideo) {
|
||||||
|
await this._forceCanvasVideoInAllFrames('tts');
|
||||||
|
}
|
||||||
|
|
||||||
// Collect WebRTC stats BEFORE playback
|
// Collect WebRTC stats BEFORE playback
|
||||||
// #region agent log
|
// #region agent log
|
||||||
const statsBefore = await this._page.evaluate(async () => {
|
const statsBefore = await this._page.evaluate(async () => {
|
||||||
|
|
@ -405,12 +388,36 @@ export class AudioProcedure {
|
||||||
*/
|
*/
|
||||||
async cleanup(): Promise<void> {
|
async cleanup(): Promise<void> {
|
||||||
try {
|
try {
|
||||||
await this._page.evaluate(() => {
|
for (const frame of this._page.frames()) {
|
||||||
const ctx = (window as any).__ttsAudioContext as AudioContext;
|
try {
|
||||||
if (ctx) {
|
await frame.evaluate(() => {
|
||||||
ctx.close();
|
const w = window as any;
|
||||||
|
if (w.__botAvatarDrawInterval) {
|
||||||
|
clearInterval(w.__botAvatarDrawInterval);
|
||||||
|
w.__botAvatarDrawInterval = null;
|
||||||
|
}
|
||||||
|
if (w.__botAvatarVideoTrack) {
|
||||||
|
try {
|
||||||
|
w.__botAvatarVideoTrack.stop();
|
||||||
|
} catch {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
w.__botAvatarVideoTrack = null;
|
||||||
|
}
|
||||||
|
if (w.__botAvatarCanvas && w.__botAvatarCanvas.remove) {
|
||||||
|
w.__botAvatarCanvas.remove();
|
||||||
|
w.__botAvatarCanvas = null;
|
||||||
|
}
|
||||||
|
w.__botAvatarStreamStarted = false;
|
||||||
|
const actx = w.__ttsAudioContext as AudioContext;
|
||||||
|
if (actx) {
|
||||||
|
actx.close();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
} catch {
|
||||||
|
// cross-origin or closed frame
|
||||||
|
}
|
||||||
|
}
|
||||||
} catch {
|
} catch {
|
||||||
// Page might be closed
|
// Page might be closed
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,69 @@ export class BackgroundProcedure {
|
||||||
this._logger = logger;
|
this._logger = logger;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Open background effects and select "no" virtual background (camera only).
 * Teams can show a flat green/gray placeholder when a background effect is
 * on even when the feed is a fake or canvas source.
 *
 * Strategy: try a list of known "none" selectors (English and German labels plus
 * data-tid hooks); if none matches, click the first gallery tile as a fallback.
 * The panel is dismissed afterwards in every outcome, including when a click
 * or wait throws after the panel was opened.
 *
 * @returns true when a control was clicked, false when the panel could not be
 *          opened, no control was found, or an error occurred (errors are logged, not thrown).
 */
async trySelectNoVirtualBackground(): Promise<boolean> {
  // Tracks whether the panel must be closed if something fails midway.
  let panelOpened = false;
  try {
    const opened = await this._openBackgroundEffectsPanel();
    if (!opened) {
      return false;
    }
    panelOpened = true;
    await this._page.waitForTimeout(500);

    const noEffectSelectors: string[] = [
      'button[aria-label*="None" i]',
      'button[aria-label*="Kein" i]',
      'button[aria-label*="ohne" i]',
      'button[aria-label*="off" i][aria-label*="background" i]',
      'button[aria-label*="Hintergrund entfernen" i]',
      '[data-tid="background-item-none"]',
      'button[role="tab"][name="None" i]',
    ];
    for (const sel of noEffectSelectors) {
      const btn = await this._page.$(sel);
      if (btn) {
        await btn.click();
        this._logger.info(`Selected no background effect: ${sel}`);
        await this._page.waitForTimeout(500);
        await this._dismissPanelIfOpen();
        return true;
      }
    }

    // First gallery tile (often "none" or blur off) in many Teams builds
    const tile = await this._page.$(
      '[data-tid="background-image"], [class*="background-item"], li[role="listitem"] button',
    );
    if (tile) {
      await tile.click();
      this._logger.info('Clicked first background effects tile (often no effect)');
      await this._page.waitForTimeout(400);
      await this._dismissPanelIfOpen();
      return true;
    }

    await this._dismissPanelIfOpen();
    this._logger.warn('Could not find "no background" control');
    return false;
  } catch (e) {
    this._logger.warn(`trySelectNoVirtualBackground: ${e}`);
    if (panelOpened) {
      // Do not leave the effects panel covering the UI after a failed click/wait.
      // _dismissPanelIfOpen swallows its own errors, so this cannot throw.
      await this._dismissPanelIfOpen();
    }
    return false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Best-effort close of an open panel: sends Escape to the page and waits 200 ms.
 * Any failure (for example a page that is already closed) is swallowed on purpose.
 */
private async _dismissPanelIfOpen(): Promise<void> {
  const page = this._page;
  try {
    await page.keyboard.press('Escape');
    await page.waitForTimeout(200);
  } catch {
    // ignore
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set a virtual background from a URL on the Teams pre-join screen.
|
* Set a virtual background from a URL on the Teams pre-join screen.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
385
src/bot/mediaGetUserMediaPatch.ts
Normal file
385
src/bot/mediaGetUserMediaPatch.ts
Normal file
|
|
@ -0,0 +1,385 @@
|
||||||
|
/**
 * Injected in the browser: wraps getUserMedia, TTS destination, optional canvas
 * video. Must be a single self-contained function for Playwright serialization.
 * Re-calling this on the same document re-patches gUM and reuses the saved
 * Chromium getUserMedia + AudioContext when present (Teams can replace
 * navigator.mediaDevices.getUserMedia after a document/iframe refresh).
 */
export type MediaGetUserMediaPatchOptions = {
  /** When true, video requested through getUserMedia / added to peer connections is replaced by a canvas track. */
  useCanvasVideo: boolean;
  /** Text drawn in the center of the canvas. */
  displayLabel: string;
};

/**
 * Installs the media hooks on the current window. State is kept on `window`
 * under `__`-prefixed globals so repeated calls reuse it:
 *  - `__gumChromium`: the getUserMedia captured on first install.
 *  - `__ttsAudioContext` / `__ttsStreamDest` / `__ttsAudioStream`: TTS audio sink.
 *  - `__startBotAvatarStream`, `__forceVideoTrackToSenders`, `__forceTtsTrackToSenders`:
 *    helpers invoked later from the Node side.
 *
 * The function returns without installing anything when `navigator.mediaDevices`
 * is unavailable (it only exists in secure contexts), so injecting it into every
 * frame does not raise in frames that can never capture media.
 *
 * NOTE(review): `__audioCapturePeerConnections` is only read here; it is presumably
 * populated by a separate injected script — confirm against the capture procedure.
 */
export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions): void => {
  'use strict';
  const { useCanvasVideo, displayLabel } = opts;
  const w: any = window as any;

  // Without mediaDevices there is nothing to wrap; bail out instead of throwing.
  const mediaDevices: any = (navigator as any).mediaDevices;
  if (!mediaDevices) {
    return;
  }

  if (!w.__gumChromium) {
    if (typeof mediaDevices.getUserMedia !== 'function') {
      return;
    }
    // Saved once so later re-installs never wrap an already wrapped function.
    w.__gumChromium = mediaDevices.getUserMedia.bind(mediaDevices);
  }

  // Patch RTCPeerConnection.prototype methods once per realm to observe + react to Teams' track placement.
  if (!w.__poweronRtcPatched && (window as any).RTCPeerConnection) {
    w.__poweronRtcPatched = true;
    const RTCProto: any = (window as any).RTCPeerConnection.prototype;
    const _origAddTrack = RTCProto.addTrack;
    const _origAddTransceiver = RTCProto.addTransceiver;
    RTCProto.addTrack = function (track: MediaStreamTrack, ...streams: MediaStream[]) {
      try {
        // eslint-disable-next-line no-console
        console.log(
          '[AudioPlayback] pc.addTrack kind=' + (track && track.kind)
            + ' id=' + (track && track.id)
            + ' enabled=' + (track && track.enabled),
        );
      } catch {
        // ignore
      }
      let useTrack: MediaStreamTrack = track;
      try {
        if (useCanvasVideo && track && track.kind === 'video') {
          if (typeof w.__startBotAvatarStream === 'function') {
            w.__startBotAvatarStream();
          }
          const av: MediaStreamTrack | undefined = w.__botAvatarVideoTrack;
          if (av && av.readyState === 'live') {
            // The incoming video track is discarded in favor of an avatar clone.
            try {
              track.stop();
            } catch {
              // ignore
            }
            useTrack = av.clone();
            // eslint-disable-next-line no-console
            console.log('[AudioPlayback] pc.addTrack swapped video -> avatar id=' + useTrack.id);
          }
        }
      } catch {
        // ignore
      }
      return _origAddTrack.call(this, useTrack, ...streams);
    };
    RTCProto.addTransceiver = function (trackOrKind: any, init?: any) {
      // Logging only; the call is forwarded unchanged.
      try {
        const k = typeof trackOrKind === 'string' ? trackOrKind : trackOrKind?.kind;
        // eslint-disable-next-line no-console
        console.log(
          '[AudioPlayback] pc.addTransceiver kind=' + k
            + ' direction=' + (init && init.direction),
        );
      } catch {
        // ignore
      }
      return _origAddTransceiver.call(this, trackOrKind, init);
    };
  }

  // Create the TTS sink once per window; later installs reuse it.
  if (!w.__ttsStreamDest) {
    const AudioContextClass = (window as any).AudioContext || (window as any).webkitAudioContext;
    const ctx: AudioContext = new AudioContextClass();
    const createdDest: MediaStreamAudioDestinationNode = ctx.createMediaStreamDestination();
    w.__ttsAudioContext = ctx;
    w.__ttsStreamDest = createdDest;
    w.__ttsAudioStream = createdDest.stream;
  }
  const streamDest = w.__ttsStreamDest as MediaStreamAudioDestinationNode;
  if (!streamDest) {
    return;
  }

  const _fps = 15;

  /**
   * Builds (or rebuilds) the canvas that backs the avatar video track.
   * No-op while the existing track is live and its canvas is still attached.
   */
  w.__startBotAvatarStream = () => {
    if (
      w.__botAvatarStreamStarted
      && w.__botAvatarVideoTrack
      && w.__botAvatarVideoTrack.readyState === 'live'
      && w.__botAvatarCanvas
      && w.__botAvatarCanvas.isConnected
    ) {
      return;
    }
    if (w.__botAvatarDrawInterval) {
      clearInterval(w.__botAvatarDrawInterval);
      w.__botAvatarDrawInterval = null;
    }
    try {
      w.__botAvatarVideoTrack?.stop?.();
    } catch {
      // ignore
    }
    // Remove the previous canvas so rebuilds do not pile up detached-from-use
    // canvases in the DOM.
    try {
      w.__botAvatarCanvas?.remove?.();
    } catch {
      // ignore
    }
    w.__botAvatarStreamStarted = true;
    w.__botAvatarDisplayLabel = displayLabel;
    const canvas = document.createElement('canvas');
    canvas.width = 640;
    canvas.height = 360;
    canvas.setAttribute('data-poweron-avatar', '1');
    canvas.style.cssText =
      'position:fixed;right:0;bottom:0;width:4px;height:4px;z-index:2147483646;opacity:1;pointer-events:none;';
    (document.body || document.documentElement).appendChild(canvas);
    w.__botAvatarCanvas = canvas;
    const c2d = canvas.getContext('2d');
    let t = 0;
    const draw = () => {
      if (!c2d) {
        return;
      }
      t += 0.04;
      const wPx = canvas.width;
      const hPx = canvas.height;
      // Background: solid fill, then a diagonal gradient on top.
      c2d.fillStyle = '#061525';
      c2d.fillRect(0, 0, wPx, hPx);
      const g = c2d.createLinearGradient(0, 0, wPx, hPx);
      g.addColorStop(0, '#1a4f8c');
      g.addColorStop(0.5, '#0c305a');
      g.addColorStop(1, '#132e6e');
      c2d.fillStyle = g;
      c2d.fillRect(0, 0, wPx, hPx);
      // Frame and corner tag.
      c2d.strokeStyle = 'rgba(255, 200, 80, 0.95)';
      c2d.lineWidth = 3;
      c2d.strokeRect(6, 6, wPx - 12, hPx - 12);
      c2d.fillStyle = 'rgba(255, 220, 120, 0.95)';
      c2d.font = '600 13px system-ui, "Segoe UI", sans-serif';
      c2d.textAlign = 'left';
      c2d.textBaseline = 'top';
      c2d.fillText('PORTA', 14, 10);
      // Centered label (capped at 72 characters) and subtitle.
      c2d.textAlign = 'center';
      c2d.textBaseline = 'middle';
      c2d.fillStyle = '#ffffff';
      c2d.font = 'bold 28px system-ui, "Segoe UI", sans-serif';
      const line = (w.__botAvatarDisplayLabel || displayLabel).toString().slice(0, 72);
      c2d.fillText(line, wPx / 2, hPx / 2 - 6);
      c2d.fillStyle = 'rgba(255,255,255,0.78)';
      c2d.font = '14px system-ui, "Segoe UI", sans-serif';
      c2d.fillText('poweron', wPx / 2, hPx / 2 + 26);
      // Pulsing bars at the top and bottom edges so the picture changes over time.
      const pulse = 0.75 + 0.25 * Math.sin(t);
      c2d.fillStyle = 'rgba(120, 200, 255, ' + 0.15 * pulse + ')';
      c2d.fillRect(0, 0, wPx, 6);
      c2d.fillRect(0, hPx - 6, wPx, 6);
    };
    draw();
    w.__botAvatarDrawInterval = window.setInterval(draw, 1000 / _fps);
    const cap = canvas.captureStream(_fps);
    w.__botAvatarVideoTrack = cap.getVideoTracks()[0];
    if (w.__botAvatarVideoTrack) {
      w.__botAvatarVideoTrack.enabled = true;
      try {
        w.__botAvatarVideoTrack.contentHint = 'motion';
      } catch {
        // ignore
      }
    }
    // eslint-disable-next-line no-console
    console.log(
      '[AudioPlayback] canvas avatar stream (re)built, videoTrack=',
      w.__botAvatarVideoTrack ? w.__botAvatarVideoTrack.id : 'none',
    );
  };

  /**
   * Puts a clone of the avatar track on every video sender of the known peer
   * connections; adds a new track to connections that have no video transceiver.
   * Returns counters and transceiver directions for diagnostics.
   */
  w.__forceVideoTrackToSenders = async () => {
    if (!useCanvasVideo) {
      return { replaced: 0, pcs: 0, reason: 'canvas-video-off' };
    }
    w.__startBotAvatarStream();
    const src: MediaStreamTrack | undefined = w.__botAvatarVideoTrack;
    if (!src) {
      return { replaced: 0, pcs: 0, reason: 'no-avatar-track' };
    }
    const pcs: RTCPeerConnection[] = (w.__audioCapturePeerConnections || []) as RTCPeerConnection[];
    let replaced = 0;
    let added = 0;
    let videoTransceivers = 0;
    let videoSendersWithTrack = 0;
    let videoSendersWithoutTrack = 0;
    let totalTransceivers = 0;
    const directionsBefore: string[] = [];
    const directionsAfter: string[] = [];
    for (const pc of pcs) {
      const transceivers = (pc as any).getTransceivers?.() || [];
      totalTransceivers += transceivers.length;
      let pcHasVideoSender = false;
      for (const t of transceivers) {
        const sender = t.sender;
        if (!sender) {
          continue;
        }
        // A sender without a track has no kind of its own; fall back to the receiver side.
        const senderKind = sender.track?.kind;
        const receiverKind = t.receiver?.track?.kind;
        const txKind = (t as any).kind || senderKind || receiverKind || null;
        if (txKind !== 'video') {
          continue;
        }
        videoTransceivers++;
        pcHasVideoSender = true;
        directionsBefore.push(t.direction);
        if (sender.track) {
          videoSendersWithTrack++;
        } else {
          videoSendersWithoutTrack++;
        }
        try {
          // eslint-disable-next-line no-await-in-loop
          await sender.replaceTrack(src.clone());
          replaced++;
          const tr = sender.track;
          if (tr && !tr.enabled) {
            tr.enabled = true;
          }
          if (t.direction === 'inactive' || t.direction === 'recvonly') {
            try {
              t.direction = 'sendrecv';
            } catch {
              // ignore
            }
          }
          directionsAfter.push(t.direction);
        } catch (err: any) {
          directionsAfter.push('err:' + String(err && err.message ? err.message : err).slice(0, 32));
        }
      }
      if (!pcHasVideoSender) {
        try {
          // Associate the clone with a plain MediaStream. Calling captureStream()
          // here would start another canvas capture (and an extra, never-stopped
          // track) on every invocation.
          const fallbackTrack = src.clone();
          const newSender = (pc as any).addTrack(fallbackTrack, new MediaStream([fallbackTrack]));
          if (newSender) {
            added++;
          }
        } catch (err) {
          directionsAfter.push('addTrack-err:' + String((err as any)?.message || err).slice(0, 32));
        }
      }
    }
    return {
      replaced,
      added,
      pcs: pcs.length,
      reason: 'ok',
      videoTransceivers,
      videoSendersWithTrack,
      videoSendersWithoutTrack,
      totalTransceivers,
      directionsBefore,
      directionsAfter,
    };
  };

  /**
   * Replacement for getUserMedia: always calls the saved original first, then
   * swaps audio for a clone of the TTS track and (optionally) video for a clone
   * of the canvas track.
   */
  const _wrappedGUM = async (constraints?: MediaStreamConstraints) => {
    // eslint-disable-next-line no-console
    console.log(
      '[AudioPlayback] gUM call audio=' + !!(constraints && constraints.audio)
        + ' video=' + !!(constraints && constraints.video),
    );
    // eslint-disable-next-line no-restricted-globals
    const realStream = await w.__gumChromium(constraints);
    const wantAudio = !!(constraints && constraints.audio);
    const wantVideo = !!(constraints && constraints.video);

    if (useCanvasVideo && wantVideo) {
      w.__startBotAvatarStream();
      const vt: MediaStreamTrack | undefined = w.__botAvatarVideoTrack;
      if (!vt) {
        // No canvas track available: hand back the untouched stream.
        return realStream;
      }
      const vClone = vt.clone();
      if (wantAudio) {
        const combinedStream = new MediaStream();
        // Clones are handed out so a stop() by the page does not end the shared TTS track.
        streamDest.stream.getAudioTracks().forEach((t: MediaStreamTrack) => combinedStream.addTrack(t.clone()));
        combinedStream.addTrack(vClone);
        try {
          // Nothing from the real stream is returned, so release it.
          realStream.getTracks().forEach((t: MediaStreamTrack) => t.stop());
        } catch {
          // ignore
        }
        // eslint-disable-next-line no-console
        console.log(
          '[AudioPlayback] getUserMedia (canvas+tts): a=' + combinedStream.getAudioTracks().length
            + ' v=' + combinedStream.getVideoTracks().length,
        );
        return combinedStream;
      }
      const videoOnly = new MediaStream();
      videoOnly.addTrack(vClone);
      try {
        realStream.getTracks().forEach((t: MediaStreamTrack) => t.stop());
      } catch {
        // ignore
      }
      return videoOnly;
    }

    if (wantAudio) {
      const combinedStream = new MediaStream();
      streamDest.stream.getAudioTracks().forEach((t: MediaStreamTrack) => combinedStream.addTrack(t.clone()));
      // Real video tracks (if any were requested) are passed through unchanged.
      realStream.getVideoTracks().forEach((t: MediaStreamTrack) => combinedStream.addTrack(t));
      // eslint-disable-next-line no-console
      console.log(
        '[AudioPlayback] gUM audio: a=' + combinedStream.getAudioTracks().length
          + ' v=' + combinedStream.getVideoTracks().length,
      );
      return combinedStream;
    }
    return realStream;
  };

  try {
    Object.defineProperty(mediaDevices, 'getUserMedia', {
      configurable: true,
      enumerable: true,
      writable: true,
      value: _wrappedGUM,
    });
  } catch {
    // defineProperty refused; fall back to plain assignment.
    mediaDevices.getUserMedia = _wrappedGUM;
  }
  // Some libraries cache navigator.getUserMedia (legacy)
  try {
    (navigator as any).getUserMedia = (constraints: MediaStreamConstraints, ok: any, err: any) => {
      _wrappedGUM(constraints).then(ok, err);
    };
  } catch {
    // ignore
  }

  /**
   * Replaces the track of every audio sender on the known peer connections with
   * a fresh clone of the TTS track and returns diagnostics about the swap.
   */
  w.__forceTtsTrackToSenders = async () => {
    const pcs: RTCPeerConnection[] = (w.__audioCapturePeerConnections || []) as RTCPeerConnection[];
    const ttsTrack = streamDest.stream.getAudioTracks()?.[0];
    if (!ttsTrack) {
      return { replaced: 0, pcs: pcs?.length || 0, reason: 'no-tts-track' };
    }
    const diag: Record<string, any> = {
      ttsTrackId: ttsTrack.id,
      ttsTrackEnabled: ttsTrack.enabled,
      ttsTrackReadyState: ttsTrack.readyState,
      ttsTrackMuted: ttsTrack.muted,
      beforeSenderTrackIds: [] as string[],
      afterSenderTrackIds: [] as string[],
    };
    let replaced = 0;
    for (const pc of pcs) {
      try {
        const senders = pc.getSenders?.() || [];
        for (const sender of senders) {
          if (sender?.track?.kind === 'audio') {
            diag.beforeSenderTrackIds.push(sender.track.id);
            const freshClone = ttsTrack.clone();
            // eslint-disable-next-line no-await-in-loop
            await sender.replaceTrack(freshClone);
            replaced++;
            const afterTrack = sender.track;
            diag.afterSenderTrackIds.push(afterTrack ? afterTrack.id : 'null');
            diag.afterSenderTrackEnabled = afterTrack ? afterTrack.enabled : undefined;
            diag.afterSenderTrackReadyState = afterTrack ? afterTrack.readyState : undefined;
            diag.originalTrackState = ttsTrack.readyState;
            if (afterTrack && !afterTrack.enabled) {
              afterTrack.enabled = true;
              diag.forcedEnabled = true;
            }
          }
        }
      } catch (err: any) {
        // Only the most recent failure is kept; remaining connections are still processed.
        diag.error = String(err && err.message ? err.message : err);
      }
    }
    return { replaced, pcs: pcs?.length || 0, reason: 'ok', diag };
  };
};
|
||||||
|
|
@ -15,10 +15,11 @@ import { AudioCaptureProcedure } from './audioCaptureProcedure';
|
||||||
import { ChatProcedure, ChatMessageEntry } from './chatProcedure';
|
import { ChatProcedure, ChatMessageEntry } from './chatProcedure';
|
||||||
import { AuthProcedure, MfaChallenge } from './authProcedure';
|
import { AuthProcedure, MfaChallenge } from './authProcedure';
|
||||||
import { TeamsActionsService } from './teamsActionsService';
|
import { TeamsActionsService } from './teamsActionsService';
|
||||||
|
import { BackgroundProcedure } from './backgroundProcedure';
|
||||||
import { isValidMeetingUrl, getMeetingLaunchUrl, resolveLaunchUrl } from './meetingUrlParser';
|
import { isValidMeetingUrl, getMeetingLaunchUrl, resolveLaunchUrl } from './meetingUrlParser';
|
||||||
|
|
||||||
// Camera / fake video injection is disabled for now to focus on stability.
|
// Optional: canvas "avatar" video (config.botUseCanvasVideo) replaces the Chromium
|
||||||
// The Y4M fake video file was causing browser crashes when audio started flowing.
|
// fake test pattern when the camera is on. Y4M file injection remains disabled.
|
||||||
|
|
||||||
export interface OrchestratorCallbacks {
|
export interface OrchestratorCallbacks {
|
||||||
onStateChange: (state: BotState, message?: string) => void;
|
onStateChange: (state: BotState, message?: string) => void;
|
||||||
|
|
@ -76,6 +77,11 @@ export class BotOrchestrator {
|
||||||
private _chatQueueProcessing: boolean = false;
|
private _chatQueueProcessing: boolean = false;
|
||||||
private _mfaResolver: ((response: { action: string; code?: string }) => void) | null = null;
|
private _mfaResolver: ((response: { action: string; code?: string }) => void) | null = null;
|
||||||
|
|
||||||
|
/** Debounce Teams iframe navigations (media runs in a child frame) */
|
||||||
|
private _frameNavMediaRebindTimer: ReturnType<typeof setTimeout> | null = null;
|
||||||
|
/** Re-apply gUM + video senders for a few seconds after join */
|
||||||
|
private _canvasRebindTimer: ReturnType<typeof setInterval> | null = null;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
sessionId: string,
|
sessionId: string,
|
||||||
meetingUrl: string,
|
meetingUrl: string,
|
||||||
|
|
@ -205,6 +211,11 @@ export class BotOrchestrator {
|
||||||
|
|
||||||
// Ensure microphone is ON (required for voice playback)
|
// Ensure microphone is ON (required for voice playback)
|
||||||
await this._ensureMicOn();
|
await this._ensureMicOn();
|
||||||
|
if (config.botUseCanvasVideo) {
|
||||||
|
await this._ensureCameraOn();
|
||||||
|
const bg = new BackgroundProcedure(this._page!, this._logger);
|
||||||
|
void bg.trySelectNoVirtualBackground();
|
||||||
|
}
|
||||||
|
|
||||||
// STEP 2: Enter bot name and click "Join now"
|
// STEP 2: Enter bot name and click "Join now"
|
||||||
await this._takeScreenshot('anon-step2-before-join', this._isDebugMode);
|
await this._takeScreenshot('anon-step2-before-join', this._isDebugMode);
|
||||||
|
|
@ -234,6 +245,10 @@ export class BotOrchestrator {
|
||||||
|
|
||||||
// Initialize audio playback
|
// Initialize audio playback
|
||||||
await this._audioProcedure!.initialize();
|
await this._audioProcedure!.initialize();
|
||||||
|
if (config.botUseCanvasVideo) {
|
||||||
|
await this._ensureCameraOnInMeeting();
|
||||||
|
this._startCanvasRebindAfterJoin();
|
||||||
|
}
|
||||||
|
|
||||||
// Enable transcript capture (captions or audio based on transferMode)
|
// Enable transcript capture (captions or audio based on transferMode)
|
||||||
await this._enableTranscriptCapture();
|
await this._enableTranscriptCapture();
|
||||||
|
|
@ -414,6 +429,11 @@ export class BotOrchestrator {
|
||||||
|
|
||||||
// Ensure microphone is ON before joining (required for voice playback)
|
// Ensure microphone is ON before joining (required for voice playback)
|
||||||
await this._ensureMicOn();
|
await this._ensureMicOn();
|
||||||
|
if (config.botUseCanvasVideo) {
|
||||||
|
await this._ensureCameraOn();
|
||||||
|
const bg = new BackgroundProcedure(this._page!, this._logger);
|
||||||
|
void bg.trySelectNoVirtualBackground();
|
||||||
|
}
|
||||||
|
|
||||||
// STEP 5: Poll for "Join now" on the pre-join screen
|
// STEP 5: Poll for "Join now" on the pre-join screen
|
||||||
await this._takeScreenshot('step5-before-join-now', this._isDebugMode);
|
await this._takeScreenshot('step5-before-join-now', this._isDebugMode);
|
||||||
|
|
@ -436,11 +456,37 @@ export class BotOrchestrator {
|
||||||
|
|
||||||
this._startKeepAlive();
|
this._startKeepAlive();
|
||||||
await this._audioProcedure!.initialize();
|
await this._audioProcedure!.initialize();
|
||||||
|
if (config.botUseCanvasVideo) {
|
||||||
|
await this._ensureCameraOnInMeeting();
|
||||||
|
this._startCanvasRebindAfterJoin();
|
||||||
|
}
|
||||||
await this._enableTranscriptCapture();
|
await this._enableTranscriptCapture();
|
||||||
await this._enableChat();
|
await this._enableChat();
|
||||||
await this._sendJoinGreeting();
|
await this._sendJoinGreeting();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Start a short-lived interval that repeatedly asks the audio procedure to
 * reinstall its media patch in all frames after the bot has joined.
 *
 * Any previously running rebind interval is stopped first. Nothing is
 * scheduled when canvas video is disabled or no audio procedure exists.
 * The interval stops itself after `maxAttempts` ticks (35 x 400 ms, about
 * 14 seconds).
 */
private _startCanvasRebindAfterJoin(): void {
  // Restart cleanly so two rebind intervals never run at the same time.
  this._stopCanvasRebindAfterJoin();
  if (!config.botUseCanvasVideo || !this._audioProcedure) {
    return;
  }

  const intervalMs = 400;
  const maxAttempts = 35;
  let attempts = 0;

  this._canvasRebindTimer = setInterval(() => {
    attempts += 1;
    // Fire-and-forget, but a rejection (e.g. the page closing mid-call) must
    // not surface as an unhandled promise rejection in the Node process.
    void this._audioProcedure?.reinstallMediaPatchInAllFrames().catch((error: unknown) => {
      this._logger.warn('Canvas rebind: reinstalling media patch failed:', error);
    });
    if (attempts >= maxAttempts) {
      this._stopCanvasRebindAfterJoin();
    }
  }, intervalMs);
}
|
||||||
|
|
||||||
|
/**
 * Cancel the post-join canvas rebind interval, if one is active, and clear
 * the stored timer handle. Calling this with no active interval is a no-op.
 */
private _stopCanvasRebindAfterJoin(): void {
  const activeTimer = this._canvasRebindTimer;
  if (!activeTimer) {
    return;
  }
  clearInterval(activeTimer);
  this._canvasRebindTimer = null;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Ensure the camera is turned on in the pre-join screen.
|
* Ensure the camera is turned on in the pre-join screen.
|
||||||
* When camera is on, Teams shows the profile/background image.
|
* When camera is on, Teams shows the profile/background image.
|
||||||
|
|
@ -888,6 +934,12 @@ export class BotOrchestrator {
|
||||||
this._isShuttingDown = true;
|
this._isShuttingDown = true;
|
||||||
this._logger.info('Stopping bot...');
|
this._logger.info('Stopping bot...');
|
||||||
|
|
||||||
|
if (this._frameNavMediaRebindTimer) {
|
||||||
|
clearTimeout(this._frameNavMediaRebindTimer);
|
||||||
|
this._frameNavMediaRebindTimer = null;
|
||||||
|
}
|
||||||
|
this._stopCanvasRebindAfterJoin();
|
||||||
|
|
||||||
// Stop keepalive first
|
// Stop keepalive first
|
||||||
this._stopKeepAlive();
|
this._stopKeepAlive();
|
||||||
|
|
||||||
|
|
@ -1077,7 +1129,10 @@ export class BotOrchestrator {
|
||||||
},
|
},
|
||||||
this._options.language
|
this._options.language
|
||||||
);
|
);
|
||||||
this._audioProcedure = new AudioProcedure(this._page, this._logger);
|
this._audioProcedure = new AudioProcedure(this._page, this._logger, {
|
||||||
|
useCanvasVideo: config.botUseCanvasVideo,
|
||||||
|
displayLabel: this._botName,
|
||||||
|
});
|
||||||
this._teamsActions = new TeamsActionsService(this._page, this._logger);
|
this._teamsActions = new TeamsActionsService(this._page, this._logger);
|
||||||
this._chatProcedure = new ChatProcedure(
|
this._chatProcedure = new ChatProcedure(
|
||||||
this._page,
|
this._page,
|
||||||
|
|
@ -1100,6 +1155,19 @@ export class BotOrchestrator {
|
||||||
// Aggressive hybrid mode: always capture meeting audio as transcript source.
|
// Aggressive hybrid mode: always capture meeting audio as transcript source.
|
||||||
await this._audioCaptureProcedure!.injectCaptureOverride();
|
await this._audioCaptureProcedure!.injectCaptureOverride();
|
||||||
|
|
||||||
|
this._page.on('framenavigated', () => {
|
||||||
|
if (!config.botUseCanvasVideo || !this._audioProcedure) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (this._frameNavMediaRebindTimer) {
|
||||||
|
clearTimeout(this._frameNavMediaRebindTimer);
|
||||||
|
}
|
||||||
|
this._frameNavMediaRebindTimer = setTimeout(() => {
|
||||||
|
this._frameNavMediaRebindTimer = null;
|
||||||
|
void this._audioProcedure?.reinstallMediaPatchInAllFrames();
|
||||||
|
}, 600);
|
||||||
|
});
|
||||||
|
|
||||||
// Handle page errors
|
// Handle page errors
|
||||||
this._page.on('pageerror', (error) => {
|
this._page.on('pageerror', (error) => {
|
||||||
this._logger.error('Page error:', error);
|
this._logger.error('Page error:', error);
|
||||||
|
|
@ -1134,6 +1202,7 @@ export class BotOrchestrator {
|
||||||
* Close the browser.
|
* Close the browser.
|
||||||
*/
|
*/
|
||||||
private async _closeBrowser(): Promise<void> {
|
private async _closeBrowser(): Promise<void> {
|
||||||
|
this._stopCanvasRebindAfterJoin();
|
||||||
try {
|
try {
|
||||||
if (this._page) {
|
if (this._page) {
|
||||||
await this._page.close();
|
await this._page.close();
|
||||||
|
|
@ -1282,43 +1351,31 @@ export class BotOrchestrator {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Send a greeting message in the meeting chat AND via voice after joining.
|
* Signal "bot has joined the meeting" to the Gateway. The Gateway owns
|
||||||
* Uses the bot's display name and the configured language.
|
* greeting generation: it produces a localised greeting via the AI
|
||||||
* Voice greeting confirms that the audio pipeline (TTS -> mic) is working.
|
* service in the configured language + persona, then dispatches it back
|
||||||
|
* to this bot via the regular `sendChatMessage` command (chat) and the
|
||||||
|
* `playAudio` pipeline (voice). NO hardcoded greeting strings or
|
||||||
|
* language branches live in the bot — the bot is purely a transport.
|
||||||
|
*
|
||||||
|
* We still wait briefly so the chat panel + input have settled in the
|
||||||
|
* Teams DOM before the Gateway-driven `sendChatMessage` arrives.
|
||||||
*/
|
*/
|
||||||
private async _sendJoinGreeting(): Promise<void> {
|
private async _sendJoinGreeting(): Promise<void> {
|
||||||
try {
|
try {
|
||||||
const firstName = this._botName.split(' ')[0] || this._botName;
|
this._logger.info('Requesting join greeting from Gateway');
|
||||||
const lang = (this._options.language || 'de-DE').toLowerCase();
|
|
||||||
|
|
||||||
let greeting: string;
|
|
||||||
if (lang.startsWith('de')) {
|
|
||||||
greeting = `Hallo, hier ist ${firstName}. Ich bin bereit.`;
|
|
||||||
} else if (lang.startsWith('fr')) {
|
|
||||||
greeting = `Bonjour, c'est ${firstName}. Je suis prête.`;
|
|
||||||
} else if (lang.startsWith('it')) {
|
|
||||||
greeting = `Ciao, sono ${firstName}. Sono pronta.`;
|
|
||||||
} else {
|
|
||||||
greeting = `Hello, this is ${firstName}. I'm ready.`;
|
|
||||||
}
|
|
||||||
|
|
||||||
this._logger.info(`Sending join greeting (chat + voice): ${greeting}`);
|
|
||||||
|
|
||||||
// Brief delay so chat input is ready after panel open (Teams DOM can lag)
|
|
||||||
await new Promise((r) => setTimeout(r, 800));
|
await new Promise((r) => setTimeout(r, 800));
|
||||||
|
|
||||||
// Chat greeting (queued; retries if input not found)
|
|
||||||
await this.sendChatMessageToMeeting(greeting);
|
|
||||||
|
|
||||||
// Voice greeting — ask Gateway to generate TTS and send back playAudio
|
|
||||||
this._sendToGateway({
|
this._sendToGateway({
|
||||||
type: 'voiceGreeting',
|
type: 'requestGreeting',
|
||||||
sessionId: this._sessionId,
|
sessionId: this._sessionId,
|
||||||
text: greeting,
|
// Hint the Gateway about display name + language; Gateway already
|
||||||
language: this._options.language || 'de-DE',
|
// has the canonical config but passing them here keeps the contract
|
||||||
|
// self-contained and avoids a DB lookup just for greeting text.
|
||||||
|
botName: this._botName,
|
||||||
|
language: this._options.language || '',
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this._logger.warn('Could not send join greeting:', error);
|
this._logger.warn('Could not request join greeting:', error);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1505,15 +1562,21 @@ export class BotOrchestrator {
|
||||||
fs.writeFileSync(filepath, buffer);
|
fs.writeFileSync(filepath, buffer);
|
||||||
this._logger.info(`Screenshot saved: ${filepath}`);
|
this._logger.info(`Screenshot saved: ${filepath}`);
|
||||||
|
|
||||||
// Also log as base64 for Azure logs (truncated for readability)
|
// Optional: also stream the PNG as base64 chunks into the log. Nobody
|
||||||
|
// parses these chunks back into images — they exist purely so that
|
||||||
|
// cloud deployments without disk access (e.g. Azure App Service) can
|
||||||
|
// recover screenshots from log search. The UI loads screenshots via
|
||||||
|
// the REST proxy, NOT from these log lines, so we keep this OFF by
|
||||||
|
// default to avoid spamming the bot log with ~200 KB blobs per shot.
|
||||||
|
if (config.screenshotLogBase64) {
|
||||||
const base64 = buffer.toString('base64');
|
const base64 = buffer.toString('base64');
|
||||||
this._logger.info(`SCREENSHOT_BASE64_START:${name}`);
|
this._logger.info(`SCREENSHOT_BASE64_START:${name}`);
|
||||||
// Log in chunks to avoid log line limits
|
|
||||||
const chunkSize = 50000;
|
const chunkSize = 50000;
|
||||||
for (let i = 0; i < base64.length; i += chunkSize) {
|
for (let i = 0; i < base64.length; i += chunkSize) {
|
||||||
this._logger.info(`SCREENSHOT_CHUNK:${base64.substring(i, i + chunkSize)}`);
|
this._logger.info(`SCREENSHOT_CHUNK:${base64.substring(i, i + chunkSize)}`);
|
||||||
}
|
}
|
||||||
this._logger.info(`SCREENSHOT_BASE64_END:${name}`);
|
this._logger.info(`SCREENSHOT_BASE64_END:${name}`);
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this._logger.error('Error taking screenshot:', error);
|
this._logger.error('Error taking screenshot:', error);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,11 @@ export const config = {
|
||||||
// Bot
|
// Bot
|
||||||
botName: process.env.BOT_NAME || 'PowerOn AI',
|
botName: process.env.BOT_NAME || 'PowerOn AI',
|
||||||
botHeadless: process.env.BOT_HEADLESS !== 'false',
|
botHeadless: process.env.BOT_HEADLESS !== 'false',
|
||||||
|
/**
|
||||||
|
* Replace Chromium's fake test-pattern video with a canvas stream (gradient + label).
|
||||||
|
* Enabled by default; set BOT_USE_CANVAS_VIDEO=false to disable it if you need camera off / profile tile only.
|
||||||
|
*/
|
||||||
|
botUseCanvasVideo: process.env.BOT_USE_CANVAS_VIDEO !== 'false',
|
||||||
|
|
||||||
// Logging
|
// Logging
|
||||||
logLevel: process.env.LOG_LEVEL || 'info',
|
logLevel: process.env.LOG_LEVEL || 'info',
|
||||||
|
|
@ -22,6 +27,12 @@ export const config = {
|
||||||
// Screenshots
|
// Screenshots
|
||||||
screenshotDir: process.env.SCREENSHOT_DIR || './output/screenshots',
|
screenshotDir: process.env.SCREENSHOT_DIR || './output/screenshots',
|
||||||
screenshotOnError: process.env.SCREENSHOT_ON_ERROR === 'true',
|
screenshotOnError: process.env.SCREENSHOT_ON_ERROR === 'true',
|
||||||
|
// Stream screenshot bytes as base64 chunks into the bot log. Only useful in
|
||||||
|
// cloud deployments (e.g. Azure App Service) where the screenshot files on
|
||||||
|
// disk are not reachable. Locally the UI loads them via the REST proxy
|
||||||
|
// (/api/teamsbot/{instanceId}/screenshots/{file}), so this just bloats the
|
||||||
|
// log. Default OFF.
|
||||||
|
screenshotLogBase64: process.env.SCREENSHOT_LOG_BASE64 === 'true',
|
||||||
|
|
||||||
// Timeouts (in milliseconds)
|
// Timeouts (in milliseconds)
|
||||||
timeouts: {
|
timeouts: {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue