From 2293ba95522c222b16637ddaf3a0df4f7f988e3c Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sat, 25 Apr 2026 01:13:30 +0200
Subject: [PATCH] Bot: install media hooks in all frames, add canvas video, harden chat panel detection
---
package-lock.json | 1 +
src/bot/audioCaptureProcedure.ts | 4 +-
src/bot/audioProcedure.ts | 219 ++++----
src/bot/backgroundProcedure.ts | 63 +++
src/bot/chatProcedure.ts | 862 ++++++++++++++++++++++++++----
src/bot/mediaGetUserMediaPatch.ts | 385 +++++++++++++
src/bot/orchestrator.ts | 143 +++--
src/config.ts | 11 +
8 files changed, 1447 insertions(+), 241 deletions(-)
create mode 100644 src/bot/mediaGetUserMediaPatch.ts
diff --git a/package-lock.json b/package-lock.json
index 27280e6..f1052e2 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -7,6 +7,7 @@
"": {
"name": "service-teams-browser-bot",
"version": "1.0.0",
+ "hasInstallScript": true,
"dependencies": {
"dotenv": "^16.4.1",
"express": "^4.18.2",
diff --git a/src/bot/audioCaptureProcedure.ts b/src/bot/audioCaptureProcedure.ts
index 3e9a670..c140fc9 100644
--- a/src/bot/audioCaptureProcedure.ts
+++ b/src/bot/audioCaptureProcedure.ts
@@ -154,9 +154,9 @@ export class AudioCaptureProcedure {
async injectCaptureOverride(): Promise {
if (this._injected) return;
- this._logger.info('[AudioCapture] Injecting RTCPeerConnection wrapper...');
+ this._logger.info('[AudioCapture] Injecting RTCPeerConnection wrapper (all frames)...');
- await this._page.addInitScript((workletCode: string) => {
+ await this._page.context().addInitScript((workletCode: string) => {
(window as any).__audioCaptureChunks = [] as any[];
(window as any).__audioCaptureProcessors = {} as Record;
(window as any).__audioCaptureContexts = {} as Record;
diff --git a/src/bot/audioProcedure.ts b/src/bot/audioProcedure.ts
index b301482..73bb827 100644
--- a/src/bot/audioProcedure.ts
+++ b/src/bot/audioProcedure.ts
@@ -1,5 +1,6 @@
import { Page } from 'playwright';
import { Logger } from 'winston';
+import { poweronMediaPatchInstall } from './mediaGetUserMediaPatch';
/**
* Handles audio playback in the Teams meeting.
@@ -11,139 +12,113 @@ import { Logger } from 'winston';
* - When Teams calls getUserMedia, the wrapper:
* 1. Calls the REAL getUserMedia (which returns Chromium's fake device stream)
* 2. Replaces the audio track with one from our MediaStreamDestination
- * 3. Returns the modified stream (our audio + Chromium's fake video)
+ * 3. Returns the modified stream; optional canvas video track instead of fake video
* - When TTS audio is played, it's piped into the MediaStreamDestination,
* and Teams sends it via WebRTC to other meeting participants.
*/
+export type AudioProcedureOptions = { // Optional settings for AudioProcedure; every field may be omitted.
+ useCanvasVideo?: boolean; // Forwarded to the media patch; the constructor coerces a missing value to false.
+ /** Shown in the center of the canvas (e.g. bot display name); the constructor trims it and falls back to 'Bot' when blank. */
+ displayLabel?: string;
+};
+
export class AudioProcedure {
private _page: Page;
private _logger: Logger;
+ private _useCanvasVideo: boolean;
+ private _displayLabel: string;
private _audioContext: boolean = false;
private _initScriptInjected: boolean = false;
private _audioQueue: Array<{ audioData: string; format: 'mp3' | 'wav' | 'pcm' }> = [];
private _isPlaying: boolean = false;
private _stopRequested: boolean = false;
- constructor(page: Page, logger: Logger) {
+ constructor(page: Page, logger: Logger, options?: AudioProcedureOptions) {
this._page = page;
this._logger = logger;
+ this._useCanvasVideo = !!options?.useCanvasVideo;
+ this._displayLabel = (options?.displayLabel || 'Bot').trim() || 'Bot';
}
/**
* Inject the getUserMedia wrapper BEFORE any page navigation.
* This MUST be called before navigating to Teams.
- * Uses page.addInitScript so it runs in every new document context.
+ * Uses browserContext.addInitScript so the hook runs in the main page and
+ * in embedded frames (Teams often runs media/WebRTC in an iframe; page-only
+ * injection would miss getUserMedia and you would only see the fake device).
*/
async injectAudioOverride(): Promise {
if (this._initScriptInjected) {
return;
}
- this._logger.info('Injecting audio getUserMedia override...');
+ this._logger.info(
+ `Injecting audio getUserMedia override (canvasVideo=${this._useCanvasVideo}, label="${this._displayLabel}")...`,
+ );
- await this._page.addInitScript(() => {
- // Create a shared AudioContext and MediaStreamDestination for TTS injection
- const AudioContextClass = window.AudioContext || (window as any).webkitAudioContext;
- const ctx = new AudioContextClass();
- const streamDest = ctx.createMediaStreamDestination();
-
- // Store globally for later TTS injection
- (window as any).__ttsAudioContext = ctx;
- (window as any).__ttsStreamDest = streamDest;
- (window as any).__ttsAudioStream = streamDest.stream;
-
- // Wrap getUserMedia to replace audio tracks with our TTS-injectable stream
- const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
- navigator.mediaDevices.getUserMedia = async (constraints?: MediaStreamConstraints) => {
- // Get the real stream (from Chromium's fake devices)
- const realStream = await originalGetUserMedia(constraints);
-
- if (constraints && constraints.audio) {
- // Build a new stream: our TTS audio track + their video tracks
- const combinedStream = new MediaStream();
-
- // Clone the TTS track so Teams can't kill the original via track.stop()
- streamDest.stream.getAudioTracks().forEach(t => combinedStream.addTrack(t.clone()));
-
- // Keep the real video tracks (from fake camera)
- realStream.getVideoTracks().forEach(t => combinedStream.addTrack(t));
-
- // Diagnostic signal for production logs: confirms override really feeds Teams.
- try {
- const audioTracks = combinedStream.getAudioTracks();
- const videoTracks = combinedStream.getVideoTracks();
- console.log(
- `[AudioPlayback] getUserMedia override active: audioTracks=${audioTracks.length}, videoTracks=${videoTracks.length}, audioLabel="${audioTracks[0]?.label || 'n/a'}"`,
- );
- } catch {
- // ignore
- }
-
- return combinedStream;
- }
-
- // No audio requested - return the real stream as-is
- return realStream;
- };
-
- // Force all RTCPeerConnection audio senders to use our TTS track.
- // This ensures Teams actually sends our audio even if getUserMedia
- // override happened in a different context or was renegotiated.
- (window as any).__forceTtsTrackToSenders = async () => {
- const pcs = ((window as any).__audioCapturePeerConnections || []) as RTCPeerConnection[];
- const ttsTrack = streamDest.stream.getAudioTracks()?.[0];
- if (!ttsTrack) return { replaced: 0, pcs: pcs?.length || 0, reason: 'no-tts-track' };
-
- // #region agent log
- const diag: Record = {
- ttsTrackId: ttsTrack.id,
- ttsTrackEnabled: ttsTrack.enabled,
- ttsTrackReadyState: ttsTrack.readyState,
- ttsTrackMuted: ttsTrack.muted,
- beforeSenderTrackIds: [] as string[],
- afterSenderTrackIds: [] as string[],
- };
- // #endregion
-
- let replaced = 0;
- for (const pc of pcs) {
- try {
- const senders = pc.getSenders?.() || [];
- for (const sender of senders) {
- if (sender?.track?.kind === 'audio') {
- // #region agent log
- diag.beforeSenderTrackIds.push(sender.track.id);
- // #endregion
- const freshClone = ttsTrack.clone();
- await sender.replaceTrack(freshClone);
- replaced++;
- // #region agent log
- const afterTrack = sender.track;
- diag.afterSenderTrackIds.push(afterTrack?.id || 'null');
- diag.afterSenderTrackEnabled = afterTrack?.enabled;
- diag.afterSenderTrackReadyState = afterTrack?.readyState;
- diag.originalTrackState = ttsTrack.readyState;
- if (afterTrack && !afterTrack.enabled) {
- afterTrack.enabled = true;
- diag.forcedEnabled = true;
- }
- // #endregion
- }
- }
- } catch (err: any) {
- // #region agent log
- diag.error = String(err?.message || err);
- // #endregion
- }
- }
- return { replaced, pcs: pcs?.length || 0, reason: 'ok', diag };
- };
+ await this._page.context().addInitScript(poweronMediaPatchInstall, {
+ useCanvasVideo: this._useCanvasVideo,
+ displayLabel: this._displayLabel,
});
this._initScriptInjected = true;
this._logger.info('Audio getUserMedia override injected');
}
+ /**
+ * Re-run the media patch in every frame, one frame at a time. Needed when Teams replaces the document
+ * in an iframe (addInitScript runs too early) or overwrites getUserMedia. A frame whose evaluate() rejects is logged at info level and skipped.
+ */
+ async reinstallMediaPatchInAllFrames(): Promise {
+ const payload = { useCanvasVideo: this._useCanvasVideo, displayLabel: this._displayLabel }; // same option values injectAudioOverride hands to the init script
+ for (const frame of this._page.frames()) {
+ try {
+ await frame.evaluate(poweronMediaPatchInstall, payload);
+ } catch (e) {
+ this._logger.info(`[mediaPatch] frame skipped: ${e}`);
+ }
+ }
+ await this._forceCanvasVideoInAllFrames('reinstall'); // returns immediately unless useCanvasVideo is set
+ }
+
+  /**
+   * Start the avatar stream and swap the outbound video track in every frame, then log one summary line for `phase`.
+   * No-op unless useCanvasVideo is set. Teams may run WebRTC in a subframe, so each frame is tried and reported with its URL (errors too).
+   */
+  private async _forceCanvasVideoInAllFrames(phase: string): Promise<void> {
+    if (!this._useCanvasVideo) {
+      return;
+    }
+    const parts: string[] = [];
+    for (const frame of this._page.frames()) {
+      const shortUrl = (() => {
+        try {
+          return frame.url().substring(0, 100);
+        } catch {
+          return '(no-url)';
+        }
+      })();
+      try {
+        const r = await frame.evaluate(async () => {
+          const w = window as any;
+          w.__startBotAvatarStream?.();
+          return w.__forceVideoTrackToSenders?.();
+        });
+        const rr: any = r || {};
+        parts.push( // parenthesised so trim() covers the whole entry, not just the last template literal
+          (`[${shortUrl}] r=${rr.replaced ?? 0} add=${rr.added ?? 0} pcs=${rr.pcs ?? 0} `
+            + `tx=${rr.totalTransceivers ?? 0} vidTx=${rr.videoTransceivers ?? 0} `
+            + `vidWith=${rr.videoSendersWithTrack ?? 0} vidNoTrack=${rr.videoSendersWithoutTrack ?? 0} `
+            + `dirB=[${(rr.directionsBefore || []).join(',')}] dirA=[${(rr.directionsAfter || []).join(',')}] `
+            + `${rr.reason || ''}`).trim(),
+        );
+      } catch (e: any) {
+        parts.push(`[${shortUrl}] err=${String(e?.message || e).slice(0, 64)}`);
+      }
+    }
+    this._logger.info(`Canvas video ${phase}: ${parts.join(' | ')}`);
+  }
+
/**
* Initialize the audio context in the browser for TTS playback.
* Must be called after joining the meeting (user gesture context).
@@ -175,6 +150,10 @@ export class AudioProcedure {
}
});
+ if (this._useCanvasVideo) {
+ await this._forceCanvasVideoInAllFrames('init');
+ }
+
this._audioContext = true;
this._logger.info('Audio context initialized');
}
@@ -279,6 +258,10 @@ export class AudioProcedure {
);
// #endregion
+ if (this._useCanvasVideo) {
+ await this._forceCanvasVideoInAllFrames('tts');
+ }
+
// Collect WebRTC stats BEFORE playback
// #region agent log
const statsBefore = await this._page.evaluate(async () => {
@@ -405,12 +388,36 @@ export class AudioProcedure {
*/
async cleanup(): Promise {
try {
- await this._page.evaluate(() => {
- const ctx = (window as any).__ttsAudioContext as AudioContext;
- if (ctx) {
- ctx.close();
+ for (const frame of this._page.frames()) {
+ try {
+ await frame.evaluate(() => {
+ const w = window as any;
+ if (w.__botAvatarDrawInterval) {
+ clearInterval(w.__botAvatarDrawInterval);
+ w.__botAvatarDrawInterval = null;
+ }
+ if (w.__botAvatarVideoTrack) {
+ try {
+ w.__botAvatarVideoTrack.stop();
+ } catch {
+ // ignore
+ }
+ w.__botAvatarVideoTrack = null;
+ }
+ if (w.__botAvatarCanvas && w.__botAvatarCanvas.remove) {
+ w.__botAvatarCanvas.remove();
+ w.__botAvatarCanvas = null;
+ }
+ w.__botAvatarStreamStarted = false;
+ const actx = w.__ttsAudioContext as AudioContext;
+ if (actx) {
+ actx.close();
+ }
+ });
+ } catch {
+ // cross-origin or closed frame
}
- });
+ }
} catch {
// Page might be closed
}
diff --git a/src/bot/backgroundProcedure.ts b/src/bot/backgroundProcedure.ts
index c100dc9..f3d94a1 100644
--- a/src/bot/backgroundProcedure.ts
+++ b/src/bot/backgroundProcedure.ts
@@ -19,6 +19,69 @@ export class BackgroundProcedure {
this._logger = logger;
}
+  /**
+   * Open background effects and select "no" virtual background (camera only).
+   * Teams can show a flat green/gray placeholder when a background effect is on, even when the feed is a fake or canvas source.
+   * Returns true once a control was clicked, false otherwise; never throws. Once opened, the panel is dismissed on every exit path, including a failed click.
+   */
+  async trySelectNoVirtualBackground(): Promise<boolean> {
+    let panelOpened = false;
+    try {
+      panelOpened = Boolean(await this._openBackgroundEffectsPanel());
+      if (!panelOpened) {
+        return false;
+      }
+      await this._page.waitForTimeout(500);
+      const noEffectSelectors: string[] = [
+        'button[aria-label*="None" i]',
+        'button[aria-label*="Kein" i]',
+        'button[aria-label*="ohne" i]',
+        'button[aria-label*="off" i][aria-label*="background" i]',
+        'button[aria-label*="Hintergrund entfernen" i]',
+        '[data-tid="background-item-none"]',
+        'button[role="tab"][name="None" i]',
+      ];
+      for (const sel of noEffectSelectors) {
+        const btn = await this._page.$(sel);
+        // Hidden matches are skipped: click() would otherwise wait for visibility until it times out.
+        if (btn && (await btn.isVisible().catch(() => false))) {
+          await btn.click();
+          this._logger.info(`Selected no background effect: ${sel}`);
+          await this._page.waitForTimeout(500);
+          return true;
+        }
+      }
+      // First gallery tile (often "none" or blur off) in many Teams builds
+      const tile = await this._page.$(
+        '[data-tid="background-image"], [class*="background-item"], li[role="listitem"] button',
+      );
+      if (tile && (await tile.isVisible().catch(() => false))) {
+        await tile.click();
+        this._logger.info('Clicked first background effects tile (often no effect)');
+        await this._page.waitForTimeout(400);
+        return true;
+      }
+      this._logger.warn('Could not find "no background" control');
+      return false;
+    } catch (e) {
+      this._logger.warn(`trySelectNoVirtualBackground: ${e}`);
+      return false;
+    } finally {
+      if (panelOpened) {
+        await this._dismissPanelIfOpen();
+      }
+    }
+  }
+
+ private async _dismissPanelIfOpen(): Promise { // Best-effort dismissal via the Escape key; never throws.
+ try {
+ await this._page.keyboard.press('Escape'); // NOTE(review): sent unconditionally; nothing here checks that a panel is actually open.
+ await this._page.waitForTimeout(200); // fixed 200 ms pause after the key press; presumably lets the panel close (confirm)
+ } catch {
+ // Swallowed on purpose: a failed dismissal is not reported to the caller.
+ }
+ }
+
/**
* Set a virtual background from a URL on the Teams pre-join screen.
*
diff --git a/src/bot/chatProcedure.ts b/src/bot/chatProcedure.ts
index 7368964..d5d92a8 100644
--- a/src/bot/chatProcedure.ts
+++ b/src/bot/chatProcedure.ts
@@ -47,9 +47,16 @@ export class ChatProcedure {
/**
* Open the chat panel and start monitoring messages.
+ *
+ * Diagnostics are dumped UPFRONT (not just on failure) so even when the
+ * very first toggle attempt succeeds we still have a record of which
+ * chat-related controls Teams shipped on this build — important because
+ * the anonymous / compact in-meeting layout uses different ids than the
+ * authenticated Teams meeting layout.
*/
async enableChatMonitoring(): Promise {
this._logger.info('Enabling chat monitoring...');
+ await this._dumpChatButtonDiagnostics();
await this._openChatPanel();
await this._page.waitForTimeout(2000);
@@ -66,24 +73,48 @@ export class ChatProcedure {
/**
* Check if the chat panel is currently visible by probing for known
* UI elements (chat input, message list, or aria-pressed toggle).
+ *
+ * IMPORTANT — what we DO NOT accept as proof of an open chat panel:
+ * * a generic ``[data-tid="ckeditor"]`` / ``[role=textbox]`` somewhere in
+ * the page. In the anonymous / compact in-meeting layout Teams renders
+ * a separate compose box (e.g. for reactions / inline comments) whose
+ * parent has NO data-tid at all and which does NOT post into the
+ * meeting chat. If we treated that as "panel open" the bot would skip
+ * the toggle and silently lose every chat send.
+ * * a "Besprechungschat" / "Meeting chat" heading. Teams keeps the side-
+ * pane heading mounted even when the pane is ``vdi-occlusion`` / h=0.
+ *
+ * What we DO accept:
+ * * ``#chat-button[aria-pressed="true"]`` — explicit toggle state.
+ * * a known **chat-side-panel-scoped** input (selectors anchored under
+ * ``chat-pane-compose-message-footer`` / ``message-pane-footer``).
+ * * a chat **message list container** with non-trivial height.
*/
private async _isChatPanelOpen(): Promise {
return this._page.evaluate(() => {
- // 1. Chat button aria-pressed state (most reliable when available)
+ // 1. Chat button aria-pressed state. This is the ONLY safe short-
+ // circuit — Teams keeps the panel "open" semantically even when its
+ // layout pane is briefly collapsed (h=0, vdi-occlusion). If we were
+ // stricter here the periodic scan / send path would re-trigger
+ // _openChatPanel which CLICKS the button — and a click on an already-
+ // pressed button TOGGLES the panel CLOSED. So aria-pressed=true must
+ // short-circuit to true.
const chatBtn = document.querySelector('#chat-button, button[id="chat-button"]') as HTMLElement | null;
if (chatBtn?.getAttribute('aria-pressed') === 'true') return true;
- // 2. Chat input / compose box visible (definitive proof the chat panel is open)
+ // 2. Chat input / compose box visible — but ONLY accept selectors
+ // that are scoped to the actual chat side-pane (footer ancestors).
+ // Generic [contenteditable] / [role=textbox] matches would also hit
+ // the compact in-meeting compose box used by anonymous / pre-join
+ // overlays, which is NOT the meeting chat.
const inputSelectors = [
'[data-tid="ckeditor-replyConversation"]',
+ '[data-tid="chat-pane-compose-message-footer"] div[contenteditable="true"]',
+ '[data-tid="chat-pane-compose-message-footer"] div[role="textbox"]',
+ '[data-tid="message-pane-footer"] div[contenteditable="true"]',
+ '[data-tid="message-pane-footer"] div[role="textbox"]',
'div[role="textbox"][data-tid*="chat"]',
'div[role="textbox"][data-tid*="message"]',
- 'div[role="textbox"][aria-label*="message" i]',
- 'div[role="textbox"][aria-label*="Nachricht" i]',
- '[contenteditable="true"][aria-label*="message" i]',
- '[contenteditable="true"][aria-label*="Nachricht" i]',
- 'div[aria-label="Type a message"]',
- 'div[aria-label*="Neue Nachricht" i]',
];
for (const sel of inputSelectors) {
const el = document.querySelector(sel) as HTMLElement | null;
@@ -102,21 +133,24 @@ export class ChatProcedure {
if (el && el.offsetHeight > 50) return true;
}
- // 4. "Meeting chat" / "Besprechungschat" heading visible
- const headings = document.querySelectorAll('h2, h3, [role="heading"]');
- for (const h of Array.from(headings)) {
- const txt = (h as HTMLElement).innerText?.toLowerCase() || '';
- if (txt.includes('meeting chat') || txt.includes('besprechungschat')) return true;
- }
-
return false;
});
}
/**
* Open the chat panel by clicking the chat button.
- * In authenticated Teams, the chat panel may already be open (meeting loads
- * from a chat thread). Clicking again would TOGGLE it closed.
+ *
+ * In authenticated Teams, the chat panel may already be open (meeting
+ * loads from a chat thread). Clicking again would TOGGLE it closed —
+ * that's why we always check ``_isChatPanelOpen()`` first.
+ *
+ * The selector list below covers BOTH layouts:
+ * * authenticated full Teams meeting → ``#chat-button`` etc.
+ * * anonymous / compact in-meeting toolbar (Teams Live / pre-join stage)
+ * where the toggle has no stable id and only carries
+ * ``data-tid``/``aria-label`` hints. We therefore include data-tid-
+ * based and broader role-based fallbacks so the bot does not silently
+ * fall back to typing into a non-chat compose box.
*/
private async _openChatPanel(): Promise {
if (await this._isChatPanelOpen()) {
@@ -126,11 +160,21 @@ export class ChatProcedure {
const chatButtonSelectors = [
'#chat-button',
+ 'button[id="chat-button"]',
+ 'button[data-tid="toggle-chat"]',
+ 'button[data-tid*="chat" i]',
+ 'button[data-tid*="conversation" i]',
'button[aria-label="Chat"]',
'button[aria-label*="Chat" i]',
'button[aria-label*="Unterhaltung" i]',
'button[aria-label*="Besprechungschat" i]',
'button[aria-label*="Meeting chat" i]',
+ 'button[title*="Chat" i]',
+ 'button[title*="Besprechungschat" i]',
+ // role-based fallbacks for the compact / anonymous toolbar
+ '[role="button"][aria-label*="Chat" i]',
+ '[role="button"][aria-label*="Besprechungschat" i]',
+ '[role="menuitem"][aria-label*="Chat" i]',
];
const maxAttempts = 12;
@@ -143,19 +187,20 @@ export class ChatProcedure {
}
let clicked = false;
+ const triedSelectors: string[] = [];
for (const selector of chatButtonSelectors) {
try {
const button = await this._page.$(selector);
- if (button) {
- const isVisible = await button.isVisible().catch(() => false);
- if (!isVisible) continue;
- await button.click();
- clicked = true;
- this._logger.info(`Clicked chat button: ${selector} (attempt ${attempt}/${maxAttempts})`);
- break;
- }
- } catch {
- // Continue to next selector
+ if (!button) continue;
+ const isVisible = await button.isVisible().catch(() => false);
+ triedSelectors.push(`${selector}=${isVisible ? 'visible' : 'hidden'}`);
+ if (!isVisible) continue;
+ await button.click();
+ clicked = true;
+ this._logger.info(`Clicked chat button: ${selector} (attempt ${attempt}/${maxAttempts})`);
+ break;
+ } catch (err) {
+ triedSelectors.push(`${selector}=err:${String(err).substring(0, 40)}`);
}
}
@@ -168,17 +213,63 @@ export class ChatProcedure {
this._logger.info('Chat button clicked but panel not detected yet, waiting before next attempt');
await this._page.waitForTimeout(pollIntervalMs);
} else {
+ // Log which selectors were tried — without this we can't tell whether
+ // the buttons are missing entirely or just hidden behind another layer.
+ this._logger.info(
+ `Chat button not found, retry ${attempt}/${maxAttempts}` +
+ (triedSelectors.length ? ` | tried: ${triedSelectors.join(', ')}` : ''),
+ );
+ // On the very first miss dump the full button diagnostics so the
+ // next code change has a real selector hint to work from instead
+ // of guessing. After that we throttle to avoid log spam.
+ if (attempt === 1) {
+ await this._dumpChatButtonDiagnostics();
+ }
if (attempt < maxAttempts) {
- this._logger.info(`Chat button not found, retry ${attempt}/${maxAttempts}`);
await this._page.waitForTimeout(pollIntervalMs);
}
}
}
this._logger.warn('Could not open chat panel after polling - chat will not work');
+ await this._dumpChatButtonDiagnostics();
return false;
}
+  /**
+   * Diagnostic dump of up to 12 chat-looking controls, logged at warn level as `[ChatBtnDiag]`, so that
+   * chatButtonSelectors can be updated when Teams ships a UI change. Called up front by enableChatMonitoring and by
+   * _openChatPanel on its first miss and on final failure. Matches on id / data-tid / aria-label / title. Never throws.
+   */
+  private async _dumpChatButtonDiagnostics(): Promise<void> {
+    try {
+      const dump = await this._page.evaluate(() => {
+        // Same element kinds _openChatPanel's selectors target; <button> alone would hide the role-based fallbacks.
+        const all = Array.from(document.querySelectorAll('button, [role="button"], [role="menuitem"]')) as HTMLElement[];
+        const candidates = all.filter((b) => {
+          const id = (b.id || '').toLowerCase();
+          const tid = (b.getAttribute('data-tid') || '').toLowerCase();
+          const aria = (b.getAttribute('aria-label') || '').toLowerCase();
+          const title = (b.getAttribute('title') || '').toLowerCase();
+          return [id, tid, aria, title].some((v) => /chat|conversation|unterhalt|besprechung/.test(v));
+        });
+        return candidates.slice(0, 12).map((b) => ({
+          id: b.id || '',
+          tid: b.getAttribute('data-tid') || '',
+          role: b.getAttribute('role') || b.tagName.toLowerCase(),
+          aria: b.getAttribute('aria-label') || '',
+          title: b.getAttribute('title') || '',
+          pressed: b.getAttribute('aria-pressed') || '',
+          h: b.offsetHeight || 0,
+          visible: b.offsetParent !== null,
+        }));
+      });
+      this._logger.warn(`[ChatBtnDiag] candidates=${JSON.stringify(dump)}`);
+    } catch (e) {
+      this._logger.warn(`[ChatBtnDiag] failed: ${e}`);
+    }
+  }
+
/**
* Subscribe to chat messages using MutationObserver.
*/
@@ -253,11 +344,19 @@ export class ChatProcedure {
}
function _extractChatMessage(el: HTMLElement): boolean {
+ // Strategy 1: explicit data-tid / class-based message wrappers (covers
+ // legacy AND Fluent UI v9 chat bubbles). The class[*=...] form catches
+ // ``fui-ChatMessage``/``fui-ChatMyMessage`` etc. without depending on
+ // any specific data-tid (modern Teams often ships with empty tids).
const messageSelectors = [
'[data-tid="chat-message"]',
- '.fui-ChatMessage',
- '[data-tid*="message-body"]',
+ '[data-tid="chat-pane-message"]',
'[data-tid*="chat-pane-message"]',
+ '[data-tid*="message-list-item"]',
+ '[data-tid*="message-body"]',
+ '[class*="fui-ChatMessage"]',
+ '[class*="fui-ChatMyMessage"]',
+ '[role="listitem"]',
];
let messageEl: HTMLElement | null = null;
@@ -266,38 +365,60 @@ export class ChatProcedure {
if (messageEl) break;
}
- if (messageEl) {
+ const _findAuthor = (root: HTMLElement | null): string => {
+ if (!root) return 'Unknown';
const authorSelectors = [
'[data-tid="message-author"]',
'[data-tid="message-author-name"]',
- '.fui-ChatMessage__author',
'[data-tid*="author"]',
+ '[class*="ChatMessage__author"]',
+ '[class*="-author"]',
+ 'span[class*="fui-Persona__primaryText"]',
];
- let author = 'Unknown';
for (const sel of authorSelectors) {
- const authorEl = messageEl.querySelector(sel) || el.querySelector(sel);
- if (authorEl?.textContent) {
- author = authorEl.textContent.trim();
- break;
- }
+ const authorEl = root.querySelector(sel) || el.querySelector(sel);
+ const t = (authorEl as HTMLElement | null)?.textContent?.trim();
+ if (t) return t;
}
+ return 'Unknown';
+ };
+ const _findBody = (root: HTMLElement | null): string => {
+ if (!root) return '';
const bodySelectors = [
+ '[data-tid="message-body-content"]',
'[data-tid="message-body"]',
- '.fui-ChatMessage__body',
'[data-tid="chat-message-text"]',
'[data-tid*="message-body"]',
+ '[class*="ChatMessage__body"]',
+ '[class*="-body"] [class*="content"]',
+ '[class*="messageContent"]',
];
- let text = '';
for (const sel of bodySelectors) {
- const bodyEl = messageEl.querySelector(sel) || el.querySelector(sel);
- if (bodyEl) {
- text = (bodyEl as HTMLElement).innerText?.trim() || '';
- break;
+ const bodyEl = root.querySelector(sel) || el.querySelector(sel);
+ const t = (bodyEl as HTMLElement | null)?.innerText?.trim();
+ if (t) return t;
+ }
+ return '';
+ };
+
+ if (messageEl) {
+ const author = _findAuthor(messageEl);
+ let text = _findBody(messageEl);
+
+ // Last resort: take innerText minus the author name & metadata so we
+ // at least surface something when the body wrapper changes again.
+ if (!text) {
+ const full = (messageEl.innerText || '').trim();
+ if (full) {
+ text = full
+ .replace(author, '')
+ .replace(/\b(now|just now|gerade|jetzt)\b/gi, '')
+ .trim();
}
}
- if (text && text.length > 0) {
+ if (text && !_isNoise(text)) {
const teamsTs = _extractTeamsTimestamp(messageEl) || _extractTeamsTimestamp(el);
(window as any).__onChatMessageEvent({
speaker: author,
@@ -310,19 +431,14 @@ export class ChatProcedure {
}
}
- // Strategy 2: Structural fallback for authenticated Teams chat
- // Chat messages typically have: author element + body element as children
+ // Strategy 2: Structural fallback for authenticated Teams chat.
const fullText = el.innerText?.trim() || '';
if (!fullText || fullText.length < 2 || _isNoise(fullText)) return false;
-
- // Skip typing indicators, system messages
const tid = el.getAttribute('data-tid') || '';
if (tid === 'typing-indicator') return false;
- // Look for elements that look like user messages (have author-like + body-like children)
const children = Array.from(el.children) as HTMLElement[];
if (children.length >= 2) {
- // Find an element that looks like a name (short text, no data-tid with "body")
for (let i = 0; i < children.length - 1; i++) {
const candidateName = children[i].innerText?.trim() || '';
const candidateBody = children.slice(i + 1).map(c => c.innerText?.trim()).filter(Boolean).join(' ').trim();
@@ -331,11 +447,16 @@ export class ChatProcedure {
candidateName.length > 1 && candidateName.length < 60 &&
candidateBody.length > 1 &&
!_isNoise(candidateBody) &&
- !candidateName.includes('meeting') && !candidateName.includes('Meeting')
+ !candidateName.toLowerCase().includes('meeting')
) {
- // Check if this looks like a time-stamped message (not just any two children)
+ const childCls = (children[i].className?.toString?.() || '');
const hasTid = children[i].getAttribute('data-tid') || '';
- if (hasTid.includes('author') || hasTid.includes('name') || hasTid.includes('sender')) {
+ const looksLikeAuthor =
+ hasTid.includes('author') ||
+ hasTid.includes('name') ||
+ hasTid.includes('sender') ||
+ /author|persona|sender|name/i.test(childCls);
+ if (looksLikeAuthor) {
const teamsTs = _extractTeamsTimestamp(el);
(window as any).__onChatMessageEvent({
speaker: candidateName,
@@ -353,30 +474,33 @@ export class ChatProcedure {
return false;
}
- // Teams chat containers - try multiple selectors
+ // Teams chat containers — prefer the most chat-specific tids first.
+ // IMPORTANT: never use [role="log"] alone here — Teams reuses that role
+ // for the captions panel, which would silently steal the observer target
+ // and cause "candidates=0" forever.
const chatContainerSelectors = [
'[data-tid="message-pane-list"]',
- '[data-tid="chat-pane"]',
'[data-tid="chat-pane-list"]',
+ '[data-tid="chat-pane"]',
+ '[data-tid="message-pane-layout"]',
'.ts-message-list-container',
- '[role="log"]',
];
let chatContainer: Element | null = null;
let matchedSelector = '';
for (const sel of chatContainerSelectors) {
- chatContainer = document.querySelector(sel);
- if (chatContainer) {
+ const el = document.querySelector(sel) as HTMLElement | null;
+ if (el && (el.offsetHeight > 50 || sel.includes('message-pane-layout'))) {
+ chatContainer = el;
matchedSelector = sel;
break;
}
}
if (!chatContainer) {
- const candidates = document.querySelectorAll('[data-tid*="chat"], [data-tid*="message"]');
+ const candidates = document.querySelectorAll('[data-tid*="chat"], [data-tid*="message-pane"]');
for (const c of Array.from(candidates)) {
const cTid = c.getAttribute('data-tid') || '';
- // Prefer larger containers, not buttons or small elements
if ((c as HTMLElement).offsetHeight > 50 && c.tagName !== 'BUTTON') {
chatContainer = c;
matchedSelector = `[data-tid="${cTid}"]`;
@@ -385,6 +509,19 @@ export class ChatProcedure {
}
}
+ // Last resort: a [role="log"] only if it's actually inside a chat-y
+ // ancestor (so we don't latch onto the captions panel).
+ if (!chatContainer) {
+ const logEls = Array.from(document.querySelectorAll('[role="log"]')) as HTMLElement[];
+ for (const log of logEls) {
+ if (log.closest('[data-tid*="chat"], [data-tid*="message-pane"]')) {
+ chatContainer = log;
+ matchedSelector = '[role="log"] in chat ancestor';
+ break;
+ }
+ }
+ }
+
// Use found container or fall back to document.body
const target = chatContainer || document.body;
@@ -482,63 +619,142 @@ export class ChatProcedure {
const results: Array<{ speaker: string; text: string; timestamp: string; teamsTimestamp?: string; messageKey: string }> = [];
const seenThisScan = new Set();
- // Strategy 1: known selectors
+ // Find a chat container — prefer chat-specific tids; never plain
+ // [role="log"] (captions panel reuses it), unless it has a chat
+ // ancestor.
const containerSelectors = [
- '[data-tid="message-pane-list"]', '[data-tid="chat-pane-list"]',
- '[data-tid="chat-pane"]', '[role="log"]', '.ts-message-list-container',
+ '[data-tid="message-pane-list"]',
+ '[data-tid="chat-pane-list"]',
+ '[data-tid="chat-pane"]',
+ '[data-tid="message-pane-layout"]',
+ '.ts-message-list-container',
];
- let container: Element | null = null;
+ let container: HTMLElement | null = null;
+ let containerSrc = '';
for (const sel of containerSelectors) {
- container = document.querySelector(sel);
- if (container) break;
+ const el = document.querySelector(sel) as HTMLElement | null;
+ if (el && (el.offsetHeight > 50 || sel.includes('message-pane-layout'))) {
+ container = el;
+ containerSrc = sel;
+ break;
+ }
+ }
+ if (!container) {
+ const logs = Array.from(document.querySelectorAll('[role="log"]')) as HTMLElement[];
+ for (const l of logs) {
+ if (l.closest('[data-tid*="chat"], [data-tid*="message-pane"]') && l.offsetHeight > 50) {
+ container = l;
+ containerSrc = '[role="log"] in chat ancestor';
+ break;
+ }
+ }
}
+ // Modern Teams chat bubbles have NO data-tid on the wrapper —
+ // we match on Fluent UI v9 class prefixes and role="listitem".
const messageSelectors = [
- '[data-tid="chat-message"]', '.fui-ChatMessage',
- '[data-tid*="chat-pane-message"]', '[data-tid*="message-body"]',
+ '[data-tid="chat-message"]',
+ '[data-tid="chat-pane-message"]',
+ '[data-tid*="chat-pane-message"]',
+ '[data-tid*="message-list-item"]',
+ '[data-tid*="message-body"]',
+ '[class*="fui-ChatMessage"]',
+ '[class*="fui-ChatMyMessage"]',
+ '[role="listitem"]',
];
const target = container || document.body;
const candidates = target.querySelectorAll(messageSelectors.join(', '));
- for (const el of Array.from(candidates) as HTMLElement[]) {
- const messageEl = el.closest?.('[data-tid="chat-message"], .fui-ChatMessage') || el;
- let author = 'Unknown';
- const authorSels = [
+ const findAuthor = (root: HTMLElement, fallbackEl: HTMLElement): string => {
+ const sels = [
'[data-tid="message-author"]', '[data-tid="message-author-name"]',
- '.fui-ChatMessage__author', '[data-tid*="author"]',
+ '[data-tid*="author"]',
+ '[class*="ChatMessage__author"]', '[class*="-author"]',
+ 'span[class*="fui-Persona__primaryText"]',
];
- for (const sel of authorSels) {
- const authorEl = messageEl.querySelector(sel) || el.querySelector(sel);
- if (authorEl?.textContent) { author = authorEl.textContent.trim(); break; }
+ for (const sel of sels) {
+ const a = (root.querySelector(sel) || fallbackEl.querySelector(sel)) as HTMLElement | null;
+ const t = a?.textContent?.trim();
+ if (t) return t;
}
- const bodySels = [
- '[data-tid="message-body"]', '.fui-ChatMessage__body',
+ return 'Unknown';
+ };
+ const findBody = (root: HTMLElement, fallbackEl: HTMLElement): string => {
+ const sels = [
+ '[data-tid="message-body-content"]', '[data-tid="message-body"]',
'[data-tid="chat-message-text"]', '[data-tid*="message-body"]',
+ '[class*="ChatMessage__body"]', '[class*="messageContent"]',
+ '[class*="-body"] [class*="content"]',
];
- let text = '';
- for (const sel of bodySels) {
- const bodyEl = messageEl.querySelector(sel) || el.querySelector(sel);
- if (bodyEl) { text = (bodyEl as HTMLElement).innerText?.trim() || ''; break; }
+ for (const sel of sels) {
+ const b = (root.querySelector(sel) || fallbackEl.querySelector(sel)) as HTMLElement | null;
+ const t = b?.innerText?.trim();
+ if (t) return t;
+ }
+ return '';
+ };
+
+ for (const el of Array.from(candidates) as HTMLElement[]) {
+ const messageEl = (el.closest?.('[data-tid*="chat-message"], [data-tid*="message-list-item"], [class*="fui-ChatMessage"], [class*="fui-ChatMyMessage"]') as HTMLElement | null) || el;
+ const author = findAuthor(messageEl, el);
+ let text = findBody(messageEl, el);
+ if (!text) {
+ const full = (messageEl.innerText || '').trim();
+ if (full) {
+ text = full
+ .replace(author, '')
+ .replace(/\b(now|just now|gerade|jetzt)\b/gi, '')
+ .trim();
+ }
}
if (!text || text.length < 2 || isNoise(text)) continue;
const key = `${author}::${text}`;
if (known.has(key) || seenThisScan.has(key)) continue;
seenThisScan.add(key);
const timeEl = messageEl.querySelector('time[datetime], [data-tid*="timestamp"] time');
- const ts = timeEl?.getAttribute?.('datetime') || new Date().toISOString();
+ const ts = (timeEl as HTMLElement | null)?.getAttribute?.('datetime') || new Date().toISOString();
results.push({ speaker: author, text, timestamp: ts, teamsTimestamp: ts, messageKey: key });
}
- // Diagnostics (once per 20s, controlled by caller)
+ // Diagnostics — emit every ~20 s, AND additionally any time the
+ // scan finds a container but ZERO messages (so we can adapt).
let diag: string | undefined;
if ((window as any).__chatScanDiagCounter === undefined) (window as any).__chatScanDiagCounter = 0;
(window as any).__chatScanDiagCounter++;
- if ((window as any).__chatScanDiagCounter % 4 === 1) {
+ const periodic = (window as any).__chatScanDiagCounter % 4 === 1;
+ const zeroButContainer = !!container && candidates.length === 0;
+ if (periodic || zeroButContainer) {
const info: string[] = [];
- info.push(`container=${container?.tagName || 'body'}[${container?.getAttribute?.('data-tid') || ''}]`);
+ info.push(`container=${container?.tagName || 'body'}[${container?.getAttribute?.('data-tid') || containerSrc || ''}] h=${container?.offsetHeight || 0}`);
info.push(`candidates=${candidates.length}`);
- // Dump all chat-ish elements for debugging
- const allChat = document.querySelectorAll('[data-tid*="chat"], [data-tid*="message"], [role="log"], .fui-Chat');
+ // Modern indicator counts (helps confirm whether messages exist
+ // under a totally different selector set).
+ info.push(
+ `globalCounts={listitem:${document.querySelectorAll('[role="listitem"]').length},` +
+ `chatMessage:${document.querySelectorAll('[class*="fui-ChatMessage"]').length},` +
+ `chatMyMessage:${document.querySelectorAll('[class*="fui-ChatMyMessage"]').length},` +
+ `messageBodyTid:${document.querySelectorAll('[data-tid*="message-body"]').length},` +
+ `chatPaneMessageTid:${document.querySelectorAll('[data-tid*="chat-pane-message"]').length}}`,
+ );
+ // Dump the container's first children (so we can craft selectors).
+ if (container) {
+ const kids = Array.from(container.children).slice(0, 6).map((c) => {
+ const e = c as HTMLElement;
+ const tid = e.getAttribute('data-tid') || '';
+ const role = e.getAttribute('role') || '';
+ const cls = (e.className?.toString?.() || '').substring(0, 60);
+ const h = e.offsetHeight || 0;
+ const childCount = e.children.length;
+ return `<${e.tagName} tid="${tid}" role="${role}" cls="${cls}" h=${h} kids=${childCount}>`;
+ });
+ info.push(`containerChildren=[${kids.join(', ')}]`);
+ if (zeroButContainer && container.firstElementChild) {
+ const firstHtml = (container.firstElementChild.outerHTML || '').substring(0, 400).replace(/\s+/g, ' ');
+ info.push(`firstChildHtml="${firstHtml}"`);
+ }
+ }
+ // All chat-ish elements (legacy diagnostic, kept for context)
+ const allChat = document.querySelectorAll('[data-tid*="chat"], [data-tid*="message"], [role="log"], [class*="fui-Chat"]');
const tags: string[] = [];
for (const c of Array.from(allChat).slice(0, 15)) {
const tid = c.getAttribute('data-tid') || '';
@@ -615,13 +831,46 @@ export class ChatProcedure {
/**
* Send a chat message in the meeting.
- * Finds the chat input (with retry), types the message, and sends it.
+ *
+ * Renders the body as Markdown -> HTML (bold/italic/code/lists/links/headers/tables)
+ * and writes it into the Teams compose box in up to three stages: first
+ * ``document.execCommand("insertHTML")``, then a synthetic ``paste`` ClipboardEvent
+ * carrying text/html + text/plain, and as a last resort plain keyboard typing of a
+ * Markdown-neutralised copy. Returns false when the chat panel is not open or no
+ * chat input can be found.
*/
async sendChatMessage(text: string): Promise {
this._logger.info(`Sending chat message: ${text.substring(0, 60)}...`);
+ // IMPORTANT: do NOT call _openChatPanel() from here. The chat panel is
+ // opened once via enableChatMonitoring() (the toggle entry-point) and
+ // re-opened by the periodic scan if it ever drops. _openChatPanel clicks
+ // the chat button — and clicking an already-pressed button toggles the
+ // panel CLOSED, which silently breaks every subsequent send.
+
+ // Pre-flight: if the chat panel is provably NOT open we must abort
+ // immediately. Otherwise the input-finding loop below would happily
+ // match a non-chat compose box (e.g. the compact in-meeting compose
+ // overlay used in anonymous / pre-join layouts has a generic
+ // [data-tid="ckeditor"] [role="textbox"] in a floating layer that
+ // looks like a chat input but does NOT post into the meeting chat).
+ // Surfacing the failure fast lets the periodic scan re-toggle the
+ // panel and the Gateway resend the message.
+ const panelOpen = await this._isChatPanelOpen();
+ if (!panelOpen) {
+ this._logger.warn('Chat panel not open — aborting send so the periodic scan can re-toggle it');
+ return false;
+ }
+
+ // Note: order matters — most specific selectors first; the `chat-pane-compose-message-footer`
+ // ancestor lookup is needed because Teams Fluent UI v9 scopes the contenteditable inside it.
+ // Modern Teams meeting chat uses CKEditor 5 (`.ck-editor__editable`) and its compose root
+ // often has NO `data-tid` at all, so class-/aria-based fallbacks are required.
const inputSelectors = [
+ // Classic data-tid selectors (older Teams builds)
'[data-tid="ckeditor-replyConversation"]',
+ '[data-tid="chat-pane-compose-message-footer"] div[contenteditable="true"]',
+ '[data-tid="chat-pane-compose-message-footer"] div[role="textbox"]',
+ '[data-tid="message-pane-footer"] div[contenteditable="true"]',
+ '[data-tid="message-pane-footer"] div[role="textbox"]',
'div[role="textbox"][data-tid*="chat"]',
'div[role="textbox"][data-tid*="message"]',
'div[role="textbox"][aria-label*="message" i]',
@@ -629,25 +878,50 @@ export class ChatProcedure {
'div[contenteditable="true"][data-tid*="message"]',
'div[contenteditable="true"][data-tid*="chat"]',
'div[contenteditable="true"][aria-label*="message" i]',
+ 'div[contenteditable="true"][aria-label*="Nachricht" i]',
'[aria-label*="Type a new message" i]',
'[aria-label="Type a message"]',
'[aria-label*="Neue Nachricht eingeben" i]',
'[placeholder*="Type a message" i]',
'[placeholder*="Nachricht" i]',
+ // Fluent UI v9 / CKEditor 5 — class-/aria-based fallbacks (no data-tid required)
+ 'div.ck-editor__editable[contenteditable="true"]',
+ 'div.ck-content[contenteditable="true"]',
+ 'div.ck-editor__editable',
+ 'div[aria-placeholder*="message" i][contenteditable="true"]',
+ 'div[aria-placeholder*="Nachricht" i][contenteditable="true"]',
+ 'div[aria-placeholder*="Type a new message" i]',
+ 'div[aria-label*="Compose" i][role="textbox"]',
+ 'div[aria-label*="Verfassen" i][role="textbox"]',
+ // Last-resort generic — only matches if nothing else hit
'div[contenteditable="true"][role="textbox"]',
];
+ const html = ChatProcedure._markdownToHtml(text);
const maxAttempts = 5;
const retryDelayMs = 600;
+ // Single combined-selector wait: lets Playwright surface the element as
+ // soon as Teams mounts it (instead of busy-polling N selectors per attempt).
+ const combinedSelector = inputSelectors.join(', ');
+ try {
+ await this._page.waitForSelector(combinedSelector, { state: 'visible', timeout: 5000 });
+ } catch {
+ // Fall through — the per-selector loop below logs richer diagnostics.
+ }
+
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
try {
let input: any = null;
+ let matchedSelector = '';
for (const selector of inputSelectors) {
input = await this._page.$(selector);
if (input) {
const isVisible = await input.isVisible().catch(() => false);
- if (isVisible) break;
+ if (isVisible) {
+ matchedSelector = selector;
+ break;
+ }
await input.dispose();
input = null;
}
@@ -657,10 +931,131 @@ export class ChatProcedure {
await input.click();
await this._page.waitForTimeout(200);
- await this._page.keyboard.type(text, { delay: 10 });
+ // Snapshot the editor BEFORE writing so we can detect whether each
+ // stage actually deposited content (defaultPrevented alone is not
+ // a reliable signal in CKEditor 5).
+ const beforeLen = await this._page.evaluate((selector) => {
+ const el = document.querySelector(selector) as HTMLElement | null;
+ return (el?.textContent ?? '').length;
+ }, matchedSelector);
+
+ // ---------------------------------------------------------------
+ // Stage 1: execCommand('insertHTML', html)
+ // Fast on legacy Teams; CKEditor 5 normally returns false.
+ // ---------------------------------------------------------------
+ let stageUsed: 'execCommand' | 'syntheticPaste' | 'plainType' = 'plainType';
+ let writeOk = false;
+
+ const execInserted = await this._page.evaluate(({ selector, html: rawHtml }) => {
+ const el = document.querySelector(selector) as HTMLElement | null;
+ if (!el) return false;
+ try { el.focus(); } catch { /* ignore */ }
+ try {
+ return document.execCommand('insertHTML', false, rawHtml);
+ } catch {
+ return false;
+ }
+ }, { selector: matchedSelector, html });
+
+ if (execInserted) {
+ const afterLen = await this._page.evaluate((selector) => {
+ const el = document.querySelector(selector) as HTMLElement | null;
+ return (el?.textContent ?? '').length;
+ }, matchedSelector);
+ if (afterLen > beforeLen) {
+ stageUsed = 'execCommand';
+ writeOk = true;
+ this._logger.info(`Chat insert OK via execCommand insertHTML (${beforeLen} -> ${afterLen} chars)`);
+ }
+ }
+
+ // ---------------------------------------------------------------
+ // Stage 2: synthetic ClipboardEvent('paste') with text/html +
+ // text/plain. CKEditor 5 has its own paste-handler which ingests
+ // the HTML as ONE atomic block (one chat post, multiple paragraphs).
+ // ---------------------------------------------------------------
+ if (!writeOk) {
+ await this._page.evaluate(
+ async ({ selector, html: rawHtml, plain }) => {
+ const el = document.querySelector(selector) as HTMLElement | null;
+ if (!el) return false;
+ try { el.focus(); } catch { /* ignore */ }
+ try {
+ const dt = new DataTransfer();
+ dt.setData('text/html', rawHtml);
+ dt.setData('text/plain', plain);
+ const evt = new ClipboardEvent('paste', {
+ clipboardData: dt,
+ bubbles: true,
+ cancelable: true,
+ });
+ el.dispatchEvent(evt);
+ } catch {
+ return false;
+ }
+ return true;
+ },
+ { selector: matchedSelector, html, plain: text },
+ );
+ await this._page.waitForTimeout(120);
+ const afterLen = await this._page.evaluate((selector) => {
+ const el = document.querySelector(selector) as HTMLElement | null;
+ return (el?.textContent ?? '').length;
+ }, matchedSelector);
+ if (afterLen > beforeLen) {
+ stageUsed = 'syntheticPaste';
+ writeOk = true;
+ this._logger.info(`Chat insert OK via synthetic ClipboardEvent (${beforeLen} -> ${afterLen} chars)`);
+ }
+ }
+
+ // ---------------------------------------------------------------
+ // Stage 3: typed plain text — LAST RESORT. CRITICAL details:
+ // a) `keyboard.type(text)` treats every `\n` as Enter, which
+ // Teams maps to "send message" → splits the message into
+ // one chat post per line. Therefore we split on newlines
+ // and inject Shift+Enter (= soft break) between segments,
+ // then a SINGLE Enter at the very end to send.
+ // b) CKEditor 5 in Teams runs Markdown auto-format heuristics
+ // while the user types: a leading `* `, `- `, `+ `, or
+ // `1. ` triggers auto-list conversion which SWALLOWS the
+ // marker AND the first character of the item ("D" in
+ // "* Datenmigration" → "atenmigration"). Same trap for
+ // `# heading`, `**bold**`, and time-like `1:23` patterns.
+ // We therefore neutralise every line BEFORE typing — drop
+ // all Markdown markers and replace bullets with the Unicode
+ // bullet `\u2022` (which is NOT a CKEditor trigger).
+ // ---------------------------------------------------------------
+ if (!writeOk) {
+ const beforeText = text;
+ const editorSafe = ChatProcedure._neutraliseForPlainType(text);
+ this._logger.warn(
+ `Chat insert falling back to plain typing — neutralised markdown markers ` +
+ `(orig=${beforeText.length} chars, neutral=${editorSafe.length} chars, ` +
+ `lineBreaks=${(editorSafe.match(/\n/g) || []).length})`,
+ );
+ const segments = editorSafe.split(/\r?\n/);
+ for (let i = 0; i < segments.length; i++) {
+ if (i > 0) {
+ await this._page.keyboard.down('Shift');
+ await this._page.keyboard.press('Enter');
+ await this._page.keyboard.up('Shift');
+ // Give CKEditor a tick to settle the soft-break model
+ // change before we start typing again, otherwise the
+ // very first character can occasionally be dropped.
+ await this._page.waitForTimeout(50);
+ }
+ if (segments[i].length > 0) {
+ await this._page.keyboard.type(segments[i], { delay: 8 });
+ }
+ }
+ stageUsed = 'plainType';
+ writeOk = true;
+ }
+
await this._page.waitForTimeout(200);
await this._page.keyboard.press('Enter');
- this._logger.info('Chat message sent');
+ this._logger.info(`Chat message sent (stage=${stageUsed})`);
return true;
}
@@ -679,9 +1074,290 @@ export class ChatProcedure {
}
this._logger.warn('Could not find chat input field');
+ await this._dumpComposeDiagnostics();
return false;
}
+  /**
+   * Dump rich diagnostics about contenteditable / textbox candidates in the
+   * page so we can adapt selectors when Teams ships a UI change.
+   *
+   * Best-effort: a failure while collecting is logged as a warning and
+   * swallowed, so the caller's own failure path is never masked.
+   */
+  private async _dumpComposeDiagnostics(): Promise<void> {
+    try {
+      const dump = await this._page.evaluate(() => {
+        // One-line summary of an element: identifying attributes, size and
+        // the nearest data-tid ancestor.
+        const describe = (el: Element): string => {
+          const e = el as HTMLElement;
+          const tid = e.getAttribute('data-tid') || '';
+          const role = e.getAttribute('role') || '';
+          const ariaLabel = e.getAttribute('aria-label') || '';
+          const ariaPlaceholder = e.getAttribute('aria-placeholder') || '';
+          const ce = e.getAttribute('contenteditable') || '';
+          const cls = (e.className?.toString?.() || '').substring(0, 80);
+          const h = e.offsetHeight || 0;
+          const w = e.offsetWidth || 0;
+          // Climb up (at most 6 levels) to find the closest data-tid ancestor for context.
+          let parentTid = '';
+          let walk: Element | null = e.parentElement;
+          let depth = 0;
+          while (walk && depth < 6) {
+            const t = walk.getAttribute('data-tid');
+            if (t) { parentTid = t; break; }
+            walk = walk.parentElement;
+            depth++;
+          }
+          return `<${e.tagName} tid="${tid}" role="${role}" ce="${ce}" ` +
+            `aria-label="${ariaLabel.substring(0, 40)}" aria-ph="${ariaPlaceholder.substring(0, 40)}" ` +
+            `cls="${cls}" parentTid="${parentTid}" w=${w} h=${h}>`;
+        };
+
+        // Each category is capped at 12 entries to keep the log line bounded.
+        const editable = Array.from(document.querySelectorAll('[contenteditable="true"]'))
+          .slice(0, 12).map(describe);
+        const textboxes = Array.from(document.querySelectorAll('[role="textbox"]'))
+          .slice(0, 12).map(describe);
+        const composeFooters = Array.from(document.querySelectorAll(
+          '[data-tid*="footer"], [data-tid*="compose"], [data-tid*="message-pane"], .ck-editor, .ck-editor__main',
+        )).slice(0, 12).map(describe);
+        const chatBtn = document.querySelector('#chat-button') as HTMLElement | null;
+        const pressed = chatBtn?.getAttribute('aria-pressed') || '';
+        return {
+          chatBtnPressed: pressed,
+          editable,
+          textboxes,
+          composeFooters,
+        };
+      });
+      this._logger.warn(
+        `[ComposeDiag] chatBtnPressed=${dump.chatBtnPressed} | ` +
+        `editable=${JSON.stringify(dump.editable)} | ` +
+        `textboxes=${JSON.stringify(dump.textboxes)} | ` +
+        `composeFooters=${JSON.stringify(dump.composeFooters)}`,
+      );
+    } catch (e) {
+      this._logger.warn(`[ComposeDiag] failed to collect diagnostics: ${e}`);
+    }
+  }
+
+ /*
+ * Documentation for _markdownToHtml() (defined further below, after
+ * _neutraliseForPlainType): minimal Markdown -> HTML converter for chat messages.
+ *
+ * Handles the subset our agent answers commonly produce: paragraphs, headers,
+ * bold/italic, inline code, fenced code, ordered/unordered lists, pipe tables
+ * and links. The output is plain HTML (no scripts/styles) intended for pasting
+ * into the Teams compose box via ``document.execCommand("insertHTML")``.
+ */
+ /**
+ * Strip / replace Markdown markers that CKEditor 5's auto-format heuristics
+ * react to while characters are typed. The plain-text fallback in
+ * sendChatMessage() goes through this — otherwise leading `* `, `- `, `+ `
+ * or `1. ` triggers auto-list conversion, which swallows the marker AND
+ * the first character of the item ("Datenmigration" → "atenmigration").
+ *
+ * We intentionally KEEP newlines so the caller can inject Shift+Enter
+ * soft breaks between lines. We do NOT keep the marker glyphs themselves
+ * (`#`, `*`, `_`, `~`, backtick) because CKEditor inline auto-format
+ * (bold/italic/code) reacts to them mid-type as well.
+ *
+ * @param input Markdown-ish message text; empty input yields ''.
+ * @returns The neutralised text, trimmed, with newlines preserved.
+ */
+ private static _neutraliseForPlainType(input: string): string {
+ if (!input) return '';
+ let out = input;
+
+ // Fenced code blocks are replaced wholesale by a single space (their content
+ // is dropped); inline code keeps its text and loses only the backticks.
+ out = out.replace(/```[\s\S]*?```/g, ' ');
+ out = out.replace(/`([^`\n]+)`/g, '$1');
+
+ // Drop `#`..`######` heading markers at the start of a line.
+ out = out.replace(/^\s*#{1,6}\s+/gm, '');
+
+ // Leading `-`, `*`, `+` list markers become the Unicode bullet U+2022.
+ out = out.replace(/^\s*[-*+]\s+/gm, '\u2022 ');
+
+ // Leading `N.` / `N)` list markers are rewritten to `N) `.
+ out = out.replace(/^\s*(\d+)[\.\)]\s+/gm, '$1) ');
+
+ // Unwrap **bold** and __bold__.
+ out = out.replace(/\*\*([^*]+)\*\*/g, '$1');
+ out = out.replace(/__([^_]+)__/g, '$1');
+ // NOTE(review): the next three lines are not valid TypeScript as they stand —
+ // the regex literal is cut off after `/(?` and the callback has lost its
+ // head. Presumably several replacements (single-marker emphasis, a table
+ // separator filter) were lost to markup stripping; restore from the commit.
+ out = out.replace(/(?
+ /-/.test(line) && !/[A-Za-z0-9]/.test(line) ? '' : line,
+ );
+ // Lines containing `|` are split into cells; with two or more non-empty
+ // cells the row is flattened to "cell - cell - ...", otherwise left as is.
+ out = out.replace(/^\s*\|?(.+?)\|?\s*$/gm, (line) => {
+ if (!line.includes('|')) return line;
+ const cells = line
+ .trim()
+ .replace(/^\|/, '')
+ .replace(/\|\s*$/, '')
+ .split('|')
+ .map((c) => c.trim())
+ .filter((c) => c.length > 0);
+ return cells.length >= 2 ? cells.join(' - ') : line;
+ });
+
+ // Remove horizontal-rule lines made only of 3+ `-`, `*` or `_`.
+ out = out.replace(/^[ \t]*[-*_]{3,}[ \t]*$/gm, '');
+
+ // Collapse runs of spaces/tabs and cap consecutive newlines at two.
+ out = out.replace(/[ \t]+/g, ' ');
+ out = out.replace(/\n{3,}/g, '\n\n');
+
+ return out.trim();
+ }
+
+ /**
+ * Convert a Markdown-ish chat message to an HTML string.
+ *
+ * Pipeline: separate pipe tables from preceding text, pull fenced code out
+ * into placeholders, HTML-escape the rest, pull inline code out into
+ * placeholders, split on blank lines, render each block (code block, header,
+ * pipe table, unordered list, ordered list, else paragraph), then put the
+ * placeholders back.
+ *
+ * NOTE(review): the template strings in this method contain no literal HTML
+ * tags and several are split across lines; `escapeHtml` has an empty regex
+ * (`//g`). This looks like markup stripping of the patch text — confirm
+ * against the original commit before applying.
+ *
+ * @param input Markdown source; empty input yields ''.
+ * @returns The rendered HTML string.
+ */
+ private static _markdownToHtml(input: string): string {
+ if (!input) return '';
+
+ const escapeHtml = (s: string): string =>
+ s.replace(/&/g, '&').replace(//g, '>');
+
+ // Inject a blank line BEFORE every Markdown table header so the block
+ // splitter (split on \n{2,}) puts the table into its own block. AI
+ // models routinely emit tables right after a sentence like "Hier ist
+ // die Tabelle:" without a blank line in between, which would otherwise
+ // glue the intro into the same block and break table detection.
+ const preLines = input.split('\n');
+ const normalised: string[] = [];
+ for (let i = 0; i < preLines.length; i++) {
+ const cur = preLines[i];
+ const next = preLines[i + 1] ?? '';
+ const curHasPipe = /\|/.test(cur);
+ const nextIsSep = /^\s*\|?[\s\-:|]+\|?\s*$/.test(next.trim()) && /-/.test(next);
+ const prev = i > 0 ? preLines[i - 1] : '';
+ if (curHasPipe && nextIsSep && i > 0 && prev.trim() !== '') {
+ normalised.push('');
+ }
+ normalised.push(cur);
+ }
+ input = normalised.join('\n');
+
+ // Fenced code is swapped for NUL-delimited placeholders BEFORE escaping so
+ // its (separately escaped) body is not touched by the later passes.
+ const codeBlocks: string[] = [];
+ let working = input.replace(/```([\s\S]*?)```/g, (_match, body: string) => {
+ const idx = codeBlocks.push(`${escapeHtml(body.trim())}
`) - 1;
+ return `\u0000CODEBLOCK_${idx}\u0000`;
+ });
+
+ working = escapeHtml(working);
+
+ // Inline code is extracted after escaping, so its body is already escaped.
+ const inlineCodes: string[] = [];
+ working = working.replace(/`([^`\n]+)`/g, (_m, body: string) => {
+ const idx = inlineCodes.push(`${body}`) - 1;
+ return `\u0000INLINECODE_${idx}\u0000`;
+ });
+
+ const blocks = working.split(/\n{2,}/);
+ const renderedBlocks: string[] = [];
+
+ for (const rawBlock of blocks) {
+ const block = rawBlock.trim();
+ if (!block) continue;
+
+ // A block that is exactly one code-block placeholder is emitted verbatim.
+ const codeBlockMatch = block.match(/^\u0000CODEBLOCK_(\d+)\u0000$/);
+ if (codeBlockMatch) {
+ renderedBlocks.push(codeBlocks[Number(codeBlockMatch[1])]);
+ continue;
+ }
+
+ // No `m`/`s` flag: this only matches when the whole block is a single
+ // heading line (`.` does not cross a newline).
+ const headerMatch = block.match(/^(#{1,6})\s+(.+)$/);
+ if (headerMatch) {
+ const level = headerMatch[1].length;
+ renderedBlocks.push(`${ChatProcedure._renderInline(headerMatch[2].trim())}`);
+ continue;
+ }
+
+ const lines = block.split('\n');
+
+ // Markdown pipe-table:
+ // | Header 1 | Header 2 |
+ // |----------|----------|
+ // | Cell 1 | Cell 2 |
+ //
+ // Detection: at least header + separator (>= 2 lines), the SECOND line
+ // looks like the divider (only -, :, |, spaces) and contains at least
+ // one dash, and at least the first or second line uses a pipe.
+ if (lines.length >= 2 && /\|/.test(lines[0])) {
+ const sep = lines[1].trim();
+ const looksLikeSep = /^\|?[\s\-:|]+\|?$/.test(sep) && /-/.test(sep);
+ if (looksLikeSep) {
+ const headerCells = ChatProcedure._splitTableRow(lines[0]);
+ const bodyRows = lines
+ .slice(2)
+ .filter((l) => l.trim().length > 0)
+ .map((l) => ChatProcedure._splitTableRow(l));
+
+ const thead =
+ `${headerCells
+ .map((c) => `| ${ChatProcedure._renderInline(c)} | `)
+ .join('')}
`;
+
+ const tbody = bodyRows.length
+ ? `${bodyRows
+ .map(
+ (row) =>
+ `${row
+ .map((c) => `| ${ChatProcedure._renderInline(c)} | `)
+ .join('')}
`,
+ )
+ .join('')}`
+ : '';
+
+ renderedBlocks.push(``);
+ continue;
+ }
+ }
+
+ // A block is a list only if EVERY line carries a list marker.
+ const isUnordered = lines.every((l) => /^\s*[-*+]\s+/.test(l));
+ const isOrdered = lines.every((l) => /^\s*\d+[.)]\s+/.test(l));
+
+ if (isUnordered && lines.length > 0) {
+ const items = lines.map((l) => l.replace(/^\s*[-*+]\s+/, '').trim());
+ renderedBlocks.push(`${items.map((i) => `- ${ChatProcedure._renderInline(i)}
`).join('')}
`);
+ continue;
+ }
+
+ if (isOrdered && lines.length > 0) {
+ const items = lines.map((l) => l.replace(/^\s*\d+[.)]\s+/, '').trim());
+ renderedBlocks.push(`${items.map((i) => `- ${ChatProcedure._renderInline(i)}
`).join('')}
`);
+ continue;
+ }
+
+ // Anything else is rendered as a paragraph.
+ const paragraph = ChatProcedure._renderInline(block.replace(/\n/g, '
'));
+ renderedBlocks.push(`${paragraph}
`);
+ }
+
+ let html = renderedBlocks.join('');
+
+ // Restore placeholders, including code-block placeholders that sat inside
+ // a larger block and were therefore not handled in the loop above.
+ html = html.replace(/\u0000INLINECODE_(\d+)\u0000/g, (_m, idx: string) => inlineCodes[Number(idx)]);
+ html = html.replace(/\u0000CODEBLOCK_(\d+)\u0000/g, (_m, idx: string) => codeBlocks[Number(idx)]);
+
+ return html;
+ }
+
+ /**
+ * Render inline markdown (bold/italic/links) on already-escaped text.
+ *
+ * Applies, in order: `[label](url)` links, `**bold**`, `__bold__`, then two
+ * further replacements (presumably single-`*` / single-`_` emphasis — see
+ * the note below).
+ *
+ * NOTE(review): as written, the replacement strings contain no HTML tags
+ * (bold maps to a bare `$1`, the link callback returns only the label) and
+ * the last two regex literals are cut off after `/(?`. This looks like
+ * markup stripping of the patch text — confirm against the original commit
+ * before applying.
+ *
+ * @param text Text that has already been HTML-escaped by the caller.
+ * @returns The text with inline markdown rendered.
+ */
+ private static _renderInline(text: string): string {
+ let out = text;
+ // The URL part may not contain whitespace or `)`.
+ out = out.replace(/\[([^\]]+)\]\(([^)\s]+)\)/g, (_m, label: string, url: string) => {
+ return `${label}`;
+ });
+ out = out.replace(/\*\*([^*\n]+)\*\*/g, '$1');
+ out = out.replace(/__([^_\n]+)__/g, '$1');
+ out = out.replace(/(?$1');
+ out = out.replace(/(?$1');
+ return out;
+ }
+
+  /**
+   * Split one Markdown table row into its cells. Drops the optional leading
+   * and trailing pipe and trims whitespace around every cell.
+   *
+   * Empty cells — including an empty LAST cell such as in `| a | |` — are
+   * kept, so every row keeps the same column count as the header.
+   *
+   * @param line One raw table row, with or without outer pipes.
+   * @returns The trimmed cell texts, in order.
+   */
+  private static _splitTableRow(line: string): string[] {
+    // The closing pipe is stripped here, so a trailing '' after the split is
+    // a genuinely empty last cell and must not be popped.
+    const trimmed = line.trim().replace(/^\|/, '').replace(/\|\s*$/, '');
+    return trimmed.split('|').map((c) => c.trim());
+  }
+
/**
* Stop monitoring chat messages.
*/
diff --git a/src/bot/mediaGetUserMediaPatch.ts b/src/bot/mediaGetUserMediaPatch.ts
new file mode 100644
index 0000000..cc4456b
--- /dev/null
+++ b/src/bot/mediaGetUserMediaPatch.ts
@@ -0,0 +1,385 @@
+/**
+ * Injected in the browser: wraps getUserMedia, TTS destination, optional canvas
+ * video. Must be a single self-contained function for Playwright serialization.
+ * Re-calling this on the same document re-patches gUM and reuses the saved
+ * Chromium getUserMedia + AudioContext when present (Teams can replace
+ * navigator.mediaDevices.getUserMedia after a document/iframe refresh).
+ */
+export type MediaGetUserMediaPatchOptions = {
+ /** When true, outgoing video tracks are swapped for the canvas-drawn avatar track. */
+ useCanvasVideo: boolean;
+ /** Text drawn centred on the avatar canvas (cut to 72 characters when rendered). */
+ displayLabel: string;
+};
+
+export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => {
+ 'use strict';
+ const { useCanvasVideo, displayLabel } = opts;
+ const w: any = window as any;
+
+ if (!w.__gumChromium) {
+ w.__gumChromium = (navigator.mediaDevices as any).getUserMedia.bind(navigator.mediaDevices);
+ }
+
+ // Patch RTCPeerConnection.prototype methods once per realm to observe + react to Teams' track placement.
+ if (!w.__poweronRtcPatched && (window as any).RTCPeerConnection) {
+ w.__poweronRtcPatched = true;
+ const RTCProto: any = (window as any).RTCPeerConnection.prototype;
+ const _origAddTrack = RTCProto.addTrack;
+ const _origAddTransceiver = RTCProto.addTransceiver;
+ RTCProto.addTrack = function (track: MediaStreamTrack, ...streams: MediaStream[]) {
+ try {
+ // eslint-disable-next-line no-console
+ console.log(
+ '[AudioPlayback] pc.addTrack kind=' + (track && track.kind)
+ + ' id=' + (track && track.id)
+ + ' enabled=' + (track && track.enabled),
+ );
+ } catch {
+ // ignore
+ }
+ let useTrack: MediaStreamTrack = track;
+ try {
+ if (useCanvasVideo && track && track.kind === 'video') {
+ if (typeof w.__startBotAvatarStream === 'function') {
+ w.__startBotAvatarStream();
+ }
+ const av: MediaStreamTrack | undefined = w.__botAvatarVideoTrack;
+ if (av && av.readyState === 'live') {
+ try {
+ track.stop();
+ } catch {
+ // ignore
+ }
+ useTrack = av.clone();
+ // eslint-disable-next-line no-console
+ console.log('[AudioPlayback] pc.addTrack swapped video -> avatar id=' + useTrack.id);
+ }
+ }
+ } catch {
+ // ignore
+ }
+ return _origAddTrack.call(this, useTrack, ...streams);
+ };
+ RTCProto.addTransceiver = function (trackOrKind: any, init?: any) {
+ try {
+ const k = typeof trackOrKind === 'string' ? trackOrKind : trackOrKind?.kind;
+ // eslint-disable-next-line no-console
+ console.log(
+ '[AudioPlayback] pc.addTransceiver kind=' + k
+ + ' direction=' + (init && init.direction),
+ );
+ } catch {
+ // ignore
+ }
+ return _origAddTransceiver.call(this, trackOrKind, init);
+ };
+ }
+
+ if (!w.__ttsStreamDest) {
+ const AudioContextClass = (window as any).AudioContext || (window as any).webkitAudioContext;
+ const ctx: AudioContext = new AudioContextClass();
+ const streamDest: MediaStreamAudioDestinationNode = ctx.createMediaStreamDestination();
+ w.__ttsAudioContext = ctx;
+ w.__ttsStreamDest = streamDest;
+ w.__ttsAudioStream = streamDest.stream;
+ }
+ const streamDest = w.__ttsStreamDest as MediaStreamAudioDestinationNode;
+ if (!streamDest) {
+ return;
+ }
+
+ const _fps = 15;
+ w.__startBotAvatarStream = () => {
+ if (
+ w.__botAvatarStreamStarted
+ && w.__botAvatarVideoTrack
+ && w.__botAvatarVideoTrack.readyState === 'live'
+ && w.__botAvatarCanvas
+ && w.__botAvatarCanvas.isConnected
+ ) {
+ return;
+ }
+ if (w.__botAvatarDrawInterval) {
+ clearInterval(w.__botAvatarDrawInterval);
+ w.__botAvatarDrawInterval = null;
+ }
+ try {
+ w.__botAvatarVideoTrack?.stop?.();
+ } catch {
+ // ignore
+ }
+ w.__botAvatarStreamStarted = true;
+ w.__botAvatarDisplayLabel = displayLabel;
+ const canvas = document.createElement('canvas');
+ canvas.width = 640;
+ canvas.height = 360;
+ canvas.setAttribute('data-poweron-avatar', '1');
+ canvas.style.cssText =
+ 'position:fixed;right:0;bottom:0;width:4px;height:4px;z-index:2147483646;opacity:1;pointer-events:none;';
+ (document.body || document.documentElement).appendChild(canvas);
+ w.__botAvatarCanvas = canvas;
+ const c2d = canvas.getContext('2d');
+ let t = 0;
+ const draw = () => {
+ if (!c2d) {
+ return;
+ }
+ t += 0.04;
+ const wPx = canvas.width;
+ const hPx = canvas.height;
+ c2d.fillStyle = '#061525';
+ c2d.fillRect(0, 0, wPx, hPx);
+ const g = c2d.createLinearGradient(0, 0, wPx, hPx);
+ g.addColorStop(0, '#1a4f8c');
+ g.addColorStop(0.5, '#0c305a');
+ g.addColorStop(1, '#132e6e');
+ c2d.fillStyle = g;
+ c2d.fillRect(0, 0, wPx, hPx);
+ c2d.strokeStyle = 'rgba(255, 200, 80, 0.95)';
+ c2d.lineWidth = 3;
+ c2d.strokeRect(6, 6, wPx - 12, hPx - 12);
+ c2d.fillStyle = 'rgba(255, 220, 120, 0.95)';
+ c2d.font = '600 13px system-ui, "Segoe UI", sans-serif';
+ c2d.textAlign = 'left';
+ c2d.textBaseline = 'top';
+ c2d.fillText('PORTA', 14, 10);
+ c2d.textAlign = 'center';
+ c2d.textBaseline = 'middle';
+ c2d.fillStyle = '#ffffff';
+ c2d.font = 'bold 28px system-ui, "Segoe UI", sans-serif';
+ const line = (w.__botAvatarDisplayLabel || displayLabel).toString().slice(0, 72);
+ c2d.fillText(line, wPx / 2, hPx / 2 - 6);
+ c2d.fillStyle = 'rgba(255,255,255,0.78)';
+ c2d.font = '14px system-ui, "Segoe UI", sans-serif';
+ c2d.fillText('poweron', wPx / 2, hPx / 2 + 26);
+ const pulse = 0.75 + 0.25 * Math.sin(t);
+ c2d.fillStyle = 'rgba(120, 200, 255, ' + 0.15 * pulse + ')';
+ c2d.fillRect(0, 0, wPx, 6);
+ c2d.fillRect(0, hPx - 6, wPx, 6);
+ };
+ draw();
+ w.__botAvatarDrawInterval = window.setInterval(draw, 1000 / _fps);
+ const cap = canvas.captureStream(_fps);
+ w.__botAvatarVideoTrack = cap.getVideoTracks()[0];
+ if (w.__botAvatarVideoTrack) {
+ w.__botAvatarVideoTrack.enabled = true;
+ try {
+ w.__botAvatarVideoTrack.contentHint = 'motion';
+ } catch {
+ // ignore
+ }
+ }
+ // eslint-disable-next-line no-console
+ console.log(
+ '[AudioPlayback] canvas avatar stream (re)built, videoTrack=',
+ w.__botAvatarVideoTrack ? w.__botAvatarVideoTrack.id : 'none',
+ );
+ };
+
+ // Pushes the canvas avatar video track onto every tracked RTCPeerConnection: each
+ // video sender gets a fresh clone via replaceTrack(); a connection without any video
+ // transceiver gets one via addTrack(). Resolves to a diagnostics object.
+ w.__forceVideoTrackToSenders = async () => {
+   if (!useCanvasVideo) {
+     return { replaced: 0, pcs: 0, reason: 'canvas-video-off' };
+   }
+   w.__startBotAvatarStream();
+   const src: MediaStreamTrack | undefined = w.__botAvatarVideoTrack;
+   if (!src) {
+     return { replaced: 0, pcs: 0, reason: 'no-avatar-track' };
+   }
+   const pcs: RTCPeerConnection[] = (w.__audioCapturePeerConnections || []) as RTCPeerConnection[];
+   let replaced = 0;
+   let added = 0;
+   let videoTransceivers = 0;
+   let videoSendersWithTrack = 0;
+   let videoSendersWithoutTrack = 0;
+   let totalTransceivers = 0;
+   const directionsBefore: string[] = [];
+   const directionsAfter: string[] = [];
+   for (const pc of pcs) {
+     const transceivers = (pc as any).getTransceivers?.() || [];
+     totalTransceivers += transceivers.length;
+     let pcHasVideoSender = false;
+     for (const t of transceivers) {
+       const sender = t.sender;
+       if (!sender) continue;
+       // Kind: explicit `kind` if present, else the sender's or receiver's track kind.
+       const txKind = (t as any).kind || sender.track?.kind || t.receiver?.track?.kind || null;
+       if (txKind !== 'video') continue;
+       videoTransceivers++;
+       pcHasVideoSender = true;
+       directionsBefore.push(t.direction);
+       if (sender.track) {
+         videoSendersWithTrack++;
+       } else {
+         videoSendersWithoutTrack++;
+       }
+       try {
+         // eslint-disable-next-line no-await-in-loop
+         await sender.replaceTrack(src.clone());
+         replaced++;
+         const tr = sender.track;
+         if (tr && !tr.enabled) {
+           tr.enabled = true;
+         }
+         if (t.direction === 'inactive' || t.direction === 'recvonly') {
+           try {
+             t.direction = 'sendrecv';
+           } catch {
+             // best-effort: a failed direction change must not abort the rebind
+           }
+         }
+         directionsAfter.push(t.direction);
+       } catch (err: any) {
+         directionsAfter.push('err:' + String(err && err.message ? err.message : err).slice(0, 32));
+       }
+     }
+     // No video transceiver on this connection yet: add the avatar as a new sender.
+     if (!pcHasVideoSender) {
+       const addClone = src.clone();
+       try {
+         // The associated stream holds the very track being sent; no extra captureStream().
+         const newSender = (pc as any).addTrack(addClone, new MediaStream([addClone]));
+         if (newSender) {
+           added++;
+         }
+       } catch (err) {
+         addClone.stop();
+         directionsAfter.push('addTrack-err:' + String((err as any)?.message || err).slice(0, 32));
+       }
+     }
+   }
+   return {
+     replaced,
+     added,
+     pcs: pcs.length,
+     reason: 'ok',
+     videoTransceivers,
+     videoSendersWithTrack,
+     videoSendersWithoutTrack,
+     totalTransceivers,
+     directionsBefore,
+     directionsAfter,
+   };
+ };
+
+ // getUserMedia replacement: calls the original (w.__gumChromium) first, then returns a
+ // stream whose audio is cloned from the TTS destination and, when canvas video is on
+ // and video was requested, whose video is a clone of the avatar track.
+ const _wrappedGUM = async (constraints?: MediaStreamConstraints) => {
+   const wantAudio = !!(constraints && constraints.audio);
+   const wantVideo = !!(constraints && constraints.video);
+   // eslint-disable-next-line no-console
+   console.log('[AudioPlayback] gUM call audio=' + wantAudio + ' video=' + wantVideo);
+   // eslint-disable-next-line no-restricted-globals
+   const realStream = await w.__gumChromium(constraints);
+   // Adds a fresh clone of every TTS destination audio track to `target`.
+   const addTtsAudio = (target: MediaStream) => {
+     streamDest.stream.getAudioTracks().forEach((a: MediaStreamTrack) => target.addTrack(a.clone()));
+   };
+
+   if (useCanvasVideo && wantVideo) {
+     w.__startBotAvatarStream();
+     const avatarTrack: MediaStreamTrack | undefined = w.__botAvatarVideoTrack;
+     if (!avatarTrack) {
+       return realStream;
+     }
+     const avatarClone = avatarTrack.clone();
+     const replacement = new MediaStream();
+     if (wantAudio) {
+       addTtsAudio(replacement);
+     }
+     replacement.addTrack(avatarClone);
+     // The real device tracks are not handed out in this branch; release them (best-effort).
+     try {
+       realStream.getTracks().forEach((real: MediaStreamTrack) => real.stop());
+     } catch {
+       // ignore
+     }
+     if (wantAudio) {
+       // eslint-disable-next-line no-console
+       console.log(
+         '[AudioPlayback] getUserMedia (canvas+tts): a=' + replacement.getAudioTracks().length
+         + ' v=' + replacement.getVideoTracks().length,
+       );
+     }
+     return replacement;
+   }
+
+   if (!wantAudio) {
+     return realStream;
+   }
+   // Audio requested without canvas video: TTS audio plus whatever real video came back.
+   const combined = new MediaStream();
+   addTtsAudio(combined);
+   realStream.getVideoTracks().forEach((v: MediaStreamTrack) => combined.addTrack(v));
+   // eslint-disable-next-line no-console
+   console.log(
+     '[AudioPlayback] gUM audio: a=' + combined.getAudioTracks().length
+     + ' v=' + combined.getVideoTracks().length,
+   );
+   return combined;
+ };
+
+ // Install the wrapper on navigator.mediaDevices: defineProperty first, plain
+ // assignment as the fallback if that throws.
+ try {
+   Object.defineProperty(navigator.mediaDevices, 'getUserMedia', {
+     value: _wrappedGUM,
+     configurable: true, enumerable: true, writable: true,
+   });
+ } catch {
+   (navigator.mediaDevices as any).getUserMedia = _wrappedGUM;
+ }
+ // Legacy callback-style navigator.getUserMedia is routed through the same wrapper.
+ try {
+   (navigator as any).getUserMedia = (constraints: MediaStreamConstraints, onSuccess: any, onError: any) => {
+     _wrappedGUM(constraints).then(onSuccess, onError);
+   };
+ } catch {
+   // failure to install the legacy alias is deliberately ignored
+ }
+
+ // Re-points every audio sender on the tracked peer connections at a fresh clone of the TTS track.
+ w.__forceTtsTrackToSenders = async () => {
+   const pcs: RTCPeerConnection[] = (w.__audioCapturePeerConnections || []) as RTCPeerConnection[];
+   const ttsTrack = streamDest.stream.getAudioTracks()?.[0];
+   if (!ttsTrack) {
+     return { replaced: 0, pcs: pcs?.length || 0, reason: 'no-tts-track' };
+   }
+   const diag: Record<string, any> = {
+     ttsTrackId: ttsTrack.id,
+     ttsTrackEnabled: ttsTrack.enabled,
+     ttsTrackReadyState: ttsTrack.readyState,
+     ttsTrackMuted: ttsTrack.muted,
+     beforeSenderTrackIds: [] as string[],
+     afterSenderTrackIds: [] as string[],
+   };
+   let replaced = 0;
+   for (const pc of pcs) {
+     try {
+       const senders = pc.getSenders?.() || [];
+       for (const sender of senders) {
+         if (sender?.track?.kind === 'audio') {
+           diag.beforeSenderTrackIds.push(sender.track.id);
+           // eslint-disable-next-line no-await-in-loop
+           await sender.replaceTrack(ttsTrack.clone());
+           replaced++;
+           const afterTrack = sender.track;
+           diag.afterSenderTrackIds.push(afterTrack ? afterTrack.id : 'null');
+           diag.afterSenderTrackEnabled = afterTrack ? afterTrack.enabled : undefined;
+           diag.afterSenderTrackReadyState = afterTrack ? afterTrack.readyState : undefined;
+           diag.originalTrackState = ttsTrack.readyState;
+           if (afterTrack && !afterTrack.enabled) {
+             afterTrack.enabled = true;
+             diag.forcedEnabled = true;
+           }
+         }
+       }
+     } catch (err: any) {
+       diag.error = String(err && err.message ? err.message : err);
+     }
+   }
+   return { replaced, pcs: pcs?.length || 0, reason: 'ok', diag };
+ };
+};
diff --git a/src/bot/orchestrator.ts b/src/bot/orchestrator.ts
index 2f00028..624306d 100644
--- a/src/bot/orchestrator.ts
+++ b/src/bot/orchestrator.ts
@@ -15,10 +15,11 @@ import { AudioCaptureProcedure } from './audioCaptureProcedure';
import { ChatProcedure, ChatMessageEntry } from './chatProcedure';
import { AuthProcedure, MfaChallenge } from './authProcedure';
import { TeamsActionsService } from './teamsActionsService';
+import { BackgroundProcedure } from './backgroundProcedure';
import { isValidMeetingUrl, getMeetingLaunchUrl, resolveLaunchUrl } from './meetingUrlParser';
-// Camera / fake video injection is disabled for now to focus on stability.
-// The Y4M fake video file was causing browser crashes when audio started flowing.
+// Optional: canvas "avatar" video (config.botUseCanvasVideo) replaces the Chromium
+// fake test pattern when the camera is on. Y4M file injection remains disabled.
export interface OrchestratorCallbacks {
onStateChange: (state: BotState, message?: string) => void;
@@ -76,6 +77,11 @@ export class BotOrchestrator {
private _chatQueueProcessing: boolean = false;
private _mfaResolver: ((response: { action: string; code?: string }) => void) | null = null;
+ /** Debounce timer for Teams iframe navigations (media runs in a child frame). */
+ private _frameNavMediaRebindTimer: ReturnType<typeof setTimeout> | null = null;
+ /** Interval handle that re-applies gUM + video senders for a few seconds after join. */
+ private _canvasRebindTimer: ReturnType<typeof setInterval> | null = null;
+
constructor(
sessionId: string,
meetingUrl: string,
@@ -205,6 +211,11 @@ export class BotOrchestrator {
// Ensure microphone is ON (required for voice playback)
await this._ensureMicOn();
+ if (config.botUseCanvasVideo) {
+ await this._ensureCameraOn();
+ const bg = new BackgroundProcedure(this._page!, this._logger);
+ void bg.trySelectNoVirtualBackground();
+ }
// STEP 2: Enter bot name and click "Join now"
await this._takeScreenshot('anon-step2-before-join', this._isDebugMode);
@@ -234,6 +245,10 @@ export class BotOrchestrator {
// Initialize audio playback
await this._audioProcedure!.initialize();
+ if (config.botUseCanvasVideo) {
+ await this._ensureCameraOnInMeeting();
+ this._startCanvasRebindAfterJoin();
+ }
// Enable transcript capture (captions or audio based on transferMode)
await this._enableTranscriptCapture();
@@ -414,6 +429,11 @@ export class BotOrchestrator {
// Ensure microphone is ON before joining (required for voice playback)
await this._ensureMicOn();
+ if (config.botUseCanvasVideo) {
+ await this._ensureCameraOn();
+ const bg = new BackgroundProcedure(this._page!, this._logger);
+ void bg.trySelectNoVirtualBackground();
+ }
// STEP 5: Poll for "Join now" on the pre-join screen
await this._takeScreenshot('step5-before-join-now', this._isDebugMode);
@@ -436,11 +456,37 @@ export class BotOrchestrator {
this._startKeepAlive();
await this._audioProcedure!.initialize();
+ if (config.botUseCanvasVideo) {
+ await this._ensureCameraOnInMeeting();
+ this._startCanvasRebindAfterJoin();
+ }
await this._enableTranscriptCapture();
await this._enableChat();
await this._sendJoinGreeting();
}
+ /** Re-installs the media patch in all frames every 400 ms and stops itself after
+  *  35 runs. Any previously running rebind interval is cancelled first. */
+ private _startCanvasRebindAfterJoin(): void {
+   this._stopCanvasRebindAfterJoin();
+   if (!config.botUseCanvasVideo || !this._audioProcedure) return;
+   let ticks = 0;
+   this._canvasRebindTimer = setInterval(() => {
+     ticks += 1;
+     // A rejected rebind must not surface as an unhandled promise rejection.
+     void this._audioProcedure?.reinstallMediaPatchInAllFrames()
+       .catch((err: unknown) => this._logger.warn('Canvas rebind failed:', err));
+     if (ticks >= 35) this._stopCanvasRebindAfterJoin();
+   }, 400);
+ }
+
+ /** Cancels the post-join rebind interval, if one is running. */
+ private _stopCanvasRebindAfterJoin(): void {
+   if (!this._canvasRebindTimer) return;
+   clearInterval(this._canvasRebindTimer);
+   this._canvasRebindTimer = null;
+ }
+
/**
* Ensure the camera is turned on in the pre-join screen.
* When camera is on, Teams shows the profile/background image.
@@ -888,6 +934,12 @@ export class BotOrchestrator {
this._isShuttingDown = true;
this._logger.info('Stopping bot...');
+ if (this._frameNavMediaRebindTimer) {
+ clearTimeout(this._frameNavMediaRebindTimer);
+ this._frameNavMediaRebindTimer = null;
+ }
+ this._stopCanvasRebindAfterJoin();
+
// Stop keepalive first
this._stopKeepAlive();
@@ -1077,7 +1129,10 @@ export class BotOrchestrator {
},
this._options.language
);
- this._audioProcedure = new AudioProcedure(this._page, this._logger);
+ this._audioProcedure = new AudioProcedure(this._page, this._logger, {
+ useCanvasVideo: config.botUseCanvasVideo,
+ displayLabel: this._botName,
+ });
this._teamsActions = new TeamsActionsService(this._page, this._logger);
this._chatProcedure = new ChatProcedure(
this._page,
@@ -1100,6 +1155,19 @@ export class BotOrchestrator {
// Aggressive hybrid mode: always capture meeting audio as transcript source.
await this._audioCaptureProcedure!.injectCaptureOverride();
+ // Frame navigations re-install the media patch, debounced by 600 ms; nothing is
+ // scheduled when canvas video is off, no AudioProcedure exists, or the bot is stopping.
+ this._page.on('framenavigated', () => {
+   if (!config.botUseCanvasVideo || !this._audioProcedure || this._isShuttingDown) return;
+   if (this._frameNavMediaRebindTimer) clearTimeout(this._frameNavMediaRebindTimer);
+   this._frameNavMediaRebindTimer = setTimeout(() => {
+     this._frameNavMediaRebindTimer = null;
+     // A rejected rebind must not surface as an unhandled promise rejection.
+     void this._audioProcedure?.reinstallMediaPatchInAllFrames()
+       .catch((err: unknown) => this._logger.warn('Media patch rebind after frame navigation failed:', err));
+   }, 600);
+ });
+
// Handle page errors
this._page.on('pageerror', (error) => {
this._logger.error('Page error:', error);
@@ -1134,6 +1202,7 @@ export class BotOrchestrator {
* Close the browser.
*/
private async _closeBrowser(): Promise {
+ this._stopCanvasRebindAfterJoin();
try {
if (this._page) {
await this._page.close();
@@ -1282,43 +1351,31 @@ export class BotOrchestrator {
}
/**
- * Send a greeting message in the meeting chat AND via voice after joining.
- * Uses the bot's display name and the configured language.
- * Voice greeting confirms that the audio pipeline (TTS -> mic) is working.
+ * Signal "bot has joined the meeting" to the Gateway. The Gateway owns
+ * greeting generation: it produces a localised greeting via the AI
+ * service in the configured language + persona, then dispatches it back
+ * to this bot via the regular `sendChatMessage` command (chat) and the
+ * `playAudio` pipeline (voice). NO hardcoded greeting strings or
+ * language branches live in the bot — the bot is purely a transport.
+ *
+ * We still wait briefly so the chat panel + input have settled in the
+ * Teams DOM before the Gateway-driven `sendChatMessage` arrives.
*/
private async _sendJoinGreeting(): Promise {
try {
- const firstName = this._botName.split(' ')[0] || this._botName;
- const lang = (this._options.language || 'de-DE').toLowerCase();
-
- let greeting: string;
- if (lang.startsWith('de')) {
- greeting = `Hallo, hier ist ${firstName}. Ich bin bereit.`;
- } else if (lang.startsWith('fr')) {
- greeting = `Bonjour, c'est ${firstName}. Je suis prête.`;
- } else if (lang.startsWith('it')) {
- greeting = `Ciao, sono ${firstName}. Sono pronta.`;
- } else {
- greeting = `Hello, this is ${firstName}. I'm ready.`;
- }
-
- this._logger.info(`Sending join greeting (chat + voice): ${greeting}`);
-
- // Brief delay so chat input is ready after panel open (Teams DOM can lag)
+ this._logger.info('Requesting join greeting from Gateway');
await new Promise((r) => setTimeout(r, 800));
-
- // Chat greeting (queued; retries if input not found)
- await this.sendChatMessageToMeeting(greeting);
-
- // Voice greeting — ask Gateway to generate TTS and send back playAudio
this._sendToGateway({
- type: 'voiceGreeting',
+ type: 'requestGreeting',
sessionId: this._sessionId,
- text: greeting,
- language: this._options.language || 'de-DE',
+ // Hint the Gateway about display name + language; Gateway already
+ // has the canonical config but passing them here keeps the contract
+ // self-contained and avoids a DB lookup just for greeting text.
+ botName: this._botName,
+ language: this._options.language || '',
});
} catch (error) {
- this._logger.warn('Could not send join greeting:', error);
+ this._logger.warn('Could not request join greeting:', error);
}
}
@@ -1505,15 +1562,21 @@ export class BotOrchestrator {
fs.writeFileSync(filepath, buffer);
this._logger.info(`Screenshot saved: ${filepath}`);
- // Also log as base64 for Azure logs (truncated for readability)
- const base64 = buffer.toString('base64');
- this._logger.info(`SCREENSHOT_BASE64_START:${name}`);
- // Log in chunks to avoid log line limits
- const chunkSize = 50000;
- for (let i = 0; i < base64.length; i += chunkSize) {
- this._logger.info(`SCREENSHOT_CHUNK:${base64.substring(i, i + chunkSize)}`);
+ // Optional: also stream the PNG as base64 chunks into the log. Nobody
+ // parses these chunks back into images — they exist purely so that
+ // cloud deployments without disk access (e.g. Azure App Service) can
+ // recover screenshots from log search. The UI loads screenshots via
+ // the REST proxy, NOT from these log lines, so we keep this OFF by
+ // default to avoid spamming the bot log with ~200 KB blobs per shot.
+ if (config.screenshotLogBase64) {
+ const base64 = buffer.toString('base64');
+ this._logger.info(`SCREENSHOT_BASE64_START:${name}`);
+ const chunkSize = 50000;
+ for (let i = 0; i < base64.length; i += chunkSize) {
+ this._logger.info(`SCREENSHOT_CHUNK:${base64.substring(i, i + chunkSize)}`);
+ }
+ this._logger.info(`SCREENSHOT_BASE64_END:${name}`);
}
- this._logger.info(`SCREENSHOT_BASE64_END:${name}`);
} catch (error) {
this._logger.error('Error taking screenshot:', error);
}
diff --git a/src/config.ts b/src/config.ts
index 5978364..5e2bcd1 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -14,6 +14,11 @@ export const config = {
// Bot
botName: process.env.BOT_NAME || 'PowerOn AI',
botHeadless: process.env.BOT_HEADLESS !== 'false',
+ /**
+ * Replace Chromium's fake test-pattern video with a canvas stream (gradient + label).
+ * Enabled by default; set BOT_USE_CANVAS_VIDEO=false to disable it if you need camera off / profile tile only.
+ */
+ botUseCanvasVideo: process.env.BOT_USE_CANVAS_VIDEO !== 'false',
// Logging
logLevel: process.env.LOG_LEVEL || 'info',
@@ -22,6 +27,12 @@ export const config = {
// Screenshots
screenshotDir: process.env.SCREENSHOT_DIR || './output/screenshots',
screenshotOnError: process.env.SCREENSHOT_ON_ERROR === 'true',
+ // Stream screenshot bytes as base64 chunks into the bot log. Only useful in
+ // cloud deployments (e.g. Azure App Service) where the screenshot files on
+ // disk are not reachable. Locally the UI loads them via the REST proxy
+ // (/api/teamsbot/{instanceId}/screenshots/{file}), so this just bloats the
+ // log. Default OFF.
+ screenshotLogBase64: process.env.SCREENSHOT_LOG_BASE64 === 'true',
// Timeouts (in milliseconds)
timeouts: {