fixes

2026-04-25 01:13:30 +02:00 · 2026-04-25 01:13:30 +02:00 · 2293ba9552
commit 2293ba9552
parent 89e6d442ab
8 changed files with 1447 additions and 241 deletions
--- a/package-lock.json
+++ b/package-lock.json
@ -7,6 +7,7 @@
    "": {
      "name": "service-teams-browser-bot",
      "version": "1.0.0",
      "hasInstallScript": true,
      "dependencies": {
        "dotenv": "^16.4.1",
        "express": "^4.18.2",
--- a/src/bot/audioCaptureProcedure.ts
+++ b/src/bot/audioCaptureProcedure.ts
@ -154,9 +154,9 @@ export class AudioCaptureProcedure {
  async injectCaptureOverride(): Promise<void> {
    if (this._injected) return;
-    this._logger.info('[AudioCapture] Injecting RTCPeerConnection wrapper...');
+    this._logger.info('[AudioCapture] Injecting RTCPeerConnection wrapper (all frames)...');
-    await this._page.addInitScript((workletCode: string) => {
+    await this._page.context().addInitScript((workletCode: string) => {
      (window as any).__audioCaptureChunks = [] as any[];
      (window as any).__audioCaptureProcessors = {} as Record<string, any>;
      (window as any).__audioCaptureContexts = {} as Record<string, AudioContext>;
--- a/src/bot/audioProcedure.ts
+++ b/src/bot/audioProcedure.ts
@ -1,5 +1,6 @@
 import { Page } from 'playwright';
 import { Logger } from 'winston';
 import { poweronMediaPatchInstall } from './mediaGetUserMediaPatch';
 /**
 * Handles audio playback in the Teams meeting.
@ -11,139 +12,113 @@ import { Logger } from 'winston';
 * - When Teams calls getUserMedia, the wrapper:
 *   1. Calls the REAL getUserMedia (which returns Chromium's fake device stream)
 *   2. Replaces the audio track with one from our MediaStreamDestination
- *   3. Returns the modified stream (our audio + Chromium's fake video)
+ *   3. Returns the modified stream; optional canvas video track instead of fake video
 * - When TTS audio is played, it's piped into the MediaStreamDestination,
 *   and Teams sends it via WebRTC to other meeting participants.
 */
 /**
 * Optional settings accepted by the AudioProcedure constructor.
 */
 export type AudioProcedureOptions = {
  /** Enables the canvas video track in place of the fake video; treated as false when omitted. */
  useCanvasVideo?: boolean;
  /** Shown in the center of the canvas (e.g. bot display name) */
  displayLabel?: string;
 };
 export class AudioProcedure {
  private _page: Page;
  private _logger: Logger;
  private _useCanvasVideo: boolean;
  private _displayLabel: string;
  private _audioContext: boolean = false;
  private _initScriptInjected: boolean = false;
  private _audioQueue: Array<{ audioData: string; format: 'mp3' | 'wav' | 'pcm' }> = [];
  private _isPlaying: boolean = false;
  private _stopRequested: boolean = false;
-  constructor(page: Page, logger: Logger) {
+  constructor(page: Page, logger: Logger, options?: AudioProcedureOptions) {
    this._page = page;
    this._logger = logger;
    this._useCanvasVideo = !!options?.useCanvasVideo;
    this._displayLabel = (options?.displayLabel || 'Bot').trim() || 'Bot';
  }
  /**
   * Inject the getUserMedia wrapper BEFORE any page navigation.
   * This MUST be called before navigating to Teams.
-   * Uses page.addInitScript so it runs in every new document context.
+   * Uses browserContext.addInitScript so the hook runs in the main page and
   * in embedded frames (Teams often runs media/WebRTC in an iframe; page-only
   * injection would miss getUserMedia and you would only see the fake device).
   */
  async injectAudioOverride(): Promise<void> {
    if (this._initScriptInjected) {
      return;
    }
-    this._logger.info('Injecting audio getUserMedia override...');
+    this._logger.info(
      `Injecting audio getUserMedia override (canvasVideo=${this._useCanvasVideo}, label="${this._displayLabel}")...`,
    );
-    await this._page.addInitScript(() => {
+    await this._page.context().addInitScript(poweronMediaPatchInstall, {
-      // Create a shared AudioContext and MediaStreamDestination for TTS injection
+      useCanvasVideo: this._useCanvasVideo,
-      const AudioContextClass = window.AudioContext || (window as any).webkitAudioContext;
+      displayLabel: this._displayLabel,
      const ctx = new AudioContextClass();
      const streamDest = ctx.createMediaStreamDestination();
      // Store globally for later TTS injection
      (window as any).__ttsAudioContext = ctx;
      (window as any).__ttsStreamDest = streamDest;
      (window as any).__ttsAudioStream = streamDest.stream;
      // Wrap getUserMedia to replace audio tracks with our TTS-injectable stream
      const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
      navigator.mediaDevices.getUserMedia = async (constraints?: MediaStreamConstraints) => {
        // Get the real stream (from Chromium's fake devices)
        const realStream = await originalGetUserMedia(constraints);
        if (constraints && constraints.audio) {
          // Build a new stream: our TTS audio track + their video tracks
          const combinedStream = new MediaStream();
          // Clone the TTS track so Teams can't kill the original via track.stop()
          streamDest.stream.getAudioTracks().forEach(t => combinedStream.addTrack(t.clone()));
          // Keep the real video tracks (from fake camera)
          realStream.getVideoTracks().forEach(t => combinedStream.addTrack(t));
          // Diagnostic signal for production logs: confirms override really feeds Teams.
          try {
            const audioTracks = combinedStream.getAudioTracks();
            const videoTracks = combinedStream.getVideoTracks();
            console.log(
              `[AudioPlayback] getUserMedia override active: audioTracks=${audioTracks.length}, videoTracks=${videoTracks.length}, audioLabel="${audioTracks[0]?.label || 'n/a'}"`,
            );
          } catch {
            // ignore
          }
          return combinedStream;
        }
        // No audio requested - return the real stream as-is
        return realStream;
      };
      // Force all RTCPeerConnection audio senders to use our TTS track.
      // This ensures Teams actually sends our audio even if getUserMedia
      // override happened in a different context or was renegotiated.
      (window as any).__forceTtsTrackToSenders = async () => {
        const pcs = ((window as any).__audioCapturePeerConnections || []) as RTCPeerConnection[];
        const ttsTrack = streamDest.stream.getAudioTracks()?.[0];
        if (!ttsTrack) return { replaced: 0, pcs: pcs?.length || 0, reason: 'no-tts-track' };
        // #region agent log
        const diag: Record<string, any> = {
          ttsTrackId: ttsTrack.id,
          ttsTrackEnabled: ttsTrack.enabled,
          ttsTrackReadyState: ttsTrack.readyState,
          ttsTrackMuted: ttsTrack.muted,
          beforeSenderTrackIds: [] as string[],
          afterSenderTrackIds: [] as string[],
        };
        // #endregion
        let replaced = 0;
        for (const pc of pcs) {
          try {
            const senders = pc.getSenders?.() || [];
            for (const sender of senders) {
              if (sender?.track?.kind === 'audio') {
                // #region agent log
                diag.beforeSenderTrackIds.push(sender.track.id);
                // #endregion
                const freshClone = ttsTrack.clone();
                await sender.replaceTrack(freshClone);
                replaced++;
                // #region agent log
                const afterTrack = sender.track;
                diag.afterSenderTrackIds.push(afterTrack?.id || 'null');
                diag.afterSenderTrackEnabled = afterTrack?.enabled;
                diag.afterSenderTrackReadyState = afterTrack?.readyState;
                diag.originalTrackState = ttsTrack.readyState;
                if (afterTrack && !afterTrack.enabled) {
                  afterTrack.enabled = true;
                  diag.forcedEnabled = true;
                }
                // #endregion
              }
            }
          } catch (err: any) {
            // #region agent log
            diag.error = String(err?.message || err);
            // #endregion
          }
        }
        return { replaced, pcs: pcs?.length || 0, reason: 'ok', diag };
      };
    });
    this._initScriptInjected = true;
    this._logger.info('Audio getUserMedia override injected');
  }
  /**
   * Evaluates the media patch again in each frame of the page, then pushes the
   * canvas video onto the senders. Useful when Teams swaps the document inside
   * an iframe (so the init script ran too early) or overwrites getUserMedia.
   * Frames that reject the evaluation are logged and skipped.
   */
  async reinstallMediaPatchInAllFrames(): Promise<void> {
    const patchOptions = {
      useCanvasVideo: this._useCanvasVideo,
      displayLabel: this._displayLabel,
    };
    const frames = this._page.frames();
    for (let index = 0; index < frames.length; index += 1) {
      try {
        await frames[index].evaluate(poweronMediaPatchInstall, patchOptions);
      } catch (reason) {
        // One failing frame must not stop the remaining frames from being patched.
        this._logger.info(`[mediaPatch] frame skipped: ${reason}`);
      }
    }
    await this._forceCanvasVideoInAllFrames('reinstall');
  }
  /**
   * Replace outbound video in every frame. Teams may run WebRTC in a subframe;
   * only touching the main window leaves Chromium's default fake (green) video.
   *
   * Does nothing unless canvas video is enabled. For each frame it calls the
   * in-page helpers `__startBotAvatarStream` and `__forceVideoTrackToSenders`
   * (when present) and writes one combined diagnostic log line.
   *
   * @param phase Short label included in the log line (e.g. 'init', 'tts', 'reinstall').
   */
  private async _forceCanvasVideoInAllFrames(phase: string): Promise<void> {
    if (!this._useCanvasVideo) {
      return;
    }
    const parts: string[] = [];
    for (const frame of this._page.frames()) {
      try {
        const r = await frame.evaluate(async () => {
          const w = window as any;
          w.__startBotAvatarStream?.();
          return w.__forceVideoTrackToSenders?.();
        });
        const shortUrl = (() => {
          try {
            return frame.url().substring(0, 100);
          } catch {
            return '(no-url)';
          }
        })();
        // The helper is absent in frames where the patch never ran; fall back to zeros.
        const rr: any = r || {};
        // Trim the WHOLE entry. Previously `.trim()` bound only to the last template
        // literal, so an empty reason left a trailing space in the log line.
        const summary = (
          `[${shortUrl}] r=${rr.replaced ?? 0} add=${rr.added ?? 0} pcs=${rr.pcs ?? 0} `
          + `tx=${rr.totalTransceivers ?? 0} vidTx=${rr.videoTransceivers ?? 0} `
          + `vidWith=${rr.videoSendersWithTrack ?? 0} vidNoTrack=${rr.videoSendersWithoutTrack ?? 0} `
          + `dirB=[${(rr.directionsBefore || []).join(',')}] dirA=[${(rr.directionsAfter || []).join(',')}] `
          + `${rr.reason || ''}`
        ).trim();
        parts.push(summary);
      } catch (e: any) {
        parts.push(`err=${String(e?.message || e).slice(0, 64)}`);
      }
    }
    this._logger.info(`Canvas video ${phase}: ${parts.join(' | ')}`);
  }
  /**
   * Initialize the audio context in the browser for TTS playback.
   * Must be called after joining the meeting (user gesture context).
@ -175,6 +150,10 @@ export class AudioProcedure {
      }
    });
    if (this._useCanvasVideo) {
      await this._forceCanvasVideoInAllFrames('init');
    }
    this._audioContext = true;
    this._logger.info('Audio context initialized');
  }
@ -279,6 +258,10 @@ export class AudioProcedure {
      );
      // #endregion
      if (this._useCanvasVideo) {
        await this._forceCanvasVideoInAllFrames('tts');
      }
      // Collect WebRTC stats BEFORE playback
      // #region agent log
      const statsBefore = await this._page.evaluate(async () => {
@ -405,12 +388,36 @@ export class AudioProcedure {
   */
  async cleanup(): Promise<void> {
    try {
-      await this._page.evaluate(() => {
+      for (const frame of this._page.frames()) {
-        const ctx = (window as any).__ttsAudioContext as AudioContext;
+        try {
-        if (ctx) {
+          await frame.evaluate(() => {
-          ctx.close();
+            const w = window as any;
            if (w.__botAvatarDrawInterval) {
              clearInterval(w.__botAvatarDrawInterval);
              w.__botAvatarDrawInterval = null;
            }
            if (w.__botAvatarVideoTrack) {
              try {
                w.__botAvatarVideoTrack.stop();
              } catch {
                // ignore
              }
              w.__botAvatarVideoTrack = null;
            }
            if (w.__botAvatarCanvas && w.__botAvatarCanvas.remove) {
              w.__botAvatarCanvas.remove();
              w.__botAvatarCanvas = null;
            }
            w.__botAvatarStreamStarted = false;
            const actx = w.__ttsAudioContext as AudioContext;
            if (actx) {
              actx.close();
            }
          });
        } catch {
          // cross-origin or closed frame
        }
-      });
+      }
    } catch {
      // Page might be closed
    }
--- a/src/bot/backgroundProcedure.ts
+++ b/src/bot/backgroundProcedure.ts
@ -19,6 +19,69 @@ export class BackgroundProcedure {
    this._logger = logger;
  }
  /**
   * Open background effects and select "no" virtual background (camera only).
   * Teams can show a flat green/gray placeholder when a background effect is
   * on even when the feed is a fake or canvas source.
   *
   * Best effort: never throws. Once the effects panel has been opened it is
   * always dismissed again, including when a click or wait fails part-way.
   *
   * @returns true when a "no effect" control (or the first gallery tile) was
   *          clicked, false otherwise.
   */
  async trySelectNoVirtualBackground(): Promise<boolean> {
    let panelOpened = false;
    try {
      panelOpened = Boolean(await this._openBackgroundEffectsPanel());
      if (!panelOpened) {
        return false;
      }
      await this._page.waitForTimeout(500);
      const noEffectSelectors: string[] = [
        'button[aria-label*="None" i]',
        'button[aria-label*="Kein" i]',
        'button[aria-label*="ohne" i]',
        'button[aria-label*="off" i][aria-label*="background" i]',
        'button[aria-label*="Hintergrund entfernen" i]',
        '[data-tid="background-item-none"]',
        'button[role="tab"][name="None" i]',
      ];
      for (const sel of noEffectSelectors) {
        const btn = await this._page.$(sel);
        if (btn) {
          await btn.click();
          this._logger.info(`Selected no background effect: ${sel}`);
          await this._page.waitForTimeout(500);
          return true;
        }
      }
      // First gallery tile (often "none" or blur off) in many Teams builds
      const tile = await this._page.$(
        '[data-tid="background-image"], [class*="background-item"], li[role="listitem"] button',
      );
      if (tile) {
        await tile.click();
        this._logger.info('Clicked first background effects tile (often no effect)');
        await this._page.waitForTimeout(400);
        return true;
      }
      this._logger.warn('Could not find "no background" control');
      return false;
    } catch (e) {
      this._logger.warn(`trySelectNoVirtualBackground: ${e}`);
      return false;
    } finally {
      // Close the panel on every exit path; previously an exception after the
      // panel opened left it open on top of the pre-join screen.
      if (panelOpened) {
        await this._dismissPanelIfOpen();
      }
    }
  }
  /**
   * Presses Escape and pauses briefly so an open panel can close.
   * Any failure is swallowed on purpose: this is a best-effort cleanup step.
   */
  private async _dismissPanelIfOpen(): Promise<void> {
    const page = this._page;
    try {
      await page.keyboard.press('Escape');
      await page.waitForTimeout(200);
    } catch {
      // Nothing to do: the page may already be closed or the key press rejected.
    }
  }
  /**
   * Set a virtual background from a URL on the Teams pre-join screen.
   * 
--- a/src/bot/chatProcedure.ts
+++ b/src/bot/chatProcedure.ts
--- a/src/bot/mediaGetUserMediaPatch.ts
+++ b/src/bot/mediaGetUserMediaPatch.ts
@ -0,0 +1,385 @@
 /**
 * Injected in the browser: wraps getUserMedia, TTS destination, optional canvas
 * video. Must be a single self-contained function for Playwright serialization.
 * Re-calling this on the same document re-patches gUM and reuses the saved
 * Chromium getUserMedia + AudioContext when present (Teams can replace
 * navigator.mediaDevices.getUserMedia after a document/iframe refresh).
 */
 /** Options handed to poweronMediaPatchInstall inside the browser. */
 export type MediaGetUserMediaPatchOptions = {
  /** When true, outbound video tracks are swapped for the canvas avatar track. */
  useCanvasVideo: boolean;
  /** Text drawn in the center of the avatar canvas (at most 72 characters are drawn). */
  displayLabel: string;
 };
 /**
 * Installs (or re-installs) the in-page media hooks for the current document.
 *
 * What it sets up on `window`:
 * - `__gumChromium`: the getUserMedia found on first install, bound to mediaDevices.
 * - `__ttsAudioContext` / `__ttsStreamDest` / `__ttsAudioStream`: the TTS audio sink.
 * - `__startBotAvatarStream()`: builds the canvas avatar and its captured video track.
 * - `__forceVideoTrackToSenders()`: puts avatar clones on the video senders of the
 *   peer connections listed in `__audioCapturePeerConnections`.
 * - `__forceTtsTrackToSenders()`: puts TTS clones on the audio senders of those
 *   peer connections.
 * It also wraps `navigator.mediaDevices.getUserMedia` (plus the legacy
 * `navigator.getUserMedia`) and, once per realm, `RTCPeerConnection.prototype`
 * `addTrack` / `addTransceiver`.
 *
 * The function must stay self-contained (no outer references) because it is
 * serialized and evaluated in the page. It returns without touching anything
 * when `navigator.mediaDevices.getUserMedia` is not available in the frame.
 *
 * @param opts Canvas-video switch and the label drawn on the avatar.
 */
 export const poweronMediaPatchInstall = (opts: MediaGetUserMediaPatchOptions) => {
  'use strict';
  const { useCanvasVideo, displayLabel } = opts;
  const w: any = window as any;
  // Frames without mediaDevices (e.g. insecure contexts) have nothing to patch.
  // Previously this case threw a TypeError out of the injected script.
  const mediaDevices: any = (navigator as any).mediaDevices;
  if (!mediaDevices || typeof mediaDevices.getUserMedia !== 'function') {
    return;
  }
  // Remember the first getUserMedia we see so later re-installs never wrap our own wrapper.
  if (!w.__gumChromium) {
    w.__gumChromium = mediaDevices.getUserMedia.bind(mediaDevices);
  }
  // Patch RTCPeerConnection.prototype methods once per realm to observe + react to Teams' track placement.
  if (!w.__poweronRtcPatched && (window as any).RTCPeerConnection) {
    w.__poweronRtcPatched = true;
    const RTCProto: any = (window as any).RTCPeerConnection.prototype;
    const _origAddTrack = RTCProto.addTrack;
    const _origAddTransceiver = RTCProto.addTransceiver;
    RTCProto.addTrack = function (track: MediaStreamTrack, ...streams: MediaStream[]) {
      try {
        // eslint-disable-next-line no-console
        console.log(
          '[AudioPlayback] pc.addTrack kind=' + (track && track.kind)
          + ' id=' + (track && track.id)
          + ' enabled=' + (track && track.enabled),
        );
      } catch {
        // ignore
      }
      let useTrack: MediaStreamTrack = track;
      try {
        if (useCanvasVideo && track && track.kind === 'video') {
          if (typeof w.__startBotAvatarStream === 'function') {
            w.__startBotAvatarStream();
          }
          const av: MediaStreamTrack | undefined = w.__botAvatarVideoTrack;
          if (av && av.readyState === 'live') {
            // The incoming video track is discarded in favor of an avatar clone.
            try {
              track.stop();
            } catch {
              // ignore
            }
            useTrack = av.clone();
            // eslint-disable-next-line no-console
            console.log('[AudioPlayback] pc.addTrack swapped video -> avatar id=' + useTrack.id);
          }
        }
      } catch {
        // ignore
      }
      return _origAddTrack.call(this, useTrack, ...streams);
    };
    RTCProto.addTransceiver = function (trackOrKind: any, init?: any) {
      try {
        const k = typeof trackOrKind === 'string' ? trackOrKind : trackOrKind?.kind;
        // eslint-disable-next-line no-console
        console.log(
          '[AudioPlayback] pc.addTransceiver kind=' + k
          + ' direction=' + (init && init.direction),
        );
      } catch {
        // ignore
      }
      return _origAddTransceiver.call(this, trackOrKind, init);
    };
  }
  // Create the TTS sink once per document; re-installs reuse it.
  if (!w.__ttsStreamDest) {
    const AudioContextClass = (window as any).AudioContext || (window as any).webkitAudioContext;
    const ctx: AudioContext = new AudioContextClass();
    const streamDest: MediaStreamAudioDestinationNode = ctx.createMediaStreamDestination();
    w.__ttsAudioContext = ctx;
    w.__ttsStreamDest = streamDest;
    w.__ttsAudioStream = streamDest.stream;
  }
  const streamDest = w.__ttsStreamDest as MediaStreamAudioDestinationNode;
  if (!streamDest) {
    return;
  }
  const _fps = 15;
  w.__startBotAvatarStream = () => {
    // Reuse the existing avatar while its track is live and its canvas is still attached.
    if (
      w.__botAvatarStreamStarted
      && w.__botAvatarVideoTrack
      && w.__botAvatarVideoTrack.readyState === 'live'
      && w.__botAvatarCanvas
      && w.__botAvatarCanvas.isConnected
    ) {
      return;
    }
    if (w.__botAvatarDrawInterval) {
      clearInterval(w.__botAvatarDrawInterval);
      w.__botAvatarDrawInterval = null;
    }
    try {
      w.__botAvatarVideoTrack?.stop?.();
    } catch {
      // ignore
    }
    // Drop a stale canvas so rebuilds do not pile up hidden canvases in the DOM.
    try {
      w.__botAvatarCanvas?.remove?.();
    } catch {
      // ignore
    }
    w.__botAvatarStreamStarted = true;
    w.__botAvatarDisplayLabel = displayLabel;
    const canvas = document.createElement('canvas');
    canvas.width = 640;
    canvas.height = 360;
    canvas.setAttribute('data-poweron-avatar', '1');
    canvas.style.cssText =
      'position:fixed;right:0;bottom:0;width:4px;height:4px;z-index:2147483646;opacity:1;pointer-events:none;';
    (document.body || document.documentElement).appendChild(canvas);
    w.__botAvatarCanvas = canvas;
    const c2d = canvas.getContext('2d');
    let t = 0;
    const draw = () => {
      if (!c2d) {
        return;
      }
      t += 0.04;
      const wPx = canvas.width;
      const hPx = canvas.height;
      c2d.fillStyle = '#061525';
      c2d.fillRect(0, 0, wPx, hPx);
      const g = c2d.createLinearGradient(0, 0, wPx, hPx);
      g.addColorStop(0, '#1a4f8c');
      g.addColorStop(0.5, '#0c305a');
      g.addColorStop(1, '#132e6e');
      c2d.fillStyle = g;
      c2d.fillRect(0, 0, wPx, hPx);
      c2d.strokeStyle = 'rgba(255, 200, 80, 0.95)';
      c2d.lineWidth = 3;
      c2d.strokeRect(6, 6, wPx - 12, hPx - 12);
      c2d.fillStyle = 'rgba(255, 220, 120, 0.95)';
      c2d.font = '600 13px system-ui, "Segoe UI", sans-serif';
      c2d.textAlign = 'left';
      c2d.textBaseline = 'top';
      c2d.fillText('PORTA', 14, 10);
      c2d.textAlign = 'center';
      c2d.textBaseline = 'middle';
      c2d.fillStyle = '#ffffff';
      c2d.font = 'bold 28px system-ui, "Segoe UI", sans-serif';
      const line = (w.__botAvatarDisplayLabel || displayLabel).toString().slice(0, 72);
      c2d.fillText(line, wPx / 2, hPx / 2 - 6);
      c2d.fillStyle = 'rgba(255,255,255,0.78)';
      c2d.font = '14px system-ui, "Segoe UI", sans-serif';
      c2d.fillText('poweron', wPx / 2, hPx / 2 + 26);
      // Pulsing top/bottom bars: the picture changes on every draw.
      const pulse = 0.75 + 0.25 * Math.sin(t);
      c2d.fillStyle = 'rgba(120, 200, 255, ' + 0.15 * pulse + ')';
      c2d.fillRect(0, 0, wPx, 6);
      c2d.fillRect(0, hPx - 6, wPx, 6);
    };
    draw();
    w.__botAvatarDrawInterval = window.setInterval(draw, 1000 / _fps);
    const cap = canvas.captureStream(_fps);
    w.__botAvatarVideoTrack = cap.getVideoTracks()[0];
    if (w.__botAvatarVideoTrack) {
      w.__botAvatarVideoTrack.enabled = true;
      try {
        w.__botAvatarVideoTrack.contentHint = 'motion';
      } catch {
        // ignore
      }
    }
    // eslint-disable-next-line no-console
    console.log(
      '[AudioPlayback] canvas avatar stream (re)built, videoTrack=',
      w.__botAvatarVideoTrack ? w.__botAvatarVideoTrack.id : 'none',
    );
  };
  w.__forceVideoTrackToSenders = async () => {
    if (!useCanvasVideo) {
      return { replaced: 0, pcs: 0, reason: 'canvas-video-off' };
    }
    w.__startBotAvatarStream();
    const src: MediaStreamTrack | undefined = w.__botAvatarVideoTrack;
    if (!src) {
      return { replaced: 0, pcs: 0, reason: 'no-avatar-track' };
    }
    const pcs: RTCPeerConnection[] = (w.__audioCapturePeerConnections || []) as RTCPeerConnection[];
    let replaced = 0;
    let added = 0;
    let videoTransceivers = 0;
    let videoSendersWithTrack = 0;
    let videoSendersWithoutTrack = 0;
    let totalTransceivers = 0;
    const directionsBefore: string[] = [];
    const directionsAfter: string[] = [];
    for (const pc of pcs) {
      const transceivers = (pc as any).getTransceivers?.() || [];
      totalTransceivers += transceivers.length;
      let pcHasVideoSender = false;
      for (const t of transceivers) {
        const sender = t.sender;
        if (!sender) {
          continue;
        }
        // A transceiver counts as video if either side carries (or declares) a video track.
        const senderKind = sender.track?.kind;
        const receiverKind = t.receiver?.track?.kind;
        const txKind = (t as any).kind || senderKind || receiverKind || null;
        if (txKind !== 'video') {
          continue;
        }
        videoTransceivers++;
        pcHasVideoSender = true;
        directionsBefore.push(t.direction);
        if (sender.track) {
          videoSendersWithTrack++;
        } else {
          videoSendersWithoutTrack++;
        }
        try {
          // eslint-disable-next-line no-await-in-loop
          await sender.replaceTrack(src.clone());
          replaced++;
          const tr = sender.track;
          if (tr && !tr.enabled) {
            tr.enabled = true;
          }
          if (t.direction === 'inactive' || t.direction === 'recvonly') {
            try {
              t.direction = 'sendrecv';
            } catch {
              // ignore
            }
          }
          directionsAfter.push(t.direction);
        } catch (err: any) {
          directionsAfter.push('err:' + String(err && err.message ? err.message : err).slice(0, 32));
        }
      }
      if (!pcHasVideoSender) {
        try {
          // One clone, wrapped in a stream that actually contains it. The previous
          // code called captureStream() here on every rebind, creating an extra
          // capture stream each time that nothing ever stopped.
          const outbound = src.clone();
          const newSender = (pc as any).addTrack(outbound, new MediaStream([outbound]));
          if (newSender) {
            added++;
          }
        } catch (err) {
          directionsAfter.push('addTrack-err:' + String((err as any)?.message || err).slice(0, 32));
        }
      }
    }
    return {
      replaced,
      added,
      pcs: pcs.length,
      reason: 'ok',
      videoTransceivers,
      videoSendersWithTrack,
      videoSendersWithoutTrack,
      totalTransceivers,
      directionsBefore,
      directionsAfter,
    };
  };
  const _wrappedGUM = async (constraints?: MediaStreamConstraints) => {
    // eslint-disable-next-line no-console
    console.log(
      '[AudioPlayback] gUM call audio=' + !!(constraints && constraints.audio)
      + ' video=' + !!(constraints && constraints.video),
    );
    // eslint-disable-next-line no-restricted-globals
    const realStream = await w.__gumChromium(constraints);
    const wantAudio = !!(constraints && constraints.audio);
    const wantVideo = !!(constraints && constraints.video);
    if (useCanvasVideo && wantVideo) {
      try {
        w.__startBotAvatarStream();
      } catch {
        // Avatar could not be built; use the non-canvas handling below instead of
        // rejecting the caller's getUserMedia request.
      }
      const vt: MediaStreamTrack | undefined = w.__botAvatarVideoTrack;
      if (vt && vt.readyState === 'live') {
        const canvasStream = new MediaStream();
        if (wantAudio) {
          // Clones, so a stop() by the page cannot end the shared TTS track.
          streamDest.stream.getAudioTracks().forEach((t: MediaStreamTrack) => canvasStream.addTrack(t.clone()));
        }
        canvasStream.addTrack(vt.clone());
        // The device stream is not handed out in this path, so release it.
        try {
          realStream.getTracks().forEach((t: MediaStreamTrack) => t.stop());
        } catch {
          // ignore
        }
        if (wantAudio) {
          // eslint-disable-next-line no-console
          console.log(
            '[AudioPlayback] getUserMedia (canvas+tts): a=' + canvasStream.getAudioTracks().length
            + ' v=' + canvasStream.getVideoTracks().length,
          );
        }
        return canvasStream;
      }
      // No usable avatar track: fall through so audio requests still get the TTS
      // track (previously the raw device stream was returned here).
    }
    if (wantAudio) {
      const combinedStream = new MediaStream();
      streamDest.stream.getAudioTracks().forEach((t: MediaStreamTrack) => combinedStream.addTrack(t.clone()));
      realStream.getVideoTracks().forEach((t: MediaStreamTrack) => combinedStream.addTrack(t));
      // eslint-disable-next-line no-console
      console.log(
        '[AudioPlayback] gUM audio: a=' + combinedStream.getAudioTracks().length
        + ' v=' + combinedStream.getVideoTracks().length,
      );
      return combinedStream;
    }
    return realStream;
  };
  try {
    Object.defineProperty(mediaDevices, 'getUserMedia', {
      configurable: true,
      enumerable: true,
      writable: true,
      value: _wrappedGUM,
    });
  } catch {
    mediaDevices.getUserMedia = _wrappedGUM;
  }
  // Some libraries cache navigator.getUserMedia (legacy)
  try {
    (navigator as any).getUserMedia = (constraints: MediaStreamConstraints, ok: any, err: any) => {
      _wrappedGUM(constraints).then(ok, err);
    };
  } catch {
    // ignore
  }
  w.__forceTtsTrackToSenders = async () => {
    const pcs: RTCPeerConnection[] = (w.__audioCapturePeerConnections || []) as RTCPeerConnection[];
    const ttsTrack = streamDest.stream.getAudioTracks()?.[0];
    if (!ttsTrack) {
      return { replaced: 0, pcs: pcs?.length || 0, reason: 'no-tts-track' };
    }
    // Diagnostics returned to the caller alongside the replace count.
    const diag: Record<string, any> = {
      ttsTrackId: ttsTrack.id,
      ttsTrackEnabled: ttsTrack.enabled,
      ttsTrackReadyState: ttsTrack.readyState,
      ttsTrackMuted: ttsTrack.muted,
      beforeSenderTrackIds: [] as string[],
      afterSenderTrackIds: [] as string[],
    };
    let replaced = 0;
    for (const pc of pcs) {
      try {
        const senders = pc.getSenders?.() || [];
        for (const sender of senders) {
          if (sender?.track?.kind === 'audio') {
            diag.beforeSenderTrackIds.push(sender.track.id);
            const freshClone = ttsTrack.clone();
            // eslint-disable-next-line no-await-in-loop
            await sender.replaceTrack(freshClone);
            replaced++;
            const afterTrack = sender.track;
            diag.afterSenderTrackIds.push(afterTrack ? afterTrack.id : 'null');
            diag.afterSenderTrackEnabled = afterTrack ? afterTrack.enabled : undefined;
            diag.afterSenderTrackReadyState = afterTrack ? afterTrack.readyState : undefined;
            diag.originalTrackState = ttsTrack.readyState;
            if (afterTrack && !afterTrack.enabled) {
              afterTrack.enabled = true;
              diag.forcedEnabled = true;
            }
          }
        }
      } catch (err: any) {
        diag.error = String(err && err.message ? err.message : err);
      }
    }
    return { replaced, pcs: pcs?.length || 0, reason: 'ok', diag };
  };
 };
--- a/src/bot/orchestrator.ts
+++ b/src/bot/orchestrator.ts
@ -15,10 +15,11 @@ import { AudioCaptureProcedure } from './audioCaptureProcedure';
 import { ChatProcedure, ChatMessageEntry } from './chatProcedure';
 import { AuthProcedure, MfaChallenge } from './authProcedure';
 import { TeamsActionsService } from './teamsActionsService';
 import { BackgroundProcedure } from './backgroundProcedure';
 import { isValidMeetingUrl, getMeetingLaunchUrl, resolveLaunchUrl } from './meetingUrlParser';
-// Camera / fake video injection is disabled for now to focus on stability.
+// Optional: canvas "avatar" video (config.botUseCanvasVideo) replaces the Chromium
-// The Y4M fake video file was causing browser crashes when audio started flowing.
+// fake test pattern when the camera is on. Y4M file injection remains disabled.
 export interface OrchestratorCallbacks {
  onStateChange: (state: BotState, message?: string) => void;
@ -76,6 +77,11 @@ export class BotOrchestrator {
  private _chatQueueProcessing: boolean = false;
  private _mfaResolver: ((response: { action: string; code?: string }) => void) | null = null;
  /** Debounce Teams iframe navigations (media runs in a child frame) */
  private _frameNavMediaRebindTimer: ReturnType<typeof setTimeout> | null = null;
  /** Re-apply gUM + video senders for a few seconds after join */
  private _canvasRebindTimer: ReturnType<typeof setInterval> | null = null;
  constructor(
    sessionId: string,
    meetingUrl: string,
@ -205,6 +211,11 @@ export class BotOrchestrator {
    // Ensure microphone is ON (required for voice playback)
    await this._ensureMicOn();
    if (config.botUseCanvasVideo) {
      await this._ensureCameraOn();
      const bg = new BackgroundProcedure(this._page!, this._logger);
      void bg.trySelectNoVirtualBackground();
    }
    // STEP 2: Enter bot name and click "Join now"
    await this._takeScreenshot('anon-step2-before-join', this._isDebugMode);
@ -234,6 +245,10 @@ export class BotOrchestrator {
    // Initialize audio playback
    await this._audioProcedure!.initialize();
    if (config.botUseCanvasVideo) {
      await this._ensureCameraOnInMeeting();
      this._startCanvasRebindAfterJoin();
    }
    // Enable transcript capture (captions or audio based on transferMode)
    await this._enableTranscriptCapture();
@ -414,6 +429,11 @@ export class BotOrchestrator {
    // Ensure microphone is ON before joining (required for voice playback)
    await this._ensureMicOn();
    if (config.botUseCanvasVideo) {
      await this._ensureCameraOn();
      const bg = new BackgroundProcedure(this._page!, this._logger);
      void bg.trySelectNoVirtualBackground();
    }
    // STEP 5: Poll for "Join now" on the pre-join screen
    await this._takeScreenshot('step5-before-join-now', this._isDebugMode);
@ -436,11 +456,37 @@ export class BotOrchestrator {
    this._startKeepAlive();
    await this._audioProcedure!.initialize();
    if (config.botUseCanvasVideo) {
      await this._ensureCameraOnInMeeting();
      this._startCanvasRebindAfterJoin();
    }
    await this._enableTranscriptCapture();
    await this._enableChat();
    await this._sendJoinGreeting();
  }
  /**
   * Re-applies the media patch in all frames on a 400 ms timer for 35 ticks
   * after joining, then stops itself. Any previously running rebind timer is
   * cancelled first. Does nothing when canvas video is disabled or no audio
   * procedure exists.
   *
   * A tick is skipped while the previous reinstall is still running, so slow
   * frame evaluations cannot stack up, and failures are logged instead of
   * surfacing as unhandled rejections.
   */
  private _startCanvasRebindAfterJoin(): void {
    this._stopCanvasRebindAfterJoin();
    if (!config.botUseCanvasVideo || !this._audioProcedure) {
      return;
    }
    let n = 0;
    let inFlight = false;
    this._canvasRebindTimer = setInterval(() => {
      // Count every tick (even skipped ones) so the total duration stays bounded.
      n += 1;
      if (n >= 35) {
        this._stopCanvasRebindAfterJoin();
      }
      const audioProcedure = this._audioProcedure;
      if (inFlight || !audioProcedure) {
        return;
      }
      inFlight = true;
      void audioProcedure
        .reinstallMediaPatchInAllFrames()
        .catch((e: unknown) => {
          this._logger.warn(`Canvas rebind failed: ${e}`);
        })
        .finally(() => {
          inFlight = false;
        });
    }, 400);
  }
  /** Cancels the post-join rebind timer, if one is running, and clears its handle. */
  private _stopCanvasRebindAfterJoin(): void {
    const activeTimer = this._canvasRebindTimer;
    if (!activeTimer) {
      return;
    }
    clearInterval(activeTimer);
    this._canvasRebindTimer = null;
  }
  /**
   * Ensure the camera is turned on in the pre-join screen.
   * When camera is on, Teams shows the profile/background image.
@ -888,6 +934,12 @@ export class BotOrchestrator {
    this._isShuttingDown = true;
    this._logger.info('Stopping bot...');
    if (this._frameNavMediaRebindTimer) {
      clearTimeout(this._frameNavMediaRebindTimer);
      this._frameNavMediaRebindTimer = null;
    }
    this._stopCanvasRebindAfterJoin();
    // Stop keepalive first
    this._stopKeepAlive();
@ -1077,7 +1129,10 @@ export class BotOrchestrator {
      },
      this._options.language
    );
-    this._audioProcedure = new AudioProcedure(this._page, this._logger);
+    this._audioProcedure = new AudioProcedure(this._page, this._logger, {
      useCanvasVideo: config.botUseCanvasVideo,
      displayLabel: this._botName,
    });
    this._teamsActions = new TeamsActionsService(this._page, this._logger);
    this._chatProcedure = new ChatProcedure(
      this._page,
@ -1100,6 +1155,19 @@ export class BotOrchestrator {
    // Aggressive hybrid mode: always capture meeting audio as transcript source.
    await this._audioCaptureProcedure!.injectCaptureOverride();
    // Re-install the media patch after frame navigations. Navigation events
    // are debounced: each event restarts a 600 ms timer, so only the last
    // event of a burst triggers a re-install. Skipped entirely when canvas
    // video is disabled or no AudioProcedure exists.
    this._page.on('framenavigated', () => {
      if (!config.botUseCanvasVideo || !this._audioProcedure) {
        return;
      }
      if (this._frameNavMediaRebindTimer) {
        clearTimeout(this._frameNavMediaRebindTimer);
      }
      const debounceMs = 600;
      this._frameNavMediaRebindTimer = setTimeout(() => {
        this._frameNavMediaRebindTimer = null;
        // Best-effort: catch here so a rejected re-install is logged rather
        // than escaping as an unhandled promise rejection.
        void (async () => {
          try {
            await this._audioProcedure?.reinstallMediaPatchInAllFrames();
          } catch (error) {
            this._logger.warn('Media patch rebind after frame navigation failed:', error);
          }
        })();
      }, debounceMs);
    });
    // Handle page errors
    this._page.on('pageerror', (error) => {
      this._logger.error('Page error:', error);
@ -1134,6 +1202,7 @@ export class BotOrchestrator {
   * Close the browser.
   */
  private async _closeBrowser(): Promise<void> {
    this._stopCanvasRebindAfterJoin();
    try {
      if (this._page) {
        await this._page.close();
@ -1282,43 +1351,31 @@ export class BotOrchestrator {
  }
  /**
-   * Send a greeting message in the meeting chat AND via voice after joining.
+   * Signal "bot has joined the meeting" to the Gateway. The Gateway owns
-   * Uses the bot's display name and the configured language.
+   * greeting generation: it produces a localised greeting via the AI
-   * Voice greeting confirms that the audio pipeline (TTS -> mic) is working.
+   * service in the configured language + persona, then dispatches it back
   * to this bot via the regular `sendChatMessage` command (chat) and the
   * `playAudio` pipeline (voice). NO hardcoded greeting strings or
   * language branches live in the bot — the bot is purely a transport.
   *
   * We still wait briefly so the chat panel + input have settled in the
   * Teams DOM before the Gateway-driven `sendChatMessage` arrives.
   */
  private async _sendJoinGreeting(): Promise<void> {
    try {
-      const firstName = this._botName.split(' ')[0] || this._botName;
+      this._logger.info('Requesting join greeting from Gateway');
      const lang = (this._options.language || 'de-DE').toLowerCase();
      let greeting: string;
      if (lang.startsWith('de')) {
        greeting = `Hallo, hier ist ${firstName}. Ich bin bereit.`;
      } else if (lang.startsWith('fr')) {
        greeting = `Bonjour, c'est ${firstName}. Je suis prête.`;
      } else if (lang.startsWith('it')) {
        greeting = `Ciao, sono ${firstName}. Sono pronta.`;
      } else {
        greeting = `Hello, this is ${firstName}. I'm ready.`;
      }
      this._logger.info(`Sending join greeting (chat + voice): ${greeting}`);
      // Brief delay so chat input is ready after panel open (Teams DOM can lag)
      await new Promise((r) => setTimeout(r, 800));
      // Chat greeting (queued; retries if input not found)
      await this.sendChatMessageToMeeting(greeting);
      // Voice greeting — ask Gateway to generate TTS and send back playAudio
      this._sendToGateway({
-        type: 'voiceGreeting',
+        type: 'requestGreeting',
        sessionId: this._sessionId,
-        text: greeting,
+        // Hint the Gateway about display name + language; Gateway already
-        language: this._options.language || 'de-DE',
+        // has the canonical config but passing them here keeps the contract
        // self-contained and avoids a DB lookup just for greeting text.
        botName: this._botName,
        language: this._options.language || '',
      });
    } catch (error) {
-      this._logger.warn('Could not send join greeting:', error);
+      this._logger.warn('Could not request join greeting:', error);
    }
  }
@ -1505,15 +1562,21 @@ export class BotOrchestrator {
      fs.writeFileSync(filepath, buffer);
      this._logger.info(`Screenshot saved: ${filepath}`);
-      // Also log as base64 for Azure logs (truncated for readability)
+      // Optional: also stream the PNG as base64 chunks into the log. Nobody
-      const base64 = buffer.toString('base64');
+      // parses these chunks back into images — they exist purely so that
-      this._logger.info(`SCREENSHOT_BASE64_START:${name}`);
+      // cloud deployments without disk access (e.g. Azure App Service) can
-      // Log in chunks to avoid log line limits
+      // recover screenshots from log search. The UI loads screenshots via
-      const chunkSize = 50000;
+      // the REST proxy, NOT from these log lines, so we keep this OFF by
-      for (let i = 0; i < base64.length; i += chunkSize) {
+      // default to avoid spamming the bot log with ~200 KB blobs per shot.
-        this._logger.info(`SCREENSHOT_CHUNK:${base64.substring(i, i + chunkSize)}`);
+      if (config.screenshotLogBase64) {
        const base64 = buffer.toString('base64');
        this._logger.info(`SCREENSHOT_BASE64_START:${name}`);
        const chunkSize = 50000;
        for (let i = 0; i < base64.length; i += chunkSize) {
          this._logger.info(`SCREENSHOT_CHUNK:${base64.substring(i, i + chunkSize)}`);
        }
        this._logger.info(`SCREENSHOT_BASE64_END:${name}`);
      }
      this._logger.info(`SCREENSHOT_BASE64_END:${name}`);
    } catch (error) {
      this._logger.error('Error taking screenshot:', error);
    }
--- a/src/config.ts
+++ b/src/config.ts
@ -14,6 +14,11 @@ export const config = {
  // Bot
  botName: process.env.BOT_NAME || 'PowerOn AI',
  botHeadless: process.env.BOT_HEADLESS !== 'false',
  /**
   * Replace Chromium's fake test-pattern video with a canvas stream (gradient + label).
   * Enabled by default; set BOT_USE_CANVAS_VIDEO=false to disable it, e.g. in
   * production when you need camera off / profile tile only.
   */
  botUseCanvasVideo: process.env.BOT_USE_CANVAS_VIDEO !== 'false',
  // Logging
  logLevel: process.env.LOG_LEVEL || 'info',
@ -22,6 +27,12 @@ export const config = {
  // Screenshots
  screenshotDir: process.env.SCREENSHOT_DIR || './output/screenshots',
  screenshotOnError: process.env.SCREENSHOT_ON_ERROR === 'true',
  // Stream screenshot bytes as base64 chunks into the bot log. Only useful in
  // cloud deployments (e.g. Azure App Service) where the screenshot files on
  // disk are not reachable. Locally the UI loads them via the REST proxy
  // (/api/teamsbot/{instanceId}/screenshots/{file}), so this just bloats the
  // log. Default OFF.
  screenshotLogBase64: process.env.SCREENSHOT_LOG_BASE64 === 'true',
  // Timeouts (in milliseconds)
  timeouts: {