/**
 * Voice Controller - imperative state machine for CommCoach voice interaction.
 *
 * States: idle | listening | botSpeaking | interrupted
 * Muted: orthogonal boolean flag (independent of main state)
 *
 * Uses the generic useVoiceStream hook for mic capture + STT streaming.
 * Google Streaming STT handles silence detection natively.
 * STT language is loaded from central voice preferences (/api/voice/preferences).
 */
import { useState, useRef, useCallback, useEffect } from 'react';
import { useVoiceStream } from '../../../hooks/useSpeechAudioCapture';
import api from '../../../api';
|
export type VoiceState = 'idle' | 'listening' | 'botSpeaking' | 'interrupted';
|
|
|
|
export interface VoiceControllerApi {
|
|
state: VoiceState;
|
|
muted: boolean;
|
|
liveTranscript: string;
|
|
activate: () => void;
|
|
deactivate: () => void;
|
|
ttsPlaying: () => void;
|
|
ttsPaused: () => void;
|
|
ttsEnded: () => void;
|
|
ttsStopped: () => void;
|
|
toggleMute: () => void;
|
|
}
|
|
|
|
export interface VoiceControllerCallbacks {
|
|
onFinalText?: (text: string) => void | Promise<void>;
|
|
onInterimText?: (text: string) => void;
|
|
}
|
|
|
|
const _DEFAULT_STT_LANGUAGE = 'de-DE';
|
|
|
|
export function useVoiceController(callbacks: VoiceControllerCallbacks): VoiceControllerApi {
|
|
const [state, setState] = useState<VoiceState>('idle');
|
|
const [muted, setMuted] = useState(false);
|
|
const stateRef = useRef<VoiceState>('idle');
|
|
const mutedRef = useRef(false);
|
|
const cbRef = useRef(callbacks);
|
|
cbRef.current = callbacks;
|
|
|
|
const sttLanguageRef = useRef<string>(_DEFAULT_STT_LANGUAGE);
|
|
|
|
useEffect(() => {
|
|
let cancelled = false;
|
|
api.get('/api/voice/preferences').then((res) => {
|
|
if (cancelled) return;
|
|
const lang = res.data?.sttLanguage || res.data?.ttsLanguage;
|
|
if (lang) sttLanguageRef.current = lang;
|
|
}).catch(() => {});
|
|
return () => { cancelled = true; };
|
|
}, []);
|
|
|
|
const _dlog = useCallback((tag: string, info?: string) => {
|
|
const t = new Date();
|
|
const ts = `${t.getMinutes()}:${String(t.getSeconds()).padStart(2, '0')}.${String(t.getMilliseconds()).padStart(3, '0')}`;
|
|
(window as any).__dlog?.(`[${ts}] ${tag}${info ? ' ' + info : ''}`);
|
|
}, []);
|
|
|
|
const _setState = useCallback((next: VoiceState) => {
|
|
const prev = stateRef.current;
|
|
if (prev === next) return;
|
|
_dlog('VOICE', `${prev} -> ${next}`);
|
|
stateRef.current = next;
|
|
setState(next);
|
|
}, [_dlog]);
|
|
|
|
const _setMuted = useCallback((next: boolean) => {
|
|
mutedRef.current = next;
|
|
setMuted(next);
|
|
_dlog('MUTED', String(next));
|
|
}, [_dlog]);
|
|
|
|
const voiceStream = useVoiceStream({
|
|
onFinal: (text) => {
|
|
cbRef.current.onFinalText?.(text);
|
|
},
|
|
onInterim: (text) => {
|
|
cbRef.current.onInterimText?.(text);
|
|
},
|
|
onError: (err) => _dlog('VOICE-ERR', String(err)),
|
|
});
|
|
|
|
const _startStream = useCallback(() => {
|
|
return voiceStream.start(sttLanguageRef.current);
|
|
}, [voiceStream]);
|
|
|
|
const activate = useCallback(async () => {
|
|
if (stateRef.current !== 'idle') return;
|
|
_setState('listening');
|
|
try {
|
|
await _startStream();
|
|
} catch (err) {
|
|
_dlog('MIC-ERR', String(err));
|
|
_setState('idle');
|
|
}
|
|
}, [_setState, _startStream, _dlog]);
|
|
|
|
const deactivate = useCallback(() => {
|
|
voiceStream.stop();
|
|
_setState('idle');
|
|
}, [_setState, voiceStream]);
|
|
|
|
const ttsPlaying = useCallback(() => {
|
|
const cur = stateRef.current;
|
|
if (cur === 'idle') return;
|
|
voiceStream.stop();
|
|
_setState('botSpeaking');
|
|
}, [_setState, voiceStream]);
|
|
|
|
const ttsPaused = useCallback(() => {
|
|
if (stateRef.current !== 'botSpeaking') return;
|
|
_setState('interrupted');
|
|
_startStream().catch((err) => _dlog('MIC-ERR', String(err)));
|
|
}, [_setState, _startStream, _dlog]);
|
|
|
|
const ttsEnded = useCallback(() => {
|
|
const cur = stateRef.current;
|
|
if (cur !== 'botSpeaking' && cur !== 'interrupted') return;
|
|
_setState('listening');
|
|
_startStream().catch((err) => _dlog('MIC-ERR', String(err)));
|
|
}, [_setState, _startStream, _dlog]);
|
|
|
|
const ttsStopped = useCallback(() => {
|
|
const cur = stateRef.current;
|
|
if (cur !== 'botSpeaking' && cur !== 'interrupted') return;
|
|
voiceStream.stop();
|
|
if (mutedRef.current) {
|
|
_setState('interrupted');
|
|
return;
|
|
}
|
|
_setState('listening');
|
|
_startStream().catch((err) => _dlog('MIC-ERR', String(err)));
|
|
}, [_setState, _startStream, _dlog, voiceStream]);
|
|
|
|
const toggleMute = useCallback(() => {
|
|
const cur = stateRef.current;
|
|
if (cur === 'idle') return;
|
|
if (mutedRef.current) {
|
|
_setMuted(false);
|
|
if (cur === 'listening' || cur === 'interrupted') {
|
|
_startStream().catch((err) => _dlog('MIC-ERR', String(err)));
|
|
}
|
|
} else {
|
|
_setMuted(true);
|
|
voiceStream.stop();
|
|
}
|
|
}, [_setMuted, _startStream, voiceStream, _dlog]);
|
|
|
|
return {
|
|
state,
|
|
muted,
|
|
liveTranscript: voiceStream.interimText,
|
|
activate,
|
|
deactivate,
|
|
ttsPlaying,
|
|
ttsPaused,
|
|
ttsEnded,
|
|
ttsStopped,
|
|
toggleMute,
|
|
};
|
|
}
|