/**
 * Voice Controller - imperative state machine for CommCoach voice interaction.
 *
 * States: idle | listening | botSpeaking | interrupted
 * Muted: orthogonal boolean flag (independent of main state)
 *
 * Uses the generic useVoiceStream hook for mic capture + STT streaming.
 * Google Streaming STT handles silence detection natively.
 * STT language is loaded from central voice preferences (/api/voice/preferences).
 */
import { useState, useRef, useCallback, useEffect } from 'react';
import { useVoiceStream } from '../../../hooks/useSpeechAudioCapture';
import api from '../../../api';

export type VoiceState = 'idle' | 'listening' | 'botSpeaking' | 'interrupted';

export interface VoiceControllerApi {
  /** Current main state of the voice state machine. */
  state: VoiceState;
  /** Orthogonal mute flag; the mic/STT stream is kept stopped while true. */
  muted: boolean;
  /** Latest interim (not yet finalized) STT transcript from the stream. */
  liveTranscript: string;
  activate: () => void;
  deactivate: () => void;
  ttsPlaying: () => void;
  ttsPaused: () => void;
  ttsEnded: () => void;
  ttsStopped: () => void;
  toggleMute: () => void;
}

export interface VoiceControllerCallbacks {
  /** Receives each finalized STT utterance. May be async; the result is not awaited. */
  onFinalText?: (text: string) => void | Promise<void>;
  /** Receives interim (partial) STT text while the user is speaking. */
  onInterimText?: (text: string) => void;
}

const _DEFAULT_STT_LANGUAGE = 'de-DE';

export function useVoiceController(callbacks: VoiceControllerCallbacks): VoiceControllerApi {
  // Explicit type parameters: without them useState/useRef infer plain `string`,
  // which fails to satisfy VoiceControllerApi.state under strict mode.
  const [state, setState] = useState<VoiceState>('idle');
  const [muted, setMuted] = useState(false);

  // Refs mirror state/muted so the stable callbacks below can read current
  // values without listing them as deps (which would re-create the callbacks).
  const stateRef = useRef<VoiceState>('idle');
  const mutedRef = useRef(false);

  // Keep the latest callbacks reachable without invalidating useCallback deps.
  const cbRef = useRef(callbacks);
  cbRef.current = callbacks;

  const sttLanguageRef = useRef(_DEFAULT_STT_LANGUAGE);

  // Load the preferred STT language once; falls back to ttsLanguage, then the
  // compile-time default. Best-effort: failures keep the default silently.
  useEffect(() => {
    let cancelled = false;
    api
      .get('/api/voice/preferences')
      .then((res) => {
        if (cancelled) return;
        const lang = res.data?.sttLanguage || res.data?.ttsLanguage;
        if (lang) sttLanguageRef.current = lang;
      })
      .catch(() => {});
    return () => {
      cancelled = true;
    };
  }, []);

  // Debug logger with an mm:ss.mmm timestamp; no-op unless window.__dlog is set.
  const _dlog = useCallback((tag: string, info?: string) => {
    const t = new Date();
    const ts = `${t.getMinutes()}:${String(t.getSeconds()).padStart(2, '0')}.${String(
      t.getMilliseconds()
    ).padStart(3, '0')}`;
    (window as any).__dlog?.(`[${ts}] ${tag}${info ? ' ' + info : ''}`);
  }, []);

  // Single funnel for state transitions: dedupes, logs, keeps ref + state in sync.
  const _setState = useCallback(
    (next: VoiceState) => {
      const prev = stateRef.current;
      if (prev === next) return;
      _dlog('VOICE', `${prev} -> ${next}`);
      stateRef.current = next;
      setState(next);
    },
    [_dlog]
  );

  const _setMuted = useCallback(
    (next: boolean) => {
      mutedRef.current = next;
      setMuted(next);
      _dlog('MUTED', String(next));
    },
    [_dlog]
  );

  const voiceStream = useVoiceStream({
    onFinal: (text) => {
      cbRef.current.onFinalText?.(text);
    },
    onInterim: (text) => {
      cbRef.current.onInterimText?.(text);
    },
    onError: (err) => _dlog('VOICE-ERR', String(err)),
  });

  const _startStream = useCallback(() => {
    return voiceStream.start(sttLanguageRef.current);
  }, [voiceStream]);

  /** idle -> listening; starts the mic/STT stream. Reverts to idle on mic failure. */
  const activate = useCallback(async () => {
    if (stateRef.current !== 'idle') return;
    _setState('listening');
    try {
      await _startStream();
    } catch (err) {
      _dlog('MIC-ERR', String(err));
      _setState('idle');
    }
  }, [_setState, _startStream, _dlog]);

  /** Any state -> idle; stops the mic/STT stream. */
  const deactivate = useCallback(() => {
    voiceStream.stop();
    _setState('idle');
  }, [_setState, voiceStream]);

  /** TTS started: stop the mic and enter botSpeaking (ignored while idle). */
  const ttsPlaying = useCallback(() => {
    const cur = stateRef.current;
    if (cur === 'idle') return;
    voiceStream.stop();
    _setState('botSpeaking');
  }, [_setState, voiceStream]);

  /** User paused TTS (barge-in): botSpeaking -> interrupted; mic back on unless muted. */
  const ttsPaused = useCallback(() => {
    if (stateRef.current !== 'botSpeaking') return;
    _setState('interrupted');
    // BUGFIX: previously the stream restarted even while muted. toggleMute
    // restarts it on unmute (state is 'interrupted'), so nothing is lost.
    if (!mutedRef.current) {
      _startStream().catch((err) => _dlog('MIC-ERR', String(err)));
    }
  }, [_setState, _startStream, _dlog]);

  /** TTS finished naturally: back to listening; mic back on unless muted. */
  const ttsEnded = useCallback(() => {
    const cur = stateRef.current;
    if (cur !== 'botSpeaking' && cur !== 'interrupted') return;
    _setState('listening');
    // BUGFIX: respect the mute flag, consistent with ttsStopped/toggleMute.
    if (!mutedRef.current) {
      _startStream().catch((err) => _dlog('MIC-ERR', String(err)));
    }
  }, [_setState, _startStream, _dlog]);

  /**
   * TTS was stopped explicitly. While muted we park in 'interrupted' with the
   * mic off; otherwise resume listening with a fresh stream.
   */
  const ttsStopped = useCallback(() => {
    const cur = stateRef.current;
    if (cur !== 'botSpeaking' && cur !== 'interrupted') return;
    voiceStream.stop();
    if (mutedRef.current) {
      _setState('interrupted');
      return;
    }
    _setState('listening');
    _startStream().catch((err) => _dlog('MIC-ERR', String(err)));
  }, [_setState, _startStream, _dlog, voiceStream]);

  /**
   * Flip the mute flag (no-op while idle). Unmuting restarts the mic stream
   * when the machine is in a mic-active state; muting stops it immediately.
   */
  const toggleMute = useCallback(() => {
    const cur = stateRef.current;
    if (cur === 'idle') return;
    if (mutedRef.current) {
      _setMuted(false);
      if (cur === 'listening' || cur === 'interrupted') {
        _startStream().catch((err) => _dlog('MIC-ERR', String(err)));
      }
    } else {
      _setMuted(true);
      voiceStream.stop();
    }
  }, [_setMuted, _startStream, voiceStream, _dlog]);

  return {
    state,
    muted,
    liveTranscript: voiceStream.interimText,
    activate,
    deactivate,
    ttsPlaying,
    ttsPaused,
    ttsEnded,
    ttsStopped,
    toggleMute,
  };
}