// frontend_nyla/src/pages/views/commcoach/useVoiceController.ts

/**
* Voice Controller - imperative state machine for CommCoach voice interaction.
*
* States: idle | listening | botSpeaking | interrupted
* Muted: orthogonal boolean flag (independent of main state)
*
* Uses the generic useVoiceStream hook for mic capture + STT streaming.
* Google Streaming STT handles silence detection natively.
* STT language is loaded from central voice preferences (/api/voice/preferences).
*/
import { useState, useRef, useCallback, useEffect } from 'react';
import { useVoiceStream } from '../../../hooks/useSpeechAudioCapture';
import api from '../../../api';
/** Finite states of the voice interaction state machine. */
export type VoiceState = 'idle' | 'listening' | 'botSpeaking' | 'interrupted';
/**
 * Imperative surface returned by {@link useVoiceController}.
 * The `tts*` methods are notifications the TTS player sends into the
 * state machine; `activate`/`deactivate` are user-driven transitions.
 */
export interface VoiceControllerApi {
// Current state of the voice state machine.
state: VoiceState;
// Orthogonal mute flag; independent of `state`.
muted: boolean;
// Interim (not yet finalized) STT transcript for live display.
liveTranscript: string;
// Enter the voice loop: idle -> listening, opens the mic stream.
activate: () => void;
// Leave the voice loop: any state -> idle, closes the mic stream.
deactivate: () => void;
// TTS playback started: mic is closed, state -> botSpeaking.
ttsPlaying: () => void;
// TTS playback paused (barge-in): state -> interrupted.
ttsPaused: () => void;
// TTS playback finished naturally.
ttsEnded: () => void;
// TTS playback aborted by the user.
ttsStopped: () => void;
// Flip the mute flag; restarts the mic stream on unmute where appropriate.
toggleMute: () => void;
}
/** Callbacks fired with STT results as they stream in. */
export interface VoiceControllerCallbacks {
// Called with a finalized utterance; may be async.
onFinalText?: (text: string) => void | Promise<void>;
// Called with interim (partial) transcripts.
onInterimText?: (text: string) => void;
}
// Fallback STT language used until /api/voice/preferences responds.
const _DEFAULT_STT_LANGUAGE = 'de-DE';
/**
 * Imperative state machine driving CommCoach voice interaction.
 *
 * States: idle | listening | botSpeaking | interrupted.
 * `muted` is an orthogonal flag; while muted the mic stream is never opened.
 *
 * @param callbacks STT result handlers (read via ref so the latest render's
 *                  callbacks are always invoked without re-wiring the stream).
 * @returns Stable imperative API for the UI and the TTS player.
 */
export function useVoiceController(callbacks: VoiceControllerCallbacks): VoiceControllerApi {
const [state, setState] = useState<VoiceState>('idle');
const [muted, setMuted] = useState(false);
// Refs mirror `state`/`muted` so stable callbacks read fresh values
// instead of stale closure captures.
const stateRef = useRef<VoiceState>('idle');
const mutedRef = useRef(false);
const cbRef = useRef(callbacks);
cbRef.current = callbacks;
const sttLanguageRef = useRef<string>(_DEFAULT_STT_LANGUAGE);
// Load the STT language from central voice preferences once on mount.
// Best-effort: on failure we silently keep the default language.
useEffect(() => {
let cancelled = false;
api.get('/api/voice/preferences').then((res) => {
if (cancelled) return;
const lang = res.data?.sttLanguage || res.data?.ttsLanguage;
if (lang) sttLanguageRef.current = lang;
}).catch(() => {});
return () => { cancelled = true; };
}, []);
// Debug logger: emits "[m:ss.mmm] TAG info" via the global __dlog hook if present.
const _dlog = useCallback((tag: string, info?: string) => {
const t = new Date();
const ts = `${t.getMinutes()}:${String(t.getSeconds()).padStart(2, '0')}.${String(t.getMilliseconds()).padStart(3, '0')}`;
(window as any).__dlog?.(`[${ts}] ${tag}${info ? ' ' + info : ''}`);
}, []);
// Single choke point for state transitions: keeps ref + React state in sync
// and logs every transition.
const _setState = useCallback((next: VoiceState) => {
const prev = stateRef.current;
if (prev === next) return;
_dlog('VOICE', `${prev} -> ${next}`);
stateRef.current = next;
setState(next);
}, [_dlog]);
const _setMuted = useCallback((next: boolean) => {
mutedRef.current = next;
setMuted(next);
_dlog('MUTED', String(next));
}, [_dlog]);
const voiceStream = useVoiceStream({
onFinal: (text) => {
cbRef.current.onFinalText?.(text);
},
onInterim: (text) => {
cbRef.current.onInterimText?.(text);
},
onError: (err) => _dlog('VOICE-ERR', String(err)),
});
// Opens the mic/STT stream with the currently configured language.
const _startStream = useCallback(() => {
return voiceStream.start(sttLanguageRef.current);
}, [voiceStream]);
// idle -> listening; reverts to idle if the mic cannot be opened.
const activate = useCallback(async () => {
if (stateRef.current !== 'idle') return;
_setState('listening');
try {
await _startStream();
} catch (err) {
_dlog('MIC-ERR', String(err));
_setState('idle');
}
}, [_setState, _startStream, _dlog]);
// any state -> idle; mic stream closed.
const deactivate = useCallback(() => {
voiceStream.stop();
_setState('idle');
}, [_setState, voiceStream]);
// TTS started: close the mic so the bot does not hear itself.
const ttsPlaying = useCallback(() => {
const cur = stateRef.current;
if (cur === 'idle') return;
voiceStream.stop();
_setState('botSpeaking');
}, [_setState, voiceStream]);
// TTS paused (barge-in): botSpeaking -> interrupted, reopen the mic.
// FIX: while muted, do NOT reopen the mic — mirrors ttsStopped; toggleMute
// restarts the stream from 'interrupted' on unmute.
const ttsPaused = useCallback(() => {
if (stateRef.current !== 'botSpeaking') return;
_setState('interrupted');
if (mutedRef.current) return;
_startStream().catch((err) => _dlog('MIC-ERR', String(err)));
}, [_setState, _startStream, _dlog]);
// TTS finished naturally: resume listening.
// FIX: while muted, stay in 'interrupted' with the mic closed instead of
// opening the stream — consistent with ttsStopped's muted branch.
const ttsEnded = useCallback(() => {
const cur = stateRef.current;
if (cur !== 'botSpeaking' && cur !== 'interrupted') return;
if (mutedRef.current) {
_setState('interrupted');
return;
}
_setState('listening');
_startStream().catch((err) => _dlog('MIC-ERR', String(err)));
}, [_setState, _startStream, _dlog]);
// TTS aborted by the user: resume listening unless muted, in which case
// remain 'interrupted' with the mic closed until unmute.
const ttsStopped = useCallback(() => {
const cur = stateRef.current;
if (cur !== 'botSpeaking' && cur !== 'interrupted') return;
voiceStream.stop();
if (mutedRef.current) {
_setState('interrupted');
return;
}
_setState('listening');
_startStream().catch((err) => _dlog('MIC-ERR', String(err)));
}, [_setState, _startStream, _dlog, voiceStream]);
// Flip mute. Unmuting from listening/interrupted reopens the mic;
// unmuting during botSpeaking leaves the mic closed (bot still talking).
const toggleMute = useCallback(() => {
const cur = stateRef.current;
if (cur === 'idle') return;
if (mutedRef.current) {
_setMuted(false);
if (cur === 'listening' || cur === 'interrupted') {
_startStream().catch((err) => _dlog('MIC-ERR', String(err)));
}
} else {
_setMuted(true);
voiceStream.stop();
}
}, [_setMuted, _startStream, voiceStream, _dlog]);
return {
state,
muted,
liveTranscript: voiceStream.interimText,
activate,
deactivate,
ttsPlaying,
ttsPaused,
ttsEnded,
ttsStopped,
toggleMute,
};
}