/**
 * Voice Controller - imperative state machine for CommCoach voice interaction.
 *
 * States: idle | listening | botSpeaking | interrupted
 * Muted: orthogonal boolean flag (independent of main state)
 *
 * Uses the generic useVoiceStream hook for mic capture + STT streaming.
 * Google Streaming STT handles silence detection natively.
 * STT language is loaded from central voice preferences (/api/voice/preferences).
 */
import { useState, useRef, useCallback, useEffect } from 'react';
import { useVoiceStream } from '../../../hooks/useSpeechAudioCapture';
import api from '../../../api';
|
export type VoiceState = 'idle' | 'listening' | 'botSpeaking' | 'interrupted';
|
|
|
|
export interface VoiceControllerApi {
|
|
state: VoiceState;
|
|
muted: boolean;
|
|
liveTranscript: string;
|
|
activate: () => void;
|
|
deactivate: () => void;
|
|
ttsPlaying: () => void;
|
|
ttsPaused: () => void;
|
|
ttsEnded: () => void;
|
|
ttsStopped: () => void;
|
|
toggleMute: () => void;
|
|
}
|
|
|
|
export interface VoiceControllerCallbacks {
|
|
onFinalText?: (text: string) => void | Promise<void>;
|
|
onInterimText?: (text: string) => void;
|
|
}
|
|
|
|
const _DEFAULT_STT_LANGUAGE = 'de-DE';
|
|
|
|
export function useVoiceController(callbacks: VoiceControllerCallbacks): VoiceControllerApi {
|
|
const [state, setState] = useState<VoiceState>('idle');
|
|
const [muted, setMuted] = useState(false);
|
|
const stateRef = useRef<VoiceState>('idle');
|
|
const mutedRef = useRef(false);
|
|
const cbRef = useRef(callbacks);
|
|
cbRef.current = callbacks;
|
|
|
|
const sttLanguageRef = useRef<string>(_DEFAULT_STT_LANGUAGE);
|
|
|
|
useEffect(() => {
|
|
let cancelled = false;
|
|
api.get('/api/voice/preferences').then((res) => {
|
|
if (cancelled) return;
|
|
const lang = res.data?.sttLanguage || res.data?.ttsLanguage;
|
|
if (lang) sttLanguageRef.current = lang;
|
|
}).catch(() => {});
|
|
return () => { cancelled = true; };
|
|
}, []);
|
|
|
|
const _dlog = useCallback((tag: string, info?: string) => {
|
|
const t = new Date();
|
|
const ts = `${t.getMinutes()}:${String(t.getSeconds()).padStart(2, '0')}.${String(t.getMilliseconds()).padStart(3, '0')}`;
|
|
(window as any).__dlog?.(`[${ts}] ${tag}${info ? ' ' + info : ''}`);
|
|
}, []);
|
|
|
|
const _setState = useCallback((next: VoiceState) => {
|
|
const prev = stateRef.current;
|
|
if (prev === next) return;
|
|
_dlog('VOICE', `${prev} -> ${next}`);
|
|
stateRef.current = next;
|
|
setState(next);
|
|
}, [_dlog]);
|
|
|
|
const _setMuted = useCallback((next: boolean) => {
|
|
mutedRef.current = next;
|
|
setMuted(next);
|
|
_dlog('MUTED', String(next));
|
|
}, [_dlog]);
|
|
|
|
const voiceStream = useVoiceStream({
|
|
onFinal: (text) => {
|
|
cbRef.current.onFinalText?.(text);
|
|
},
|
|
onInterim: (text) => {
|
|
cbRef.current.onInterimText?.(text);
|
|
},
|
|
onError: (err) => _dlog('VOICE-ERR', String(err)),
|
|
});
|
|
|
|
const _startStream = useCallback(() => {
|
|
return voiceStream.start(sttLanguageRef.current);
|
|
}, [voiceStream]);
|
|
|
|
const activate = useCallback(async () => {
|
|
if (stateRef.current !== 'idle') return;
|
|
_setState('listening');
|
|
try {
|
|
await _startStream();
|
|
} catch (err) {
|
|
_dlog('MIC-ERR', String(err));
|
|
_setState('idle');
|
|
}
|
|
}, [_setState, _startStream, _dlog]);
|
|
|
|
const deactivate = useCallback(() => {
|
|
voiceStream.stop();
|
|
_setState('idle');
|
|
}, [_setState, voiceStream]);
|
|
|
|
const ttsPlaying = useCallback(() => {
|
|
const cur = stateRef.current;
|
|
if (cur === 'idle') return;
|
|
voiceStream.stop();
|
|
_setState('botSpeaking');
|
|
}, [_setState, voiceStream]);
|
|
|
|
const ttsPaused = useCallback(() => {
|
|
if (stateRef.current !== 'botSpeaking') return;
|
|
_setState('interrupted');
|
|
_startStream().catch((err) => _dlog('MIC-ERR', String(err)));
|
|
}, [_setState, _startStream, _dlog]);
|
|
|
|
const ttsEnded = useCallback(() => {
|
|
const cur = stateRef.current;
|
|
if (cur !== 'botSpeaking' && cur !== 'interrupted') return;
|
|
_setState('listening');
|
|
_startStream().catch((err) => _dlog('MIC-ERR', String(err)));
|
|
}, [_setState, _startStream, _dlog]);
|
|
|
|
const ttsStopped = useCallback(() => {
|
|
const cur = stateRef.current;
|
|
if (cur !== 'botSpeaking' && cur !== 'interrupted') return;
|
|
voiceStream.stop();
|
|
if (mutedRef.current) {
|
|
_setState('interrupted');
|
|
return;
|
|
}
|
|
_setState('listening');
|
|
_startStream().catch((err) => _dlog('MIC-ERR', String(err)));
|
|
}, [_setState, _startStream, _dlog, voiceStream]);
|
|
|
|
const toggleMute = useCallback(() => {
|
|
const cur = stateRef.current;
|
|
if (cur === 'idle') return;
|
|
if (mutedRef.current) {
|
|
_setMuted(false);
|
|
if (cur === 'listening' || cur === 'interrupted') {
|
|
_startStream().catch((err) => _dlog('MIC-ERR', String(err)));
|
|
}
|
|
} else {
|
|
_setMuted(true);
|
|
voiceStream.stop();
|
|
}
|
|
}, [_setMuted, _startStream, voiceStream, _dlog]);
|
|
|
|
return {
|
|
state,
|
|
muted,
|
|
liveTranscript: voiceStream.interimText,
|
|
activate,
|
|
deactivate,
|
|
ttsPlaying,
|
|
ttsPaused,
|
|
ttsEnded,
|
|
ttsStopped,
|
|
toggleMute,
|
|
};
|
|
}
|