- {voice.state === 'muted'
+ {voice.muted
? 'Stumm – Mikrofon aus'
: voice.state === 'botSpeaking'
? (coach.streamingStatus || 'Coach spricht...')
diff --git a/src/pages/views/commcoach/useVoiceController.ts b/src/pages/views/commcoach/useVoiceController.ts
index e8cb151..71d8c4b 100644
--- a/src/pages/views/commcoach/useVoiceController.ts
+++ b/src/pages/views/commcoach/useVoiceController.ts
@@ -1,22 +1,24 @@
/**
* Voice Controller - imperative state machine for CommCoach voice interaction.
*
- * States: idle | listening | botSpeaking | interrupted | muted
+ * States: idle | listening | botSpeaking | interrupted
+ * Muted: orthogonal boolean flag (independent of main state)
*
- * Key principle: SpeechRecognition is created once and lives until deactivate().
- * When botSpeaking, we ignore onresult events instead of stopping recognition.
+ * Recognition is STOPPED during botSpeaking or when muted=true.
+ * Recognition is STARTED when entering listening/interrupted AND muted=false.
+ * Each start() begins a fresh results session indexed from 0, so processedIndex must be reset to 0 before restarting.
*/
import { useState, useRef, useCallback, useEffect } from 'react';
-export type VoiceState = 'idle' | 'listening' | 'botSpeaking' | 'interrupted' | 'muted';
+export type VoiceState = 'idle' | 'listening' | 'botSpeaking' | 'interrupted';
-const SILENCE_TIMEOUT_MS = 1500;
-const MIN_WORDS_TO_SEND = 4;
+const SILENCE_TIMEOUT_MS = 1000;
const REC_AUTORESTART_DELAY_MS = 300;
export interface VoiceControllerApi {
state: VoiceState;
+ muted: boolean;
liveTranscript: string;
activate: () => void;
deactivate: () => void;
@@ -24,12 +26,15 @@ export interface VoiceControllerApi {
ttsPaused: () => void;
ttsEnded: () => void;
toggleMute: () => void;
+ cancelPendingSpeech: () => void;
}
export function useVoiceController(onMessage: (text: string) => void): VoiceControllerApi {
const [state, setState] = useState('idle');
+ const [muted, setMuted] = useState(false);
const [liveTranscript, setLiveTranscript] = useState('');
const stateRef = useRef('idle');
+ const mutedRef = useRef(false);
const streamRef = useRef(null);
const recognitionRef = useRef(null);
const transcriptPartsRef = useRef([]);
@@ -53,25 +58,36 @@ export function useVoiceController(onMessage: (text: string) => void): VoiceCont
setState(next);
}, [_dlog]);
+ const _setMuted = useCallback((next: boolean) => {
+ mutedRef.current = next;
+ setMuted(next);
+ _dlog('MUTED', String(next));
+ }, [_dlog]);
+
+ const _cancelSilenceTimer = useCallback(() => {
+ if (silenceTimerRef.current) {
+ clearTimeout(silenceTimerRef.current);
+ silenceTimerRef.current = null;
+ }
+ }, []);
+
const _finalizeTranscript = useCallback(() => {
const full = transcriptPartsRef.current.join(' ').trim();
- _dlog('SEND', `words=${full.split(/\s+/).filter(Boolean).length} "${full.substring(0, 60)}"`);
- if (full) {
- const wordCount = full.split(/\s+/).filter(Boolean).length;
- if (wordCount >= MIN_WORDS_TO_SEND) onMessageRef.current(full);
- }
+ _dlog('SEND', `"${full.substring(0, 80)}"`);
+ if (full) onMessageRef.current(full);
transcriptPartsRef.current = [];
setLiveTranscript('');
}, [_dlog]);
const _resetSilenceTimer = useCallback(() => {
- if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current);
+ _cancelSilenceTimer();
silenceTimerRef.current = setTimeout(() => {
_finalizeTranscript();
}, SILENCE_TIMEOUT_MS);
- }, [_finalizeTranscript]);
+ }, [_cancelSilenceTimer, _finalizeTranscript]);
const _startRecognition = useCallback(() => {
+ if (mutedRef.current) return;
const rec = recognitionRef.current;
if (!rec) return;
try {
@@ -102,28 +118,24 @@ export function useVoiceController(onMessage: (text: string) => void): VoiceCont
recognition.lang = 'de-DE';
recognition.onspeechstart = () => {
- if (stateRef.current === 'botSpeaking') return;
- transcriptPartsRef.current = [];
- setLiveTranscript('');
+ if (stateRef.current !== 'listening' && stateRef.current !== 'interrupted') return;
_resetSilenceTimer();
};
recognition.onresult = (event: SpeechRecognitionEvent) => {
- const ignore = stateRef.current === 'botSpeaking';
+ if (stateRef.current !== 'listening' && stateRef.current !== 'interrupted') return;
const interimParts: string[] = [];
for (let i = processedIndexRef.current; i < event.results.length; i++) {
const r = event.results[i];
if (r.isFinal) {
const text = r[0].transcript.trim();
- if (text && !ignore) transcriptPartsRef.current.push(text);
+ if (text) transcriptPartsRef.current.push(text);
processedIndexRef.current = i + 1;
} else {
- if (ignore) continue;
const text = r[0].transcript.trim();
if (text) interimParts.push(text);
}
}
- if (ignore) return;
const currentInterim = interimParts.join(' ');
const preview = [...transcriptPartsRef.current, currentInterim].join(' ').trim();
setLiveTranscript(preview);
@@ -131,24 +143,20 @@ export function useVoiceController(onMessage: (text: string) => void): VoiceCont
};
recognition.onspeechend = () => {
- if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current);
- if (stateRef.current === 'botSpeaking') {
- transcriptPartsRef.current = [];
- setLiveTranscript('');
- return;
- }
- _finalizeTranscript();
+ if (stateRef.current !== 'listening' && stateRef.current !== 'interrupted') return;
+ _resetSilenceTimer();
};
recognition.onend = () => {
- _dlog('REC-END', `state=${stateRef.current}`);
+ _dlog('REC-END', `state=${stateRef.current} muted=${mutedRef.current}`);
if (recognitionRef.current !== recognition) return;
const cur = stateRef.current;
- if (cur === 'botSpeaking' || cur === 'muted' || cur === 'idle') return;
+ if (cur === 'botSpeaking' || cur === 'idle' || mutedRef.current) return;
processedIndexRef.current = 0;
setTimeout(() => {
if (recognitionRef.current !== recognition) return;
if (stateRef.current !== 'listening' && stateRef.current !== 'interrupted') return;
+ if (mutedRef.current) return;
try {
recognition.start();
_dlog('REC-AUTOSTART', 'ok');
@@ -166,11 +174,14 @@ export function useVoiceController(onMessage: (text: string) => void): VoiceCont
recognitionRef.current = recognition;
_startRecognition();
- }, [_dlog, _resetSilenceTimer, _finalizeTranscript, _startRecognition]);
+ }, [_dlog, _resetSilenceTimer, _startRecognition]);
const activate = useCallback(async () => {
if (stateRef.current !== 'idle') return;
_setState('listening');
+ transcriptPartsRef.current = [];
+ processedIndexRef.current = 0;
+ setLiveTranscript('');
try {
if (!streamRef.current) {
@@ -187,11 +198,8 @@ export function useVoiceController(onMessage: (text: string) => void): VoiceCont
}, [_setState, _createRecognition]);
const deactivate = useCallback(() => {
+ _cancelSilenceTimer();
_setState('idle');
- if (silenceTimerRef.current) {
- clearTimeout(silenceTimerRef.current);
- silenceTimerRef.current = null;
- }
if (recognitionRef.current) {
try { recognitionRef.current.stop(); } catch { /* ignore */ }
recognitionRef.current = null;
@@ -203,36 +211,57 @@ export function useVoiceController(onMessage: (text: string) => void): VoiceCont
transcriptPartsRef.current = [];
processedIndexRef.current = 0;
setLiveTranscript('');
- }, [_setState]);
+ }, [_setState, _cancelSilenceTimer]);
const ttsPlaying = useCallback(() => {
const cur = stateRef.current;
- if (cur === 'muted') return;
+ if (cur === 'idle') return;
+ _cancelSilenceTimer();
+ _finalizeTranscript();
+ _stopRecognition();
_setState('botSpeaking');
- }, [_setState]);
+ }, [_setState, _cancelSilenceTimer, _finalizeTranscript, _stopRecognition]);
const ttsPaused = useCallback(() => {
const cur = stateRef.current;
- if (cur === 'botSpeaking') _setState('interrupted');
- }, [_setState]);
+ if (cur !== 'botSpeaking') return;
+ transcriptPartsRef.current = [];
+ processedIndexRef.current = 0;
+ setLiveTranscript('');
+ _setState('interrupted');
+ _startRecognition();
+ }, [_setState, _startRecognition]);
const ttsEnded = useCallback(() => {
const cur = stateRef.current;
- if (cur === 'botSpeaking' || cur === 'interrupted') _setState('listening');
- }, [_setState]);
+ if (cur !== 'botSpeaking' && cur !== 'interrupted') return;
+ transcriptPartsRef.current = [];
+ processedIndexRef.current = 0;
+ setLiveTranscript('');
+ _setState('listening');
+ _startRecognition();
+ }, [_setState, _startRecognition]);
const toggleMute = useCallback(() => {
const cur = stateRef.current;
- if (cur === 'muted') {
- _setState('listening');
- _startRecognition();
- } else if (cur === 'listening' || cur === 'interrupted') {
- _setState('muted');
+ if (cur === 'idle') return;
+ if (mutedRef.current) {
+ _setMuted(false);
+ if (cur === 'listening' || cur === 'interrupted') {
+ _startRecognition();
+ }
+ } else {
+ _setMuted(true);
_stopRecognition();
- } else if (cur === 'botSpeaking') {
- _setState('muted');
}
- }, [_setState, _startRecognition, _stopRecognition]);
+ }, [_setMuted, _startRecognition, _stopRecognition]);
+
+ const cancelPendingSpeech = useCallback(() => {
+ _cancelSilenceTimer();
+ transcriptPartsRef.current = [];
+ setLiveTranscript('');
+ _dlog('CANCEL-SPEECH', 'pending speech cleared for text input');
+ }, [_cancelSilenceTimer, _dlog]);
useEffect(() => {
return () => {
@@ -250,6 +279,7 @@ export function useVoiceController(onMessage: (text: string) => void): VoiceCont
return {
state,
+ muted,
liveTranscript,
activate,
deactivate,
@@ -257,5 +287,6 @@ export function useVoiceController(onMessage: (text: string) => void): VoiceCont
ttsPaused,
ttsEnded,
toggleMute,
+ cancelPendingSpeech,
};
}