diff --git a/src/hooks/useCommcoach.ts b/src/hooks/useCommcoach.ts index 7a8c5df..d416280 100644 --- a/src/hooks/useCommcoach.ts +++ b/src/hooks/useCommcoach.ts @@ -501,8 +501,9 @@ export function useCommcoach(): CommcoachHookReturn { try { const completed = await completeSessionApi(request, instanceId, session.id); if (isMountedRef.current) { + setMessages([]); setSession(completed); - if (selectedContextId) await selectContext(selectedContextId); + if (selectedContextId) await selectContext(selectedContextId, { skipSessionResume: true }); } } catch (err: any) { if (isMountedRef.current) setError(err.message || 'Fehler beim Abschliessen'); diff --git a/src/pages/views/commcoach/CommcoachDossierView.tsx b/src/pages/views/commcoach/CommcoachDossierView.tsx index d1362bd..41eac36 100644 --- a/src/pages/views/commcoach/CommcoachDossierView.tsx +++ b/src/pages/views/commcoach/CommcoachDossierView.tsx @@ -20,20 +20,10 @@ import AutoScroll from '../../../components/UiComponents/AutoScroll/AutoScroll'; import ReactMarkdown from 'react-markdown'; import remarkGfm from 'remark-gfm'; import styles from './CommcoachDossierView.module.css'; +import { useVoiceController } from './useVoiceController'; type TabKey = 'coaching' | 'tasks' | 'sessions' | 'scores' | 'documents'; -/** - * Voice State Machine - * - * idle – no session active, everything off - * listening – mic on, recognition active, TTS off - * botSpeaking – TTS playing, mic/recognition suspended - * interrupted – TTS paused (resumable), mic on, recognition active - * muted – mic off, TTS continues if playing - */ -type VoiceState = 'idle' | 'listening' | 'botSpeaking' | 'interrupted' | 'muted'; - export const CommcoachDossierView: React.FC = () => { const coach = useCommcoach(); const { request } = useApiRequest(); @@ -53,16 +43,10 @@ export const CommcoachDossierView: React.FC = () => { const [selectedPersonaId, setSelectedPersonaId] = useState(undefined); const inputRef = useRef(null); - const streamRef = 
useRef(null); - const speechRecognitionRef = useRef(null); - const transcriptPartsRef = useRef([]); - const processedResultIndexRef = useRef(0); - const silenceTimerRef = useRef | null>(null); - const [liveTranscript, setLiveTranscript] = useState(''); + const sendMessageRef = useRef(coach.sendMessage); + sendMessageRef.current = coach.sendMessage; - // Voice State Machine - const [voiceState, setVoiceState] = useState('idle'); - const voiceStateRef = useRef('idle'); + const voice = useVoiceController((text) => sendMessageRef.current(text)); // #region agent log const debugLogsRef = useRef([]); @@ -78,31 +62,15 @@ export const CommcoachDossierView: React.FC = () => { useEffect(() => { (window as any).__dlog = _dlog; return () => { delete (window as any).__dlog; }; }, [_dlog]); // #endregion - const _transitionVoice = useCallback((next: VoiceState) => { - const prev = voiceStateRef.current; - if (prev === next) return; - _dlog('VOICE', `${prev} -> ${next}`); - voiceStateRef.current = next; - setVoiceState(next); - }, [_dlog]); - - // Subscribe to TTS events from the hook useEffect(() => { coach.onTtsEventRef.current = (event: TtsEvent) => { - const cur = voiceStateRef.current; - if (event === 'playing') { - if (cur !== 'muted') _transitionVoice('botSpeaking'); - } else if (event === 'ended') { - if (cur === 'botSpeaking') _transitionVoice('listening'); - if (cur === 'interrupted') _transitionVoice('listening'); - } else if (event === 'paused') { - if (cur === 'botSpeaking') _transitionVoice('interrupted'); - } else if (event === 'error') { - if (cur === 'botSpeaking') _transitionVoice('listening'); - } + if (event === 'playing') voice.ttsPlaying(); + else if (event === 'ended') voice.ttsEnded(); + else if (event === 'paused') voice.ttsPaused(); + else if (event === 'error') voice.ttsEnded(); }; return () => { coach.onTtsEventRef.current = null; }; - }, [coach.onTtsEventRef, _transitionVoice]); + }, [coach.onTtsEventRef, voice.ttsPlaying, voice.ttsEnded, 
voice.ttsPaused]); // Auto-select first context useEffect(() => { @@ -139,190 +107,16 @@ export const CommcoachDossierView: React.FC = () => { .catch(() => {}); }, [instanceId, request]); - // Transition to idle when session ends or tab changes away useEffect(() => { if (activeTab !== 'coaching' || !coach.session) { - _transitionVoice('idle'); - } else if (voiceStateRef.current === 'idle') { - _transitionVoice('listening'); + voice.deactivate(); + } else if (voice.state === 'idle') { + voice.activate(); } - }, [activeTab, coach.session?.id, _transitionVoice]); + }, [activeTab, coach.session?.id, voice]); - // Hardware control: start/stop recognition + mic based on voiceState - useEffect(() => { - const micShouldBeOn = voiceState === 'listening' || voiceState === 'interrupted'; - const micShouldBeOff = voiceState === 'idle' || voiceState === 'botSpeaking' || voiceState === 'muted'; - - if (micShouldBeOff) { - if (speechRecognitionRef.current) { - try { speechRecognitionRef.current.stop(); } catch { /* ignore */ } - } - if (voiceState === 'idle' && streamRef.current) { - streamRef.current.getTracks().forEach(t => t.stop()); - streamRef.current = null; - speechRecognitionRef.current = null; - } - return; - } - - if (!micShouldBeOn) return; - - const SpeechRecognitionApi = (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition; - if (!SpeechRecognitionApi) return; - - if (speechRecognitionRef.current) { - try { - speechRecognitionRef.current.start(); - _dlog('REC-RESTART', 'reused existing'); - } catch { - _dlog('REC-RESTART', 'existing failed, recreating'); - speechRecognitionRef.current = null; - } - if (speechRecognitionRef.current) return; - } - - let cancelled = false; - const SILENCE_TIMEOUT_MS = 1500; - const MIN_WORDS_TO_INTERRUPT = 4; - - const init = async () => { - try { - if (!streamRef.current) { - const stream = await navigator.mediaDevices.getUserMedia({ - audio: { echoCancellation: true, noiseSuppression: true }, - }); - if 
(cancelled) { stream.getTracks().forEach(t => t.stop()); return; } - streamRef.current = stream; - } - - const recognition = new SpeechRecognitionApi(); - recognition.continuous = true; - recognition.interimResults = true; - recognition.lang = 'de-DE'; - - const _sendAndClearTranscript = () => { - const fullTranscript = transcriptPartsRef.current.join(' ').trim(); - _dlog('SEND', `words=${fullTranscript.split(/\s+/).filter(Boolean).length} "${fullTranscript.substring(0,60)}"`); - if (fullTranscript) { - const wordCount = fullTranscript.split(/\s+/).filter(Boolean).length; - if (wordCount >= MIN_WORDS_TO_INTERRUPT) coach.sendMessage(fullTranscript); - } - transcriptPartsRef.current = []; - processedResultIndexRef.current = 0; - setLiveTranscript(''); - }; - - const _resetSilenceTimer = () => { - if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current); - silenceTimerRef.current = setTimeout(() => { - if (cancelled) return; - _sendAndClearTranscript(); - }, SILENCE_TIMEOUT_MS); - }; - - recognition.onspeechstart = () => { - if (cancelled || voiceStateRef.current === 'botSpeaking') return; - transcriptPartsRef.current = []; - processedResultIndexRef.current = 0; - setLiveTranscript(''); - _resetSilenceTimer(); - }; - - recognition.onresult = (event: SpeechRecognitionEvent) => { - if (cancelled) return; - const isBotSpeaking = voiceStateRef.current === 'botSpeaking'; - const interimParts: string[] = []; - for (let i = processedResultIndexRef.current; i < event.results.length; i++) { - const r = event.results[i]; - if (r.isFinal) { - const text = r[0].transcript.trim(); - if (text && !isBotSpeaking) transcriptPartsRef.current.push(text); - processedResultIndexRef.current = i + 1; - } else { - if (isBotSpeaking) continue; - const text = r[0].transcript.trim(); - if (text) interimParts.push(text); - } - } - if (isBotSpeaking) return; - const currentInterim = interimParts.join(' '); - const preview = [...transcriptPartsRef.current, currentInterim].join(' 
').trim(); - setLiveTranscript(preview); - if (preview) _resetSilenceTimer(); - }; - - recognition.onspeechend = () => { - if (cancelled) return; - if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current); - if (voiceStateRef.current === 'botSpeaking') { - transcriptPartsRef.current = []; - processedResultIndexRef.current = 0; - setLiveTranscript(''); - return; - } - _sendAndClearTranscript(); - }; - - recognition.onend = () => { - _dlog('REC-END', `state=${voiceStateRef.current}`); - if (cancelled) return; - if (voiceStateRef.current === 'botSpeaking' || voiceStateRef.current === 'muted' || voiceStateRef.current === 'idle') return; - if (speechRecognitionRef.current === recognition) { - try { recognition.start(); } catch { speechRecognitionRef.current = null; } - } - }; - - recognition.onerror = (event: any) => { - _dlog('REC-ERR', event.error); - if (event.error === 'no-speech' || event.error === 'aborted') return; - console.warn('SpeechRecognition error:', event.error); - }; - - speechRecognitionRef.current = recognition; - recognition.start(); - } catch (err) { - console.warn('Mic access failed:', err); - } - }; - - init(); - return () => { cancelled = true; }; - }, [voiceState, _dlog, coach]); - - // Cleanup on unmount - useEffect(() => { - return () => { - if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current); - if (speechRecognitionRef.current) { - try { speechRecognitionRef.current.stop(); } catch { /* ignore */ } - speechRecognitionRef.current = null; - } - if (streamRef.current) { - streamRef.current.getTracks().forEach(t => t.stop()); - streamRef.current = null; - } - }; - }, []); - - // Voice actions - const handleStopTts = useCallback(() => { - coach.stopTts(); - }, [coach]); - - const handleResumeTts = useCallback(() => { - coach.resumeTts(); - }, [coach]); - - const handleToggleMute = useCallback(() => { - const cur = voiceStateRef.current; - if (cur === 'muted') { - _transitionVoice('listening'); - } else if (cur === 
'listening' || cur === 'interrupted') { - _transitionVoice('muted'); - } else if (cur === 'botSpeaking') { - _transitionVoice('muted'); - } - }, [_transitionVoice]); + const handleStopTts = useCallback(() => coach.stopTts(), [coach]); + const handleResumeTts = useCallback(() => coach.resumeTts(), [coach]); const handleSend = useCallback(async () => { if (!coach.inputValue.trim() || coach.isStreaming) return; @@ -539,18 +333,18 @@ export const CommcoachDossierView: React.FC = () => {
Session aktiv
- {voiceState === 'botSpeaking' && ( + {voice.state === 'botSpeaking' && ( )} - {voiceState === 'interrupted' && coach.hasAudioToResume() && ( + {voice.state === 'interrupted' && coach.hasAudioToResume() && ( )}
{/* Messages */} - +
{coach.messages.map(msg => (
@@ -574,9 +368,9 @@ export const CommcoachDossierView: React.FC = () => {
))} - {liveTranscript && ( + {voice.liveTranscript && (
-
{liveTranscript}
+
{voice.liveTranscript}
)} {coach.isStreaming && ( @@ -596,17 +390,17 @@ export const CommcoachDossierView: React.FC = () => { {/* Input Area */}
- - {voiceState === 'muted' + + {voice.state === 'muted' ? 'Stumm – Mikrofon aus' - : voiceState === 'botSpeaking' + : voice.state === 'botSpeaking' ? (coach.streamingStatus || 'Coach spricht...') : coach.isStreaming ? (coach.streamingStatus || 'Coach denkt nach...') - : voiceState === 'interrupted' + : voice.state === 'interrupted' ? 'Unterbrochen – Mikrofon an' - : voiceState === 'listening' - ? (liveTranscript ? 'Spricht...' : 'Mikrofon an – bitte sprechen') + : voice.state === 'listening' + ? (voice.liveTranscript ? 'Spricht...' : 'Mikrofon an – bitte sprechen') : 'Mikrofon wird gestartet...'}
diff --git a/src/pages/views/commcoach/useVoiceController.ts b/src/pages/views/commcoach/useVoiceController.ts
new file mode 100644
index 0000000..e8cb151
--- /dev/null
+++ b/src/pages/views/commcoach/useVoiceController.ts
@@ -0,0 +1,343 @@
+/**
+ * Voice Controller - imperative state machine for CommCoach voice interaction.
+ *
+ * States:
+ *   idle        - no session active, microphone and recognition released
+ *   listening   - recognition running, user speech is collected
+ *   botSpeaking - TTS playing, recognition results are ignored
+ *   interrupted - TTS paused, user speech is collected again
+ *   muted       - recognition stopped until the user unmutes
+ *
+ * Key principle: SpeechRecognition is created once per activation and lives
+ * until deactivate(). While the bot is speaking, results are ignored instead
+ * of tearing the recognition instance down.
+ */
+
+import { useState, useRef, useCallback, useEffect, useMemo } from 'react';
+
+export type VoiceState = 'idle' | 'listening' | 'botSpeaking' | 'interrupted' | 'muted';
+
+/** Quiet period after the last recognised words before the transcript is sent. */
+const SILENCE_TIMEOUT_MS = 1500;
+/** Transcripts with fewer words than this are dropped instead of sent. */
+const MIN_WORDS_TO_SEND = 4;
+/** Delay before recognition is restarted after the browser ended it. */
+const REC_AUTORESTART_DELAY_MS = 300;
+
+export interface VoiceControllerApi {
+  state: VoiceState;
+  liveTranscript: string;
+  activate: () => void;
+  deactivate: () => void;
+  ttsPlaying: () => void;
+  ttsPaused: () => void;
+  ttsEnded: () => void;
+  toggleMute: () => void;
+}
+
+/** States in which recognition has to be running and is restarted when it ends. */
+function _isCapturing(state: VoiceState): boolean {
+  return state === 'listening' || state === 'interrupted';
+}
+
+/** Looks up the (possibly webkit-prefixed) SpeechRecognition constructor on window. */
+function _getRecognitionCtor(): any {
+  return (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;
+}
+
+/**
+ * Hook that owns the microphone stream and the speech recognition instance of a
+ * coaching session and exposes them as a small state machine.
+ *
+ * @param onMessage Receives the joined transcript once the user stopped speaking,
+ *   provided it has at least MIN_WORDS_TO_SEND words. The most recent callback is
+ *   always used, so callers do not need to memoize it.
+ * @returns The current state, the live transcript preview and the transition
+ *   actions. The returned object is memoized and only changes identity when
+ *   `state` or `liveTranscript` change.
+ */
+export function useVoiceController(onMessage: (text: string) => void): VoiceControllerApi {
+  const [state, setState] = useState<VoiceState>('idle');
+  const [liveTranscript, setLiveTranscript] = useState('');
+  const stateRef = useRef<VoiceState>('idle');
+  const streamRef = useRef<MediaStream | null>(null);
+  // Loosely typed because the constructor is resolved from window at runtime.
+  const recognitionRef = useRef<any>(null);
+  // True between the onstart and onend events of the current recognition.
+  const recognitionRunningRef = useRef(false);
+  const transcriptPartsRef = useRef<string[]>([]);
+  const processedIndexRef = useRef(0);
+  const silenceTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  // Bumped on deactivate/unmount so a pending activate() can tell it is stale.
+  const activationIdRef = useRef(0);
+  // Set when microphone setup failed; blocks automatic retries until deactivate().
+  const micFailedRef = useRef(false);
+  const onMessageRef = useRef(onMessage);
+  onMessageRef.current = onMessage;
+
+  // NOTE(review): forwards one pre-formatted string to window.__dlog; confirm the
+  // registered sink does not prepend a second timestamp.
+  const _dlog = useCallback((tag: string, info?: string) => {
+    const t = new Date();
+    const ts = `${t.getMinutes()}:${String(t.getSeconds()).padStart(2, '0')}.${String(t.getMilliseconds()).padStart(3, '0')}`;
+    const entry = `[${ts}] ${tag}${info ? ' ' + info : ''}`;
+    (window as any).__dlog?.(entry);
+  }, []);
+
+  const _setState = useCallback((next: VoiceState) => {
+    const prev = stateRef.current;
+    if (prev === next) return;
+    _dlog('VOICE', `${prev} -> ${next}`);
+    stateRef.current = next;
+    setState(next);
+  }, [_dlog]);
+
+  const _clearSilenceTimer = useCallback(() => {
+    if (silenceTimerRef.current) {
+      clearTimeout(silenceTimerRef.current);
+      silenceTimerRef.current = null;
+    }
+  }, []);
+
+  /** Sends the collected transcript if it is long enough, then resets it. */
+  const _finalizeTranscript = useCallback(() => {
+    const full = transcriptPartsRef.current.join(' ').trim();
+    const wordCount = full.split(/\s+/).filter(Boolean).length;
+    _dlog('SEND', `words=${wordCount} "${full.substring(0, 60)}"`);
+    if (wordCount >= MIN_WORDS_TO_SEND) onMessageRef.current(full);
+    transcriptPartsRef.current = [];
+    setLiveTranscript('');
+  }, [_dlog]);
+
+  const _resetSilenceTimer = useCallback(() => {
+    _clearSilenceTimer();
+    silenceTimerRef.current = setTimeout(() => {
+      silenceTimerRef.current = null;
+      _finalizeTranscript();
+    }, SILENCE_TIMEOUT_MS);
+  }, [_clearSilenceTimer, _finalizeTranscript]);
+
+  const _startRecognition = useCallback(() => {
+    const rec = recognitionRef.current;
+    // start() throws when called on a running instance, so skip it in that case.
+    if (!rec || recognitionRunningRef.current) return;
+    try {
+      rec.start();
+      _dlog('REC-START', 'ok');
+    } catch {
+      _dlog('REC-START', 'failed');
+    }
+  }, [_dlog]);
+
+  /**
+   * Stops the current recognition. With `discardPending` the instance is aborted,
+   * so audio captured so far does not produce any further results.
+   */
+  const _stopRecognition = useCallback((discardPending = false) => {
+    const rec = recognitionRef.current;
+    if (!rec) return;
+    try {
+      if (discardPending) rec.abort();
+      else rec.stop();
+    } catch {
+      /* ignore */
+    }
+  }, []);
+
+  const _createRecognition = useCallback(() => {
+    const SpeechRecognitionApi = _getRecognitionCtor();
+    if (!SpeechRecognitionApi) return;
+
+    const recognition = new SpeechRecognitionApi();
+    recognition.continuous = true;
+    recognition.interimResults = true;
+    recognition.lang = 'de-DE';
+
+    // Events of an instance that was already replaced or released are ignored.
+    const isCurrent = () => recognitionRef.current === recognition;
+
+    recognition.onstart = () => {
+      if (isCurrent()) recognitionRunningRef.current = true;
+    };
+
+    recognition.onspeechstart = () => {
+      if (!isCurrent() || stateRef.current === 'botSpeaking') return;
+      transcriptPartsRef.current = [];
+      setLiveTranscript('');
+      _resetSilenceTimer();
+    };
+
+    recognition.onresult = (event: SpeechRecognitionEvent) => {
+      if (!isCurrent()) return;
+      const ignore = stateRef.current === 'botSpeaking';
+      const interimParts: string[] = [];
+      for (let i = processedIndexRef.current; i < event.results.length; i++) {
+        const r = event.results[i];
+        if (r.isFinal) {
+          const text = r[0].transcript.trim();
+          if (text && !ignore) transcriptPartsRef.current.push(text);
+          // Remember how far final results were consumed so none is added twice.
+          processedIndexRef.current = i + 1;
+        } else {
+          if (ignore) continue;
+          const text = r[0].transcript.trim();
+          if (text) interimParts.push(text);
+        }
+      }
+      if (ignore) return;
+      const currentInterim = interimParts.join(' ');
+      const preview = [...transcriptPartsRef.current, currentInterim].join(' ').trim();
+      setLiveTranscript(preview);
+      if (preview) _resetSilenceTimer();
+    };
+
+    recognition.onspeechend = () => {
+      if (!isCurrent()) return;
+      _clearSilenceTimer();
+      if (stateRef.current === 'botSpeaking') {
+        transcriptPartsRef.current = [];
+        setLiveTranscript('');
+        return;
+      }
+      _finalizeTranscript();
+    };
+
+    recognition.onend = () => {
+      _dlog('REC-END', `state=${stateRef.current}`);
+      if (!isCurrent()) return;
+      recognitionRunningRef.current = false;
+      // The index refers to this run's result list; reset it before any restart.
+      processedIndexRef.current = 0;
+      if (!_isCapturing(stateRef.current)) return;
+      setTimeout(() => {
+        if (!isCurrent() || recognitionRunningRef.current) return;
+        if (!_isCapturing(stateRef.current)) return;
+        try {
+          recognition.start();
+          _dlog('REC-AUTOSTART', 'ok');
+        } catch {
+          _dlog('REC-AUTOSTART', 'failed');
+        }
+      }, REC_AUTORESTART_DELAY_MS);
+    };
+
+    recognition.onerror = (event: any) => {
+      _dlog('REC-ERR', event.error);
+      if (event.error === 'no-speech' || event.error === 'aborted') return;
+      console.warn('SpeechRecognition error:', event.error);
+    };
+
+    recognitionRef.current = recognition;
+    recognitionRunningRef.current = false;
+    // A mute requested while the microphone was still being set up stays in effect.
+    if (stateRef.current !== 'muted') _startRecognition();
+  }, [_dlog, _clearSilenceTimer, _resetSilenceTimer, _finalizeTranscript, _startRecognition]);
+
+  /** Stops timers, recognition and microphone and invalidates pending activations. */
+  const _releaseHardware = useCallback(() => {
+    activationIdRef.current += 1;
+    _clearSilenceTimer();
+    if (recognitionRef.current) {
+      try { recognitionRef.current.stop(); } catch { /* ignore */ }
+      recognitionRef.current = null;
+    }
+    recognitionRunningRef.current = false;
+    if (streamRef.current) {
+      streamRef.current.getTracks().forEach(t => t.stop());
+      streamRef.current = null;
+    }
+  }, [_clearSilenceTimer]);
+
+  const activate = useCallback(async () => {
+    // After a failed microphone setup wait for deactivate() instead of retrying on
+    // every call; callers may invoke activate() whenever they observe 'idle'.
+    if (stateRef.current !== 'idle' || micFailedRef.current) return;
+    const activationId = activationIdRef.current;
+    _setState('listening');
+
+    // Without recognition support there is nothing to feed, so leave the mic closed.
+    if (!_getRecognitionCtor()) {
+      _dlog('REC-START', 'unsupported');
+      return;
+    }
+
+    try {
+      if (!streamRef.current) {
+        const stream = await navigator.mediaDevices.getUserMedia({
+          audio: { echoCancellation: true, noiseSuppression: true },
+        });
+        if (activationIdRef.current !== activationId) {
+          // Deactivated or unmounted while the permission prompt was pending.
+          stream.getTracks().forEach(t => t.stop());
+          return;
+        }
+        streamRef.current = stream;
+      }
+      _createRecognition();
+    } catch (err) {
+      console.warn('Mic access failed:', err);
+      if (activationIdRef.current !== activationId) return;
+      micFailedRef.current = true;
+      _releaseHardware();
+      _setState('idle');
+    }
+  }, [_dlog, _setState, _createRecognition, _releaseHardware]);
+
+  const deactivate = useCallback(() => {
+    _setState('idle');
+    micFailedRef.current = false;
+    _releaseHardware();
+    transcriptPartsRef.current = [];
+    processedIndexRef.current = 0;
+    setLiveTranscript('');
+  }, [_setState, _releaseHardware]);
+
+  const ttsPlaying = useCallback(() => {
+    if (stateRef.current === 'muted') return;
+    _setState('botSpeaking');
+  }, [_setState]);
+
+  const ttsPaused = useCallback(() => {
+    if (stateRef.current !== 'botSpeaking') return;
+    _setState('interrupted');
+    // Recognition may have ended while the bot was speaking; bring it back.
+    _startRecognition();
+  }, [_setState, _startRecognition]);
+
+  const ttsEnded = useCallback(() => {
+    const cur = stateRef.current;
+    if (cur !== 'botSpeaking' && cur !== 'interrupted') return;
+    _setState('listening');
+    // Recognition may have ended while the bot was speaking; bring it back.
+    _startRecognition();
+  }, [_setState, _startRecognition]);
+
+  const toggleMute = useCallback(() => {
+    const cur = stateRef.current;
+    if (cur === 'muted') {
+      _setState('listening');
+      _startRecognition();
+    } else if (cur === 'listening' || cur === 'interrupted') {
+      _setState('muted');
+      _stopRecognition();
+    } else if (cur === 'botSpeaking') {
+      _setState('muted');
+      // Results were being ignored; abort so none of that audio is delivered now.
+      _stopRecognition(true);
+    }
+  }, [_setState, _startRecognition, _stopRecognition]);
+
+  // Release microphone and recognition when the owning component unmounts.
+  useEffect(() => _releaseHardware, [_releaseHardware]);
+
+  return useMemo(() => ({
+    state,
+    liveTranscript,
+    activate,
+    deactivate,
+    ttsPlaying,
+    ttsPaused,
+    ttsEnded,
+    toggleMute,
+  }), [state, liveTranscript, activate, deactivate, ttsPlaying, ttsPaused, ttsEnded, toggleMute]);
+}
diff --git a/src/pages/views/teamsbot/TeamsbotSessionView.tsx b/src/pages/views/teamsbot/TeamsbotSessionView.tsx index a053db7..bba714d 100644 --- a/src/pages/views/teamsbot/TeamsbotSessionView.tsx +++ b/src/pages/views/teamsbot/TeamsbotSessionView.tsx @@ -40,6 +40,17 @@ export const TeamsbotSessionView: React.FC = () => { const transcriptEndRef = useRef(null); const eventSourceRef = useRef(null); + const debugLogsRef = useRef([]); + const [debugVisible, setDebugVisible] = useState(false); + const [debugSnapshot, setDebugSnapshot] = useState([]); + const _dlog = useCallback((tag: string, info?: string) => { + const t = new Date(); + const ts = `${t.getMinutes()}:${String(t.getSeconds()).padStart(2, '0')}.${String(t.getMilliseconds()).padStart(3, '0')}`; + const entry = `[${ts}] ${tag}${info ? ' ' + info : ''}`; + debugLogsRef.current.push(entry); + if (debugLogsRef.current.length > 120) debugLogsRef.current.shift(); + }, []); + // Load session data - if no sessionId given, load the most recent session const _loadSession = useCallback(async () => { if (!instanceId) return; @@ -98,19 +109,26 @@ export const TeamsbotSessionView: React.FC = () => { const eventSource = teamsbotApi.createSessionStream(instanceId, sessionId); eventSourceRef.current = eventSource; setIsLive(true); + _dlog('SSE', 'connected'); eventSource.onmessage = (event) => { try { const sseEvent: TeamsbotSSEEvent = JSON.parse(event.data); + const evType = sseEvent.type || 'unknown'; - switch (sseEvent.type) { + _dlog('SSE', evType + (sseEvent.data ? ` ${JSON.stringify(sseEvent.data).substring(0, 80)}` : '')); + + switch (evType) { case 'sessionState': if (sseEvent.data) setSession(prev => prev ? 
{ ...prev, ...sseEvent.data } : sseEvent.data); break; - case 'transcript': - setTranscripts(prev => [...prev, sseEvent.data as TeamsbotTranscript]); + case 'transcript': { + const t = sseEvent.data as TeamsbotTranscript; + _dlog('TRANSCRIPT', `[${t?.speaker || '?'}] ${(t?.text || '').substring(0, 50)}...`); + setTranscripts(prev => [...prev, t]); break; + } case 'botResponse': setBotResponses(prev => [...prev, sseEvent.data as TeamsbotBotResponse]); @@ -155,6 +173,7 @@ export const TeamsbotSessionView: React.FC = () => { break; } } catch (err) { + _dlog('SSE-ERR', String(err)); console.error('SSE parse error:', err); } }; @@ -169,7 +188,7 @@ export const TeamsbotSessionView: React.FC = () => { sseSessionRef.current = null; setIsLive(false); }; - }, [instanceId, sessionId, sessionStatus]); + }, [instanceId, sessionId, sessionStatus, _dlog]); // Polling fallback: refresh session data every 5s when SSE is not connected const pollRef = useRef | null>(null); @@ -362,6 +381,21 @@ export const TeamsbotSessionView: React.FC = () => { )}
+ {/* Debug Log (SSE/Transcript/Chat) */} +
+ + {debugVisible && ( +
+ {debugSnapshot.map((l, i) =>
{l}
)} +
+ )} +
+ {/* Debug Screenshots (SysAdmin only) */} {_isSysAdmin && (