fix: voice transcript cascading - processedIndex nur bei Recognition-Neustart resetten
Made-with: Cursor
This commit is contained in:
parent
93016a7e4c
commit
48215f165c
4 changed files with 331 additions and 241 deletions
|
|
@ -501,8 +501,9 @@ export function useCommcoach(): CommcoachHookReturn {
|
|||
try {
|
||||
const completed = await completeSessionApi(request, instanceId, session.id);
|
||||
if (isMountedRef.current) {
|
||||
setMessages([]);
|
||||
setSession(completed);
|
||||
if (selectedContextId) await selectContext(selectedContextId);
|
||||
if (selectedContextId) await selectContext(selectedContextId, { skipSessionResume: true });
|
||||
}
|
||||
} catch (err: any) {
|
||||
if (isMountedRef.current) setError(err.message || 'Fehler beim Abschliessen');
|
||||
|
|
|
|||
|
|
@ -20,20 +20,10 @@ import AutoScroll from '../../../components/UiComponents/AutoScroll/AutoScroll';
|
|||
import ReactMarkdown from 'react-markdown';
|
||||
import remarkGfm from 'remark-gfm';
|
||||
import styles from './CommcoachDossierView.module.css';
|
||||
import { useVoiceController } from './useVoiceController';
|
||||
|
||||
type TabKey = 'coaching' | 'tasks' | 'sessions' | 'scores' | 'documents';
|
||||
|
||||
/**
|
||||
* Voice State Machine
|
||||
*
|
||||
* idle – no session active, everything off
|
||||
* listening – mic on, recognition active, TTS off
|
||||
* botSpeaking – TTS playing, mic/recognition suspended
|
||||
* interrupted – TTS paused (resumable), mic on, recognition active
|
||||
* muted – mic off, TTS continues if playing
|
||||
*/
|
||||
type VoiceState = 'idle' | 'listening' | 'botSpeaking' | 'interrupted' | 'muted';
|
||||
|
||||
export const CommcoachDossierView: React.FC = () => {
|
||||
const coach = useCommcoach();
|
||||
const { request } = useApiRequest();
|
||||
|
|
@ -53,16 +43,10 @@ export const CommcoachDossierView: React.FC = () => {
|
|||
const [selectedPersonaId, setSelectedPersonaId] = useState<string | undefined>(undefined);
|
||||
|
||||
const inputRef = useRef<HTMLTextAreaElement>(null);
|
||||
const streamRef = useRef<MediaStream | null>(null);
|
||||
const speechRecognitionRef = useRef<SpeechRecognition | null>(null);
|
||||
const transcriptPartsRef = useRef<string[]>([]);
|
||||
const processedResultIndexRef = useRef(0);
|
||||
const silenceTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
|
||||
const [liveTranscript, setLiveTranscript] = useState('');
|
||||
const sendMessageRef = useRef(coach.sendMessage);
|
||||
sendMessageRef.current = coach.sendMessage;
|
||||
|
||||
// Voice State Machine
|
||||
const [voiceState, setVoiceState] = useState<VoiceState>('idle');
|
||||
const voiceStateRef = useRef<VoiceState>('idle');
|
||||
const voice = useVoiceController((text) => sendMessageRef.current(text));
|
||||
|
||||
// #region agent log
|
||||
const debugLogsRef = useRef<string[]>([]);
|
||||
|
|
@ -78,31 +62,15 @@ export const CommcoachDossierView: React.FC = () => {
|
|||
useEffect(() => { (window as any).__dlog = _dlog; return () => { delete (window as any).__dlog; }; }, [_dlog]);
|
||||
// #endregion
|
||||
|
||||
const _transitionVoice = useCallback((next: VoiceState) => {
|
||||
const prev = voiceStateRef.current;
|
||||
if (prev === next) return;
|
||||
_dlog('VOICE', `${prev} -> ${next}`);
|
||||
voiceStateRef.current = next;
|
||||
setVoiceState(next);
|
||||
}, [_dlog]);
|
||||
|
||||
// Subscribe to TTS events from the hook
|
||||
useEffect(() => {
|
||||
coach.onTtsEventRef.current = (event: TtsEvent) => {
|
||||
const cur = voiceStateRef.current;
|
||||
if (event === 'playing') {
|
||||
if (cur !== 'muted') _transitionVoice('botSpeaking');
|
||||
} else if (event === 'ended') {
|
||||
if (cur === 'botSpeaking') _transitionVoice('listening');
|
||||
if (cur === 'interrupted') _transitionVoice('listening');
|
||||
} else if (event === 'paused') {
|
||||
if (cur === 'botSpeaking') _transitionVoice('interrupted');
|
||||
} else if (event === 'error') {
|
||||
if (cur === 'botSpeaking') _transitionVoice('listening');
|
||||
}
|
||||
if (event === 'playing') voice.ttsPlaying();
|
||||
else if (event === 'ended') voice.ttsEnded();
|
||||
else if (event === 'paused') voice.ttsPaused();
|
||||
else if (event === 'error') voice.ttsEnded();
|
||||
};
|
||||
return () => { coach.onTtsEventRef.current = null; };
|
||||
}, [coach.onTtsEventRef, _transitionVoice]);
|
||||
}, [coach.onTtsEventRef, voice.ttsPlaying, voice.ttsEnded, voice.ttsPaused]);
|
||||
|
||||
// Auto-select first context
|
||||
useEffect(() => {
|
||||
|
|
@ -139,190 +107,16 @@ export const CommcoachDossierView: React.FC = () => {
|
|||
.catch(() => {});
|
||||
}, [instanceId, request]);
|
||||
|
||||
// Transition to idle when session ends or tab changes away
|
||||
useEffect(() => {
|
||||
if (activeTab !== 'coaching' || !coach.session) {
|
||||
_transitionVoice('idle');
|
||||
} else if (voiceStateRef.current === 'idle') {
|
||||
_transitionVoice('listening');
|
||||
voice.deactivate();
|
||||
} else if (voice.state === 'idle') {
|
||||
voice.activate();
|
||||
}
|
||||
}, [activeTab, coach.session?.id, _transitionVoice]);
|
||||
}, [activeTab, coach.session?.id, voice]);
|
||||
|
||||
// Hardware control: start/stop recognition + mic based on voiceState
|
||||
useEffect(() => {
|
||||
const micShouldBeOn = voiceState === 'listening' || voiceState === 'interrupted';
|
||||
const micShouldBeOff = voiceState === 'idle' || voiceState === 'botSpeaking' || voiceState === 'muted';
|
||||
|
||||
if (micShouldBeOff) {
|
||||
if (speechRecognitionRef.current) {
|
||||
try { speechRecognitionRef.current.stop(); } catch { /* ignore */ }
|
||||
}
|
||||
if (voiceState === 'idle' && streamRef.current) {
|
||||
streamRef.current.getTracks().forEach(t => t.stop());
|
||||
streamRef.current = null;
|
||||
speechRecognitionRef.current = null;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (!micShouldBeOn) return;
|
||||
|
||||
const SpeechRecognitionApi = (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;
|
||||
if (!SpeechRecognitionApi) return;
|
||||
|
||||
if (speechRecognitionRef.current) {
|
||||
try {
|
||||
speechRecognitionRef.current.start();
|
||||
_dlog('REC-RESTART', 'reused existing');
|
||||
} catch {
|
||||
_dlog('REC-RESTART', 'existing failed, recreating');
|
||||
speechRecognitionRef.current = null;
|
||||
}
|
||||
if (speechRecognitionRef.current) return;
|
||||
}
|
||||
|
||||
let cancelled = false;
|
||||
const SILENCE_TIMEOUT_MS = 1500;
|
||||
const MIN_WORDS_TO_INTERRUPT = 4;
|
||||
|
||||
const init = async () => {
|
||||
try {
|
||||
if (!streamRef.current) {
|
||||
const stream = await navigator.mediaDevices.getUserMedia({
|
||||
audio: { echoCancellation: true, noiseSuppression: true },
|
||||
});
|
||||
if (cancelled) { stream.getTracks().forEach(t => t.stop()); return; }
|
||||
streamRef.current = stream;
|
||||
}
|
||||
|
||||
const recognition = new SpeechRecognitionApi();
|
||||
recognition.continuous = true;
|
||||
recognition.interimResults = true;
|
||||
recognition.lang = 'de-DE';
|
||||
|
||||
const _sendAndClearTranscript = () => {
|
||||
const fullTranscript = transcriptPartsRef.current.join(' ').trim();
|
||||
_dlog('SEND', `words=${fullTranscript.split(/\s+/).filter(Boolean).length} "${fullTranscript.substring(0,60)}"`);
|
||||
if (fullTranscript) {
|
||||
const wordCount = fullTranscript.split(/\s+/).filter(Boolean).length;
|
||||
if (wordCount >= MIN_WORDS_TO_INTERRUPT) coach.sendMessage(fullTranscript);
|
||||
}
|
||||
transcriptPartsRef.current = [];
|
||||
processedResultIndexRef.current = 0;
|
||||
setLiveTranscript('');
|
||||
};
|
||||
|
||||
const _resetSilenceTimer = () => {
|
||||
if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current);
|
||||
silenceTimerRef.current = setTimeout(() => {
|
||||
if (cancelled) return;
|
||||
_sendAndClearTranscript();
|
||||
}, SILENCE_TIMEOUT_MS);
|
||||
};
|
||||
|
||||
recognition.onspeechstart = () => {
|
||||
if (cancelled || voiceStateRef.current === 'botSpeaking') return;
|
||||
transcriptPartsRef.current = [];
|
||||
processedResultIndexRef.current = 0;
|
||||
setLiveTranscript('');
|
||||
_resetSilenceTimer();
|
||||
};
|
||||
|
||||
recognition.onresult = (event: SpeechRecognitionEvent) => {
|
||||
if (cancelled) return;
|
||||
const isBotSpeaking = voiceStateRef.current === 'botSpeaking';
|
||||
const interimParts: string[] = [];
|
||||
for (let i = processedResultIndexRef.current; i < event.results.length; i++) {
|
||||
const r = event.results[i];
|
||||
if (r.isFinal) {
|
||||
const text = r[0].transcript.trim();
|
||||
if (text && !isBotSpeaking) transcriptPartsRef.current.push(text);
|
||||
processedResultIndexRef.current = i + 1;
|
||||
} else {
|
||||
if (isBotSpeaking) continue;
|
||||
const text = r[0].transcript.trim();
|
||||
if (text) interimParts.push(text);
|
||||
}
|
||||
}
|
||||
if (isBotSpeaking) return;
|
||||
const currentInterim = interimParts.join(' ');
|
||||
const preview = [...transcriptPartsRef.current, currentInterim].join(' ').trim();
|
||||
setLiveTranscript(preview);
|
||||
if (preview) _resetSilenceTimer();
|
||||
};
|
||||
|
||||
recognition.onspeechend = () => {
|
||||
if (cancelled) return;
|
||||
if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current);
|
||||
if (voiceStateRef.current === 'botSpeaking') {
|
||||
transcriptPartsRef.current = [];
|
||||
processedResultIndexRef.current = 0;
|
||||
setLiveTranscript('');
|
||||
return;
|
||||
}
|
||||
_sendAndClearTranscript();
|
||||
};
|
||||
|
||||
recognition.onend = () => {
|
||||
_dlog('REC-END', `state=${voiceStateRef.current}`);
|
||||
if (cancelled) return;
|
||||
if (voiceStateRef.current === 'botSpeaking' || voiceStateRef.current === 'muted' || voiceStateRef.current === 'idle') return;
|
||||
if (speechRecognitionRef.current === recognition) {
|
||||
try { recognition.start(); } catch { speechRecognitionRef.current = null; }
|
||||
}
|
||||
};
|
||||
|
||||
recognition.onerror = (event: any) => {
|
||||
_dlog('REC-ERR', event.error);
|
||||
if (event.error === 'no-speech' || event.error === 'aborted') return;
|
||||
console.warn('SpeechRecognition error:', event.error);
|
||||
};
|
||||
|
||||
speechRecognitionRef.current = recognition;
|
||||
recognition.start();
|
||||
} catch (err) {
|
||||
console.warn('Mic access failed:', err);
|
||||
}
|
||||
};
|
||||
|
||||
init();
|
||||
return () => { cancelled = true; };
|
||||
}, [voiceState, _dlog, coach]);
|
||||
|
||||
// Cleanup on unmount
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current);
|
||||
if (speechRecognitionRef.current) {
|
||||
try { speechRecognitionRef.current.stop(); } catch { /* ignore */ }
|
||||
speechRecognitionRef.current = null;
|
||||
}
|
||||
if (streamRef.current) {
|
||||
streamRef.current.getTracks().forEach(t => t.stop());
|
||||
streamRef.current = null;
|
||||
}
|
||||
};
|
||||
}, []);
|
||||
|
||||
// Voice actions
|
||||
const handleStopTts = useCallback(() => {
|
||||
coach.stopTts();
|
||||
}, [coach]);
|
||||
|
||||
const handleResumeTts = useCallback(() => {
|
||||
coach.resumeTts();
|
||||
}, [coach]);
|
||||
|
||||
const handleToggleMute = useCallback(() => {
|
||||
const cur = voiceStateRef.current;
|
||||
if (cur === 'muted') {
|
||||
_transitionVoice('listening');
|
||||
} else if (cur === 'listening' || cur === 'interrupted') {
|
||||
_transitionVoice('muted');
|
||||
} else if (cur === 'botSpeaking') {
|
||||
_transitionVoice('muted');
|
||||
}
|
||||
}, [_transitionVoice]);
|
||||
const handleStopTts = useCallback(() => coach.stopTts(), [coach]);
|
||||
const handleResumeTts = useCallback(() => coach.resumeTts(), [coach]);
|
||||
|
||||
const handleSend = useCallback(async () => {
|
||||
if (!coach.inputValue.trim() || coach.isStreaming) return;
|
||||
|
|
@ -539,18 +333,18 @@ export const CommcoachDossierView: React.FC = () => {
|
|||
<div className={styles.sessionHeader}>
|
||||
<span className={styles.sessionLabel}>Session aktiv</span>
|
||||
<div className={styles.sessionActions}>
|
||||
{voiceState === 'botSpeaking' && (
|
||||
{voice.state === 'botSpeaking' && (
|
||||
<button className={styles.btnSmallDanger} onClick={handleStopTts}>Stop</button>
|
||||
)}
|
||||
{voiceState === 'interrupted' && coach.hasAudioToResume() && (
|
||||
{voice.state === 'interrupted' && coach.hasAudioToResume() && (
|
||||
<button className={styles.btnSmall} onClick={handleResumeTts}>Weitersprechen</button>
|
||||
)}
|
||||
<button
|
||||
className={`${styles.btnSmall} ${voiceState === 'muted' ? styles.mutedActive : ''}`}
|
||||
onClick={handleToggleMute}
|
||||
title={voiceState === 'muted' ? 'Stummschaltung aufheben' : 'Stummschalten'}
|
||||
className={`${styles.btnSmall} ${voice.state === 'muted' ? styles.mutedActive : ''}`}
|
||||
onClick={voice.toggleMute}
|
||||
title={voice.state === 'muted' ? 'Stummschaltung aufheben' : 'Stummschalten'}
|
||||
>
|
||||
{voiceState === 'muted' ? '\u{1F507} Stumm' : '\u{1F3A4} Ton an'}
|
||||
{voice.state === 'muted' ? '\u{1F507} Stumm' : '\u{1F3A4} Ton an'}
|
||||
</button>
|
||||
<button className={styles.btnSmall} onClick={coach.completeSession} disabled={!!coach.actionLoading}>
|
||||
{coach.actionLoading === 'completing' ? 'Wird abgeschlossen...' : 'Abschliessen'}
|
||||
|
|
@ -562,7 +356,7 @@ export const CommcoachDossierView: React.FC = () => {
|
|||
</div>
|
||||
|
||||
{/* Messages */}
|
||||
<AutoScroll scrollDependency={coach.messages.length + (coach.isStreaming ? 1 : 0) + liveTranscript.length}>
|
||||
<AutoScroll scrollDependency={coach.messages.length + (coach.isStreaming ? 1 : 0) + voice.liveTranscript.length}>
|
||||
<div className={styles.messages}>
|
||||
{coach.messages.map(msg => (
|
||||
<div key={msg.id} className={`${styles.message} ${msg.role === 'user' ? styles.messageUser : styles.messageAssistant}`}>
|
||||
|
|
@ -574,9 +368,9 @@ export const CommcoachDossierView: React.FC = () => {
|
|||
</div>
|
||||
</div>
|
||||
))}
|
||||
{liveTranscript && (
|
||||
{voice.liveTranscript && (
|
||||
<div className={`${styles.message} ${styles.messageUser}`}>
|
||||
<div className={`${styles.messageBubble} ${styles.messageLive}`}>{liveTranscript}</div>
|
||||
<div className={`${styles.messageBubble} ${styles.messageLive}`}>{voice.liveTranscript}</div>
|
||||
</div>
|
||||
)}
|
||||
{coach.isStreaming && (
|
||||
|
|
@ -596,17 +390,17 @@ export const CommcoachDossierView: React.FC = () => {
|
|||
{/* Input Area */}
|
||||
<div className={styles.inputArea}>
|
||||
<div className={styles.voiceStatus}>
|
||||
<span className={`${styles.voiceIndicator} ${voiceState === 'listening' ? styles.voiceActive : ''}`}>
|
||||
{voiceState === 'muted'
|
||||
<span className={`${styles.voiceIndicator} ${voice.state === 'listening' ? styles.voiceActive : ''}`}>
|
||||
{voice.state === 'muted'
|
||||
? 'Stumm – Mikrofon aus'
|
||||
: voiceState === 'botSpeaking'
|
||||
: voice.state === 'botSpeaking'
|
||||
? (coach.streamingStatus || 'Coach spricht...')
|
||||
: coach.isStreaming
|
||||
? (coach.streamingStatus || 'Coach denkt nach...')
|
||||
: voiceState === 'interrupted'
|
||||
: voice.state === 'interrupted'
|
||||
? 'Unterbrochen – Mikrofon an'
|
||||
: voiceState === 'listening'
|
||||
? (liveTranscript ? 'Spricht...' : 'Mikrofon an – bitte sprechen')
|
||||
: voice.state === 'listening'
|
||||
? (voice.liveTranscript ? 'Spricht...' : 'Mikrofon an – bitte sprechen')
|
||||
: 'Mikrofon wird gestartet...'}
|
||||
</span>
|
||||
</div>
|
||||
|
|
|
|||
261
src/pages/views/commcoach/useVoiceController.ts
Normal file
261
src/pages/views/commcoach/useVoiceController.ts
Normal file
|
|
@ -0,0 +1,261 @@
|
|||
/**
|
||||
* Voice Controller - imperative state machine for CommCoach voice interaction.
|
||||
*
|
||||
* States: idle | listening | botSpeaking | interrupted | muted
|
||||
*
|
||||
* Key principle: SpeechRecognition is created once and lives until deactivate().
|
||||
* When botSpeaking, we ignore onresult events instead of stopping recognition.
|
||||
*/
|
||||
|
||||
import { useState, useRef, useCallback, useEffect } from 'react';
|
||||
|
||||
/**
 * States of the voice state machine driven by the hook in this file.
 *
 * - idle:        initial state and the state after deactivate().
 * - listening:   entered by activate(), by unmuting, and when TTS ends.
 * - botSpeaking: entered when TTS starts playing (unless muted); recognition
 *                results are ignored while in this state.
 * - interrupted: entered when TTS is paused while botSpeaking.
 * - muted:       entered via toggleMute(); recognition is stopped when muting
 *                from listening/interrupted.
 */
export type VoiceState = 'idle' | 'listening' | 'botSpeaking' | 'interrupted' | 'muted';
|
||||
|
||||
// Delay (ms) after the last recognition activity before the collected transcript is finalized.
const SILENCE_TIMEOUT_MS = 1500;
|
||||
// Minimum number of words a finalized transcript must contain to be forwarded to onMessage.
const MIN_WORDS_TO_SEND = 4;
|
||||
// Delay (ms) between a recognition `end` event and the automatic restart attempt.
const REC_AUTORESTART_DELAY_MS = 300;
|
||||
|
||||
/**
 * Value returned by useVoiceController: the observable voice state plus the
 * imperative commands that drive the state machine.
 */
export interface VoiceControllerApi {
|
||||
/** Current state of the voice state machine. */
state: VoiceState;
|
||||
/** Preview of the utterance in progress (final parts plus interim text); empty when nothing is pending. */
liveTranscript: string;
|
||||
/** Leaves `idle`: requests the microphone and sets up speech recognition. No-op when not idle. */
activate: () => void;
|
||||
/** Stops recognition, releases the microphone tracks, clears the transcript and returns to `idle`. */
deactivate: () => void;
|
||||
/** Notifies the controller that TTS playback started (ignored while muted). */
ttsPlaying: () => void;
|
||||
/** Notifies the controller that TTS playback was paused (botSpeaking -> interrupted). */
ttsPaused: () => void;
|
||||
/** Notifies the controller that TTS playback ended (botSpeaking/interrupted -> listening). */
ttsEnded: () => void;
|
||||
/** Toggles between `muted` and the listening states. */
toggleMute: () => void;
|
||||
}
|
||||
|
||||
/**
 * React hook implementing the CommCoach voice state machine.
 *
 * The hook owns the microphone stream and a single SpeechRecognition instance
 * that lives from activate() until deactivate(). While the bot is speaking the
 * recognition keeps running and its results are ignored.
 *
 * @param onMessage Called with the finalized transcript once the user stops
 *   speaking and the transcript has at least MIN_WORDS_TO_SEND words. The
 *   latest callback is always used, so callers need not memoize it.
 * @returns The current state, the live transcript preview and the commands
 *   that drive the state machine.
 */
export function useVoiceController(onMessage: (text: string) => void): VoiceControllerApi {
  const [state, setState] = useState<VoiceState>('idle');
  const [liveTranscript, setLiveTranscript] = useState('');
  // Mirror of `state` readable from event handlers without stale closures.
  const stateRef = useRef<VoiceState>('idle');
  const streamRef = useRef<MediaStream | null>(null);
  const recognitionRef = useRef<SpeechRecognition | null>(null);
  const transcriptPartsRef = useRef<string[]>([]);
  // Index of the first result of the CURRENT recognition session that has not
  // been consumed as final yet. Only valid for one session: must be reset when
  // a session ends, never while it is still running.
  const processedIndexRef = useRef(0);
  const silenceTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  // Bumped by every activate()/deactivate()/unmount so that an activation that
  // is still awaiting microphone permission can detect it has been superseded.
  const activationSeqRef = useRef(0);
  const onMessageRef = useRef(onMessage);
  onMessageRef.current = onMessage;

  /** Forwards a timestamped entry to the optional global debug logger. */
  const _dlog = useCallback((tag: string, info?: string) => {
    const t = new Date();
    const ts = `${t.getMinutes()}:${String(t.getSeconds()).padStart(2, '0')}.${String(t.getMilliseconds()).padStart(3, '0')}`;
    const entry = `[${ts}] ${tag}${info ? ' ' + info : ''}`;
    (window as any).__dlog?.(entry);
  }, []);

  /** Transitions the state machine; no-op when the state does not change. */
  const _setState = useCallback((next: VoiceState) => {
    const prev = stateRef.current;
    if (prev === next) return;
    _dlog('VOICE', `${prev} -> ${next}`);
    stateRef.current = next;
    setState(next);
  }, [_dlog]);

  /** Sends the collected transcript (if long enough) and clears the buffer. */
  const _finalizeTranscript = useCallback(() => {
    const full = transcriptPartsRef.current.join(' ').trim();
    const wordCount = full.split(/\s+/).filter(Boolean).length;
    _dlog('SEND', `words=${wordCount} "${full.substring(0, 60)}"`);
    // An empty transcript has zero words and is therefore never sent.
    if (wordCount >= MIN_WORDS_TO_SEND) onMessageRef.current(full);
    transcriptPartsRef.current = [];
    setLiveTranscript('');
  }, [_dlog]);

  /** (Re)arms the timer that finalizes the transcript after a pause. */
  const _resetSilenceTimer = useCallback(() => {
    if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current);
    silenceTimerRef.current = setTimeout(() => {
      silenceTimerRef.current = null;
      _finalizeTranscript();
    }, SILENCE_TIMEOUT_MS);
  }, [_finalizeTranscript]);

  /**
   * Starts the existing recognition instance. start() throws when the
   * recognizer is already running; that is expected and only logged.
   */
  const _startRecognition = useCallback(() => {
    const rec = recognitionRef.current;
    if (!rec) return;
    try {
      rec.start();
      _dlog('REC-START', 'ok');
    } catch {
      _dlog('REC-START', 'failed');
    }
  }, [_dlog]);

  /** Stops the existing recognition instance, ignoring "not running" errors. */
  const _stopRecognition = useCallback(() => {
    const rec = recognitionRef.current;
    if (!rec) return;
    try {
      rec.stop();
    } catch {
      /* ignore */
    }
  }, []);

  /** Creates the recognition instance, wires its handlers and starts it. */
  const _createRecognition = useCallback(() => {
    const SpeechRecognitionApi = (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;
    if (!SpeechRecognitionApi) {
      _dlog('REC-CREATE', 'SpeechRecognition API unavailable');
      return;
    }

    const recognition = new SpeechRecognitionApi();
    recognition.continuous = true;
    recognition.interimResults = true;
    recognition.lang = 'de-DE';

    recognition.onspeechstart = () => {
      if (stateRef.current === 'botSpeaking') return;
      transcriptPartsRef.current = [];
      setLiveTranscript('');
      _resetSilenceTimer();
    };

    recognition.onresult = (event: SpeechRecognitionEvent) => {
      // While the bot speaks, results are consumed (index advanced) but dropped.
      const ignore = stateRef.current === 'botSpeaking';
      const interimParts: string[] = [];
      for (let i = processedIndexRef.current; i < event.results.length; i++) {
        const r = event.results[i];
        if (r.isFinal) {
          const text = r[0].transcript.trim();
          if (text && !ignore) transcriptPartsRef.current.push(text);
          processedIndexRef.current = i + 1;
        } else {
          if (ignore) continue;
          const text = r[0].transcript.trim();
          if (text) interimParts.push(text);
        }
      }
      if (ignore) return;
      const currentInterim = interimParts.join(' ');
      const preview = [...transcriptPartsRef.current, currentInterim].join(' ').trim();
      setLiveTranscript(preview);
      if (preview) _resetSilenceTimer();
    };

    recognition.onspeechend = () => {
      if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current);
      if (stateRef.current === 'botSpeaking') {
        transcriptPartsRef.current = [];
        setLiveTranscript('');
        return;
      }
      _finalizeTranscript();
    };

    recognition.onend = () => {
      _dlog('REC-END', `state=${stateRef.current}`);
      // A replaced/discarded instance must not touch the shared refs.
      if (recognitionRef.current !== recognition) return;
      // The session is over, so the next session's result list starts at 0.
      // Reset unconditionally: otherwise a session that ended while muted or
      // botSpeaking leaves a stale index and the next session's first results
      // are skipped.
      processedIndexRef.current = 0;
      const cur = stateRef.current;
      if (cur !== 'listening' && cur !== 'interrupted') return;
      setTimeout(() => {
        if (recognitionRef.current !== recognition) return;
        if (stateRef.current !== 'listening' && stateRef.current !== 'interrupted') return;
        try {
          recognition.start();
          _dlog('REC-AUTOSTART', 'ok');
        } catch {
          _dlog('REC-AUTOSTART', 'failed');
        }
      }, REC_AUTORESTART_DELAY_MS);
    };

    recognition.onerror = (event: any) => {
      _dlog('REC-ERR', event.error);
      if (event.error === 'no-speech' || event.error === 'aborted') return;
      console.warn('SpeechRecognition error:', event.error);
    };

    recognitionRef.current = recognition;
    // The user may have muted while microphone permission was pending; in that
    // case unmuting will start the recognizer.
    if (stateRef.current !== 'muted') _startRecognition();
  }, [_dlog, _resetSilenceTimer, _finalizeTranscript, _startRecognition]);

  const activate = useCallback(async () => {
    if (stateRef.current !== 'idle') return;
    const seq = ++activationSeqRef.current;
    _setState('listening');

    try {
      if (!streamRef.current) {
        const stream = await navigator.mediaDevices.getUserMedia({
          audio: { echoCancellation: true, noiseSuppression: true },
        });
        // deactivate(), unmount or a newer activate() happened while the
        // permission prompt was open: release the stream instead of leaking it.
        if (activationSeqRef.current !== seq) {
          stream.getTracks().forEach(t => t.stop());
          return;
        }
        streamRef.current = stream;
      }
      _createRecognition();
    } catch (err) {
      console.warn('Mic access failed:', err);
      // Only roll back if this activation is still the current one.
      if (activationSeqRef.current === seq) _setState('idle');
    }
  }, [_setState, _createRecognition]);

  const deactivate = useCallback(() => {
    activationSeqRef.current += 1; // invalidate any pending activation
    _setState('idle');
    if (silenceTimerRef.current) {
      clearTimeout(silenceTimerRef.current);
      silenceTimerRef.current = null;
    }
    if (recognitionRef.current) {
      try { recognitionRef.current.stop(); } catch { /* ignore */ }
      recognitionRef.current = null;
    }
    if (streamRef.current) {
      streamRef.current.getTracks().forEach(t => t.stop());
      streamRef.current = null;
    }
    transcriptPartsRef.current = [];
    processedIndexRef.current = 0;
    setLiveTranscript('');
  }, [_setState]);

  const ttsPlaying = useCallback(() => {
    const cur = stateRef.current;
    if (cur === 'muted') return;
    _setState('botSpeaking');
  }, [_setState]);

  const ttsPaused = useCallback(() => {
    const cur = stateRef.current;
    if (cur === 'botSpeaking') {
      _setState('interrupted');
      // Recognition may have ended by itself while the bot was speaking (onend
      // does not restart in that state); make sure the mic is live again.
      _startRecognition();
    }
  }, [_setState, _startRecognition]);

  const ttsEnded = useCallback(() => {
    const cur = stateRef.current;
    if (cur === 'botSpeaking' || cur === 'interrupted') {
      _setState('listening');
      // See ttsPaused: harmless if the recognizer is still running.
      _startRecognition();
    }
  }, [_setState, _startRecognition]);

  const toggleMute = useCallback(() => {
    const cur = stateRef.current;
    if (cur === 'muted') {
      _setState('listening');
      _startRecognition();
    } else if (cur === 'listening' || cur === 'interrupted') {
      _setState('muted');
      _stopRecognition();
    } else if (cur === 'botSpeaking') {
      _setState('muted');
    }
  }, [_setState, _startRecognition, _stopRecognition]);

  // Release all hardware resources on unmount.
  useEffect(() => {
    return () => {
      // Invalidate a pending activation and mark the machine idle without
      // triggering a React state update on an unmounting component.
      activationSeqRef.current += 1;
      stateRef.current = 'idle';
      if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current);
      if (recognitionRef.current) {
        try { recognitionRef.current.stop(); } catch { /* ignore */ }
        recognitionRef.current = null;
      }
      if (streamRef.current) {
        streamRef.current.getTracks().forEach(t => t.stop());
        streamRef.current = null;
      }
    };
  }, []);

  return {
    state,
    liveTranscript,
    activate,
    deactivate,
    ttsPlaying,
    ttsPaused,
    ttsEnded,
    toggleMute,
  };
}
|
||||
|
|
@ -40,6 +40,17 @@ export const TeamsbotSessionView: React.FC = () => {
|
|||
const transcriptEndRef = useRef<HTMLDivElement>(null);
|
||||
const eventSourceRef = useRef<EventSource | null>(null);
|
||||
|
||||
const debugLogsRef = useRef<string[]>([]);
|
||||
const [debugVisible, setDebugVisible] = useState(false);
|
||||
const [debugSnapshot, setDebugSnapshot] = useState<string[]>([]);
|
||||
// Appends a timestamped entry to the in-memory debug log, keeping at most the 120 newest entries.
const _dlog = useCallback((tag: string, info?: string) => {
  const now = new Date();
  const seconds = String(now.getSeconds()).padStart(2, '0');
  const millis = String(now.getMilliseconds()).padStart(3, '0');
  const suffix = info ? ' ' + info : '';
  const buffer = debugLogsRef.current;
  buffer.push(`[${now.getMinutes()}:${seconds}.${millis}] ${tag}${suffix}`);
  // Drop the oldest entry once the cap is exceeded.
  if (buffer.length > 120) buffer.shift();
}, []);
|
||||
|
||||
// Load session data - if no sessionId given, load the most recent session
|
||||
const _loadSession = useCallback(async () => {
|
||||
if (!instanceId) return;
|
||||
|
|
@ -98,19 +109,26 @@ export const TeamsbotSessionView: React.FC = () => {
|
|||
const eventSource = teamsbotApi.createSessionStream(instanceId, sessionId);
|
||||
eventSourceRef.current = eventSource;
|
||||
setIsLive(true);
|
||||
_dlog('SSE', 'connected');
|
||||
|
||||
eventSource.onmessage = (event) => {
|
||||
try {
|
||||
const sseEvent: TeamsbotSSEEvent = JSON.parse(event.data);
|
||||
const evType = sseEvent.type || 'unknown';
|
||||
|
||||
switch (sseEvent.type) {
|
||||
_dlog('SSE', evType + (sseEvent.data ? ` ${JSON.stringify(sseEvent.data).substring(0, 80)}` : ''));
|
||||
|
||||
switch (evType) {
|
||||
case 'sessionState':
|
||||
if (sseEvent.data) setSession(prev => prev ? { ...prev, ...sseEvent.data } : sseEvent.data);
|
||||
break;
|
||||
|
||||
case 'transcript':
|
||||
setTranscripts(prev => [...prev, sseEvent.data as TeamsbotTranscript]);
|
||||
case 'transcript': {
|
||||
const t = sseEvent.data as TeamsbotTranscript;
|
||||
_dlog('TRANSCRIPT', `[${t?.speaker || '?'}] ${(t?.text || '').substring(0, 50)}...`);
|
||||
setTranscripts(prev => [...prev, t]);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'botResponse':
|
||||
setBotResponses(prev => [...prev, sseEvent.data as TeamsbotBotResponse]);
|
||||
|
|
@ -155,6 +173,7 @@ export const TeamsbotSessionView: React.FC = () => {
|
|||
break;
|
||||
}
|
||||
} catch (err) {
|
||||
_dlog('SSE-ERR', String(err));
|
||||
console.error('SSE parse error:', err);
|
||||
}
|
||||
};
|
||||
|
|
@ -169,7 +188,7 @@ export const TeamsbotSessionView: React.FC = () => {
|
|||
sseSessionRef.current = null;
|
||||
setIsLive(false);
|
||||
};
|
||||
}, [instanceId, sessionId, sessionStatus]);
|
||||
}, [instanceId, sessionId, sessionStatus, _dlog]);
|
||||
|
||||
// Polling fallback: refresh session data every 5s when SSE is not connected
|
||||
const pollRef = useRef<ReturnType<typeof setInterval> | null>(null);
|
||||
|
|
@ -362,6 +381,21 @@ export const TeamsbotSessionView: React.FC = () => {
|
|||
)}
|
||||
</div>
|
||||
|
||||
{/* Debug Log (SSE/Transcript/Chat) */}
|
||||
<div style={{ position: 'fixed', bottom: 0, right: 0, zIndex: 9999 }}>
|
||||
<button
|
||||
onClick={() => { setDebugSnapshot([...debugLogsRef.current]); setDebugVisible(v => !v); }}
|
||||
style={{ background: '#333', color: '#0f0', border: 'none', padding: '4px 8px', fontSize: '10px', borderRadius: '4px 0 0 0' }}
|
||||
>
|
||||
DBG ({debugLogsRef.current.length})
|
||||
</button>
|
||||
{debugVisible && (
|
||||
<div style={{ background: 'rgba(0,0,0,0.9)', color: '#0f0', fontSize: '9px', maxHeight: '40vh', overflow: 'auto', padding: '4px', fontFamily: 'monospace', whiteSpace: 'pre-wrap', width: '100vw' }}>
|
||||
{debugSnapshot.map((l, i) => <div key={i}>{l}</div>)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Debug Screenshots (SysAdmin only) */}
|
||||
{_isSysAdmin && (
|
||||
<div className={styles.summaryCard}>
|
||||
|
|
|
|||
Loading…
Reference in a new issue