feat: add hybrid speaker hints for audio-mode transcripts

Made-with: Cursor
This commit is contained in:
ValueOn AG 2026-02-26 09:05:21 +01:00
parent 79c1555e0c
commit 39a422af25
2 changed files with 30 additions and 5 deletions

View file

@ -726,7 +726,12 @@ export class BotOrchestrator {
/**
* Send a transcript to the Gateway.
*/
private _sendTranscript(speaker: string, text: string, isFinal: boolean): void {
private _sendTranscript(
speaker: string,
text: string,
isFinal: boolean,
source: 'caption' | 'audioCapture' | 'speakerHint' | 'chat' = 'caption',
): void {
const message: TranscriptMessage = {
type: 'transcript',
sessionId: this._sessionId,
@ -735,6 +740,7 @@ export class BotOrchestrator {
text,
timestamp: new Date().toISOString(),
isFinal,
source,
},
};
this._sendToGateway(message);
@ -920,10 +926,14 @@ export class BotOrchestrator {
this._page,
this._logger,
(entry) => {
// Send transcript to Gateway
this._sendTranscript(entry.speaker, entry.text, entry.isFinal);
// Also notify local callbacks
const transferMode = this._getEffectiveTransferMode();
if (transferMode === 'audio') {
// In audio mode, captions are only used as speaker hints.
this._sendTranscript(entry.speaker, entry.text, entry.isFinal, 'speakerHint');
} else {
this._sendTranscript(entry.speaker, entry.text, entry.isFinal, 'caption');
this._callbacks.onTranscript(entry);
}
},
this._options.language
);
@ -1098,6 +1108,19 @@ export class BotOrchestrator {
}
}
/**
 * Subscribe to captions only as speaker hints (without enabling captions flow).
 * This supports hybrid mode: audio text + caption-based speaker names.
 *
 * Best-effort: a missing captions procedure or a failed subscription is logged
 * as a warning instead of being rethrown.
 */
private async _enableSpeakerHintsFromCaptions(): Promise<void> {
  // Guard explicitly instead of using a non-null assertion, so a missing
  // procedure is reported with a clear cause rather than an opaque TypeError.
  const captionsProcedure = this._captionsProcedure;
  if (!captionsProcedure) {
    this._logger.warn(
      'Could not subscribe to captions for speaker hints:',
      new Error('Captions procedure is not initialized'),
    );
    return;
  }

  try {
    await captionsProcedure.subscribeToCaptions();
    this._logger.info('Speaker hints from captions subscribed (audio mode)');
  } catch (error) {
    this._logger.warn('Could not subscribe to captions for speaker hints:', error);
  }
}
/**
* Enable transcript capture (captions or audio) based on transfer mode.
*/
@ -1109,6 +1132,7 @@ export class BotOrchestrator {
await this._enableCaptions();
} else {
await this._enableAudioCapture();
await this._enableSpeakerHintsFromCaptions();
}
}

View file

@ -17,6 +17,7 @@ export interface TranscriptMessage {
text: string;
timestamp: string;
isFinal: boolean;
source?: 'caption' | 'audioCapture' | 'speakerHint' | 'chat';
};
}