1653 lines
76 KiB
Python
1653 lines
76 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""
|
|
Teamsbot Service - Pipeline Orchestrator.
|
|
Manages the audio processing pipeline: STT -> Context Buffer -> SPEECH_TEAMS -> TTS -> Bridge.
|
|
"""
|
|
|
|
import logging
|
|
import json
|
|
import asyncio
|
|
import time
|
|
import base64
|
|
from typing import Optional, Dict, Any, List
|
|
|
|
from fastapi import WebSocket
|
|
|
|
from modules.datamodels.datamodelUam import User
|
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
|
from modules.shared.timeUtils import getUtcTimestamp, getIsoTimestamp
|
|
|
|
from .datamodelTeamsbot import (
|
|
TeamsbotSessionStatus,
|
|
TeamsbotTranscript,
|
|
TeamsbotBotResponse,
|
|
TeamsbotResponseType,
|
|
TeamsbotConfig,
|
|
TeamsbotResponseMode,
|
|
TeamsbotResponseChannel,
|
|
SpeechTeamsResponse,
|
|
TeamsbotCommand,
|
|
)
|
|
from .browserBotConnector import BrowserBotConnector
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# =========================================================================
|
|
# Minimal Service Context (for AI billing in bridge callbacks)
|
|
# =========================================================================
|
|
|
|
class _ServiceContext:
|
|
"""Minimal context providing user/mandate info for AiService billing.
|
|
Used by bridge callbacks where a full Services instance is not available."""
|
|
|
|
def __init__(self, user, mandateId, featureInstanceId=None):
|
|
self.user = user
|
|
self.mandateId = mandateId
|
|
self.featureInstanceId = featureInstanceId
|
|
self.featureCode = "teamsbot"
|
|
|
|
|
|
# =========================================================================
|
|
# Session Event Queues (for SSE streaming to frontend)
|
|
# =========================================================================
|
|
# Per-session event queues, keyed by sessionId; producers are the pipeline
# callbacks (via _emitSessionEvent), the consumer is the SSE streaming route.
_sessionEvents: Dict[str, asyncio.Queue] = {}
|
|
|
|
|
|
async def _emitSessionEvent(sessionId: str, eventType: str, data: Any) -> None:
    """Emit an event onto the session's SSE stream queue.

    The queue is created on demand (single ``setdefault`` lookup instead of
    a check-then-act pair) so events emitted before a consumer attaches are
    never silently dropped.

    Args:
        sessionId: Session whose SSE stream receives the event.
        eventType: Event discriminator consumed by the frontend.
        data: JSON-serializable payload for the event.
    """
    queue = _sessionEvents.setdefault(sessionId, asyncio.Queue())
    await queue.put({"type": eventType, "data": data, "timestamp": getIsoTimestamp()})
|
|
|
|
|
|
class TeamsbotService:
|
|
"""
|
|
Pipeline Orchestrator for Teams Bot sessions.
|
|
Coordinates VoiceObjects (STT/TTS), AiService (SPEECH_TEAMS), and Bridge communication.
|
|
"""
|
|
|
|
def __init__(self, currentUser: User, mandateId: str, instanceId: str, config: TeamsbotConfig):
    """Initialize the orchestrator for one Teamsbot feature instance.

    Args:
        currentUser: Authenticated user on whose behalf AI calls are made.
        mandateId: Mandate (tenant) the sessions belong to.
        instanceId: Feature instance identifier used for interface lookups.
        config: Effective Teamsbot configuration (bot name, language,
            trigger timings, browser-bot URL).
    """
    self.currentUser = currentUser
    self.mandateId = mandateId
    self.instanceId = instanceId
    self.config = config
    self.browserBotConnector = BrowserBotConnector(config._getEffectiveBrowserBotUrl())

    # State
    self._lastAiCallTime: float = 0.0          # monotonic-ish wall clock of last AI analysis
    self._aiAnalysisInProgress: bool = False   # guards against overlapping AI calls
    self._contextBuffer: List[Dict[str, Any]] = []  # rolling window of transcript segments for AI
    self._sessionContext: Optional[str] = None  # User-provided background context
    self._contextSummary: Optional[str] = None  # AI-generated summary of long context

    # Differential transcript tracking (merge streaming STT chunks into one DB row)
    self._lastTranscriptSpeaker: Optional[str] = None
    self._lastTranscriptText: Optional[str] = None
    self._lastTranscriptId: Optional[str] = None
    self._lastSttTime: float = 0.0
    self._lastBotResponseText: Optional[str] = None
    self._lastBotResponseTs: float = 0.0

    # Speaker attribution: simple last-caption-speaker model
    self._lastCaptionSpeaker: Optional[str] = None
    self._unattributedTranscriptIds: List[str] = []  # STT rows created before any speaker was known
    self._knownSpeakers: set = set()

    # Debounced name trigger: wait for speaker to finish before AI analysis
    self._pendingNameTrigger: Optional[Dict[str, Any]] = None
    self._followUpWindowEnd: float = 0.0  # deadline until which human speech triggers AI without the bot name
|
|
|
|
# =========================================================================
|
|
# Session Lifecycle
|
|
# =========================================================================
|
|
|
|
async def joinMeeting(
    self,
    sessionId: str,
    meetingLink: str,
    connectionId: Optional[str] = None,
    gatewayBaseUrl: str = "",
    botAccountEmail: Optional[str] = None,
    botAccountPassword: Optional[str] = None,
):
    """Send join command to the Browser Bot service.

    The browser bot will:
    1. Launch browser (headful if credentials provided, headless otherwise)
    2. Navigate to Teams web app
    3. Authenticate if credentials provided, otherwise join as anonymous guest
    4. Enable captions/audio capture and start scraping
    5. Connect back via WebSocket to send transcripts

    Args:
        sessionId: Session record to drive through JOINING/ERROR states.
        meetingLink: Teams meeting URL the bot should open.
        connectionId: NOTE(review): currently unused in this method — confirm
            whether it is kept for interface compatibility.
        gatewayBaseUrl: Base URL of this gateway (from request.base_url),
            used to build the WebSocket callback URL for the bot.
        botAccountEmail: Optional bot account for authenticated join.
        botAccountPassword: Password for the bot account, if any.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb

    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)

    # Initialize SSE event queue
    _sessionEvents[sessionId] = asyncio.Queue()

    try:
        # Update status to JOINING
        interface.updateSession(sessionId, {"status": TeamsbotSessionStatus.JOINING.value})
        await _emitSessionEvent(sessionId, "statusChange", {"status": "joining"})

        # Send join command to browser bot
        session = interface.getSession(sessionId)
        if not session:
            raise ValueError(f"Session {sessionId} not found")

        # Build the full WebSocket URL for the bot to connect back to this gateway instance
        # gatewayBaseUrl is passed from the route handler (derived from request.base_url)
        wsScheme = "wss" if gatewayBaseUrl.startswith("https") else "ws"
        gatewayHost = gatewayBaseUrl.replace("https://", "").replace("http://", "").rstrip("/")
        fullGatewayWsUrl = f"{wsScheme}://{gatewayHost}/api/teamsbot/{self.instanceId}/bot/ws/{sessionId}"

        hasAuth = bool(botAccountEmail and botAccountPassword)
        logger.info(f"Joining meeting for session {sessionId}: auth={hasAuth}, email={botAccountEmail or 'N/A'}, transferMode={self.config.transferMode}")

        # hasattr guards allow older config objects without these fields
        result = await self.browserBotConnector.joinMeeting(
            sessionId=sessionId,
            meetingUrl=meetingLink,
            botName=session.get("botName", self.config.botName),
            instanceId=self.instanceId,
            gatewayWsUrl=fullGatewayWsUrl,
            language=self.config.language,
            botAccountEmail=botAccountEmail,
            botAccountPassword=botAccountPassword,
            transferMode=self.config.transferMode if hasattr(self.config, 'transferMode') else "auto",
            debugMode=self.config.debugMode if hasattr(self.config, 'debugMode') else False,
        )

        if result.get("success"):
            interface.updateSession(sessionId, {
                "status": TeamsbotSessionStatus.JOINING.value,  # Will become ACTIVE when bot connects via WS
            })
            logger.info(f"Browser bot deployment started for session {sessionId}")
        else:
            errorMsg = result.get("error", "Unknown error joining meeting")
            interface.updateSession(sessionId, {
                "status": TeamsbotSessionStatus.ERROR.value,
                "errorMessage": errorMsg,
            })
            await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": errorMsg})
            logger.error(f"Failed to deploy browser bot for session {sessionId}: {errorMsg}")

    except Exception as e:
        # Any failure (including the ValueError above) lands the session in ERROR
        logger.error(f"Error joining meeting for session {sessionId}: {e}")
        interface.updateSession(sessionId, {
            "status": TeamsbotSessionStatus.ERROR.value,
            "errorMessage": str(e),
        })
        await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)})
|
|
|
|
async def leaveMeeting(self, sessionId: str):
    """Send leave command to the Browser Bot service.

    Drives the session through LEAVING -> ENDED (or ERROR on failure),
    schedules background summary generation, and removes the session's
    SSE event queue. Note that _emitSessionEvent recreates the queue on
    demand, so late events after cleanup are not lost.

    Args:
        sessionId: Session whose bot should leave the meeting.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb

    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)

    try:
        interface.updateSession(sessionId, {"status": TeamsbotSessionStatus.LEAVING.value})
        await _emitSessionEvent(sessionId, "statusChange", {"status": "leaving"})

        await self.browserBotConnector.leaveMeeting(sessionId)

        interface.updateSession(sessionId, {
            "status": TeamsbotSessionStatus.ENDED.value,
            "endedAt": getIsoTimestamp(),
        })
        await _emitSessionEvent(sessionId, "statusChange", {"status": "ended"})

        # Generate meeting summary in background (fire-and-forget)
        asyncio.create_task(self._generateMeetingSummary(sessionId))

        logger.info(f"Bot left meeting for session {sessionId}")

    except Exception as e:
        logger.error(f"Error leaving meeting for session {sessionId}: {e}")
        interface.updateSession(sessionId, {
            "status": TeamsbotSessionStatus.ERROR.value,
            "errorMessage": str(e),
            "endedAt": getIsoTimestamp(),
        })

    # Cleanup event queue (runs on both success and error paths)
    _sessionEvents.pop(sessionId, None)
|
|
|
|
# =========================================================================
|
|
# Browser Bot WebSocket Communication
|
|
# =========================================================================
|
|
|
|
async def handleBotWebSocket(self, websocket: WebSocket, sessionId: str):
    """
    Main WebSocket handler for Browser Bot communication.

    Runs the receive loop until the socket disconnects or errors.

    Receives:
    - transcript: Caption text scraped from Teams
    - chatMessage: Chat messages (live or pre-join history)
    - status: Bot state changes (joined, in_lobby, left, error)
    - audioChunk: Raw captured audio for STT
    - voiceGreeting: Text the bot should speak on join
    - ping / ttsPlaybackAck / mfaChallenge / mfaResolved: control messages

    Sends:
    - playAudio: TTS audio for the bot to play in the meeting
    - pong / mfaResponse: control replies
    """
    from . import interfaceFeatureTeamsbot as interfaceDb
    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface

    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
    voiceInterface = getVoiceInterface(self.currentUser, self.mandateId)

    # Load session context (user-provided background knowledge)
    # If the context is long (>500 chars), summarize it to reduce token usage
    session = interface.getSession(sessionId)
    if session:
        rawContext = session.get("sessionContext")
        if rawContext and len(rawContext) > 500:
            logger.info(f"Session {sessionId}: Summarizing long session context ({len(rawContext)} chars)...")
            self._sessionContext = await self._summarizeSessionContext(sessionId, rawContext)
        elif rawContext:
            self._sessionContext = rawContext
        if self._sessionContext:
            logger.info(f"Session {sessionId}: Session context ready ({len(self._sessionContext)} chars)")

    # Resolve system bot email for speaker detection (prevents bot from triggering AI on own speech)
    try:
        systemBot = interface.getActiveSystemBot(self.mandateId)
        self._botAccountEmail = systemBot.get("email") if systemBot else None
        if self._botAccountEmail:
            logger.info(f"Session {sessionId}: Bot account email resolved: {self._botAccountEmail}")
    except Exception:
        # Best-effort: missing system bot only weakens own-speech filtering
        self._botAccountEmail = None

    logger.info(f"[WS] Handler started for session {sessionId}")

    try:
        # msgCount is assigned before any await so the final log line below
        # is always safe to reference it.
        msgCount = 0
        while True:
            data = await websocket.receive()
            msgCount += 1

            if "text" not in data:
                logger.debug(f"[WS] session={sessionId} msg #{msgCount}: non-text data (keys: {list(data.keys())})")
                continue

            message = json.loads(data["text"])
            msgType = message.get("type")
            # Skip per-message logging for the two high-frequency types
            if msgType not in ("audioChunk", "ping"):
                logger.info(f"[WS] session={sessionId} msg #{msgCount}: type={msgType}")

            if msgType == "transcript":
                transcript = message.get("transcript", {})
                source = transcript.get("source", "caption")
                speaker = transcript.get("speaker", "Unknown")
                textPreview = (transcript.get("text", "") or "")[:60]
                # Caption/speakerHint: name resolution only; transcript comes from STT
                logger.info(f"[WS] Transcript (source={source}, speaker={speaker}): {textPreview}...")
                await self._processTranscript(
                    sessionId=sessionId,
                    speaker=transcript.get("speaker", "Unknown"),
                    text=transcript.get("text", ""),
                    isFinal=transcript.get("isFinal", True),
                    interface=interface,
                    voiceInterface=voiceInterface,
                    websocket=websocket,
                    source=source,
                )

            elif msgType == "chatMessage":
                chat = message.get("chat", {})
                # History messages were sent before the bot joined; they are
                # routed under a distinct source so they never trigger AI.
                isHistory = chat.get("isHistory", False)
                source = "chatHistory" if isHistory else "chat"
                logger.info(
                    f"[WS] Chat{'[HISTORY]' if isHistory else ''}: "
                    f"speaker={chat.get('speaker')}, text={chat.get('text', '')[:60]}..."
                )
                await self._processTranscript(
                    sessionId=sessionId,
                    speaker=chat.get("speaker", "Unknown"),
                    text=chat.get("text", ""),
                    isFinal=True,
                    interface=interface,
                    voiceInterface=voiceInterface,
                    websocket=websocket,
                    source=source,
                )

            elif msgType == "status":
                status = message.get("status")
                errorMessage = message.get("message")
                logger.info(f"[WS] Status: status={status}, message={errorMessage}")
                await self._handleBotStatus(sessionId, status, errorMessage, interface)

            elif msgType == "audioChunk":
                audioData = message.get("audio", {})
                audioBase64 = audioData.get("data", "")
                sampleRate = audioData.get("sampleRate", 16000)
                captureDiagnostics = audioData.get("captureDiagnostics") or {}
                if audioBase64:
                    await self._processAudioChunk(
                        sessionId=sessionId,
                        audioBase64=audioBase64,
                        sampleRate=sampleRate,
                        captureDiagnostics=captureDiagnostics,
                        interface=interface,
                        voiceInterface=voiceInterface,
                        websocket=websocket,
                    )

            elif msgType == "voiceGreeting":
                # Synthesize the greeting and ship it back as playable audio
                greetingText = message.get("text", "")
                greetingLang = message.get("language", self.config.language)
                logger.info(f"[WS] Voice greeting: text={greetingText[:60]}..., language={greetingLang}")
                if greetingText and voiceInterface:
                    try:
                        ttsResult = await voiceInterface.textToSpeech(
                            text=greetingText,
                            languageCode=greetingLang,
                            voiceName=self.config.voiceId
                        )
                        if ttsResult and isinstance(ttsResult, dict):
                            audioContent = ttsResult.get("audioContent")
                            if audioContent:
                                await websocket.send_text(json.dumps({
                                    "type": "playAudio",
                                    "sessionId": sessionId,
                                    "audio": {
                                        "data": base64.b64encode(audioContent if isinstance(audioContent, bytes) else audioContent.encode()).decode(),
                                        "format": "mp3",
                                    }
                                }))
                                logger.info(f"Voice greeting TTS sent for session {sessionId}")
                    except Exception as ttsErr:
                        # A failed greeting is non-fatal; the session continues
                        logger.warning(f"Voice greeting TTS failed for session {sessionId}: {ttsErr}")

            elif msgType == "ping":
                await websocket.send_text(json.dumps({"type": "pong"}))

            elif msgType == "ttsPlaybackAck":
                # Bot confirms (or reports failure of) TTS playback; forwarded
                # to the frontend via SSE for delivery diagnostics.
                playback = message.get("playback", {}) or {}
                status = playback.get("status", "unknown")
                ackMessage = playback.get("message") or "Bot playback status update"
                logger.info(
                    f"[WS] TTS playback ack: status={status}, format={playback.get('format')}, "
                    f"bytesBase64={playback.get('bytesBase64')}"
                )
                await _emitSessionEvent(sessionId, "ttsDeliveryStatus", {
                    "status": f"playback_{status}",
                    "hasWebSocket": True,
                    "message": ackMessage,
                    "timestamp": playback.get("timestamp") or getIsoTimestamp(),
                    "format": playback.get("format"),
                    "bytesBase64": playback.get("bytesBase64"),
                })

            elif msgType == "mfaChallenge":
                # Bot hit an MFA prompt during authenticated join: surface it
                # to the frontend and wait (up to 120s) for the user's reply.
                mfaData = message.get("mfa", {})
                mfaType = mfaData.get("type", "unknown")
                displayNumber = mfaData.get("displayNumber")
                prompt = mfaData.get("prompt", "")
                logger.info(f"[WS] MFA challenge: type={mfaType}, number={displayNumber}, prompt={prompt[:60]}")

                await _emitSessionEvent(sessionId, "mfaChallenge", {
                    "mfaType": mfaType,
                    "displayNumber": displayNumber,
                    "prompt": prompt,
                    "timestamp": getIsoTimestamp(),
                })

                # Queues/tasks live in the route module so the REST endpoint
                # that receives the user's code can reach them.
                from .routeFeatureTeamsbot import _mfaCodeQueues, _mfaWaitTasks
                mfaQueue = asyncio.Queue()
                _mfaCodeQueues[sessionId] = mfaQueue

                async def _waitAndForwardMfa(sid, queue, ws):
                    # Background waiter: forwards the user's MFA response (or
                    # a timeout marker) back to the bot over the same socket.
                    try:
                        mfaResponse = await asyncio.wait_for(queue.get(), timeout=120.0)
                        logger.info(f"[WS] MFA response received for session {sid}: action={mfaResponse.get('action')}")
                        await ws.send_text(json.dumps({
                            "type": "mfaResponse",
                            "sessionId": sid,
                            "mfa": mfaResponse,
                        }))
                    except asyncio.TimeoutError:
                        logger.warning(f"[WS] MFA response timeout for session {sid}")
                        await ws.send_text(json.dumps({
                            "type": "mfaResponse",
                            "sessionId": sid,
                            "mfa": {"action": "timeout"},
                        }))
                        await _emitSessionEvent(sid, "mfaChallenge", {
                            "mfaType": "timeout",
                            "prompt": "MFA-Zeitlimit ueberschritten. Bitte erneut versuchen.",
                        })
                    except asyncio.CancelledError:
                        # Cancelled by mfaResolved below when the page resolved MFA itself
                        logger.info(f"[WS] MFA wait cancelled for session {sid} (resolved via page)")
                    finally:
                        _mfaCodeQueues.pop(sid, None)
                        _mfaWaitTasks.pop(sid, None)

                _mfaWaitTasks[sessionId] = asyncio.create_task(
                    _waitAndForwardMfa(sessionId, mfaQueue, websocket)
                )

            elif msgType == "mfaResolved":
                # MFA completed (e.g. approved on the device): cancel the waiter
                success = message.get("success", False)
                logger.info(f"[WS] MFA resolved: success={success}")
                from .routeFeatureTeamsbot import _mfaCodeQueues, _mfaWaitTasks
                task = _mfaWaitTasks.pop(sessionId, None)
                if task and not task.done():
                    task.cancel()
                _mfaCodeQueues.pop(sessionId, None)
                await _emitSessionEvent(sessionId, "mfaResolved", {
                    "success": success,
                    "timestamp": getIsoTimestamp(),
                })

    except Exception as e:
        # Normal client disconnects raise too; only log real errors
        if "disconnect" not in str(e).lower():
            logger.error(f"[WS] Error for session {sessionId}: {type(e).__name__}: {e}")

    logger.info(f"[WS] Handler ended for session {sessionId} after {msgCount} messages")
|
|
|
|
async def _handleBotStatus(
    self,
    sessionId: str,
    status: str,
    errorMessage: Optional[str],
    interface,
):
    """Translate a browser-bot status update into session state.

    Maps raw bot states onto the session status enum, persists the change
    (stamping startedAt/endedAt where appropriate), notifies the SSE
    stream, and kicks off summary generation once the meeting ends.
    Unrecognized bot states are treated as ACTIVE.
    """
    logger.info(f"Bot status update for session {sessionId}: {status}")

    joining = TeamsbotSessionStatus.JOINING.value
    active = TeamsbotSessionStatus.ACTIVE.value
    ended = TeamsbotSessionStatus.ENDED.value
    error = TeamsbotSessionStatus.ERROR.value

    statusMap = {
        "connecting": joining,
        "launching": joining,
        "navigating": joining,
        "in_lobby": joining,
        "joined": active,
        "in_meeting": active,
        "left": ended,
        "error": error,
    }
    dbStatus = statusMap.get(status, active)

    changes = {"status": dbStatus}
    if errorMessage:
        changes["errorMessage"] = errorMessage
    if dbStatus == active:
        changes["startedAt"] = getIsoTimestamp()
    elif dbStatus in (ended, error):
        changes["endedAt"] = getIsoTimestamp()

    interface.updateSession(sessionId, changes)
    await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage})

    # A finished meeting gets its summary generated in the background.
    if dbStatus == ended:
        asyncio.create_task(self._generateMeetingSummary(sessionId))
|
|
|
|
async def _processAudioChunk(
    self,
    sessionId: str,
    audioBase64: str,
    sampleRate: int,
    captureDiagnostics: Optional[Dict[str, Any]],
    interface,
    voiceInterface,
    websocket: WebSocket,
):
    """Process an audio chunk from WebRTC capture — run STT and feed into transcript pipeline.

    Args:
        sessionId: Session the audio belongs to.
        audioBase64: Base64-encoded audio payload from the bot.
        sampleRate: Reported sample rate; 0 is treated as unknown.
        captureDiagnostics: Optional capture metadata (trackId, rms, ...).
        interface: Feature DB interface, forwarded to _processTranscript.
        voiceInterface: Voice services interface providing speechToText.
        websocket: Bot WebSocket, forwarded for possible TTS replies.
    """
    import base64
    try:
        audioBytes = base64.b64decode(audioBase64)
        # Tiny payloads are not worth an STT round trip
        if len(audioBytes) < 1000:
            return

        if captureDiagnostics:
            trackId = captureDiagnostics.get("trackId")
            readyState = captureDiagnostics.get("readyState")
            rms = captureDiagnostics.get("rms")
            nativeSampleRate = captureDiagnostics.get("nativeSampleRate")
            logger.debug(
                f"[AudioChunk] diagnostics: track={trackId}, readyState={readyState}, "
                f"rms={rms}, nativeRate={nativeSampleRate}, bytes={len(audioBytes)}"
            )

        # Use RMS from capture diagnostics to skip real silence.
        # Byte-variation heuristics produced false positives and dropped valid speech.
        if captureDiagnostics and captureDiagnostics.get("rms") is not None:
            try:
                rmsVal = float(captureDiagnostics.get("rms"))
                if rmsVal < 0.0003:
                    logger.debug(f"[AudioChunk] Skipping silent audio ({len(audioBytes)} bytes, rms={rmsVal:.6f})")
                    return
            except Exception:
                # Unparseable RMS: fall through and transcribe anyway
                pass

        if not voiceInterface:
            logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}")
            return

        # Treat sampleRate=0 as unknown (triggers auto-detection)
        effectiveSampleRate = sampleRate if sampleRate and sampleRate > 0 else None

        # Known participant names bias STT toward recognizing them correctly
        phraseHints = list(self._knownSpeakers)
        if self.config.botName:
            phraseHints.append(self.config.botName)

        sttResult = await voiceInterface.speechToText(
            audioContent=audioBytes,
            language=self.config.language or "de-DE",
            sampleRate=effectiveSampleRate,
            channels=1,
            skipFallbacks=True,
            phraseHints=phraseHints if phraseHints else None,
            alternativeLanguages=["en-US"],
        )

        if sttResult and sttResult.get("success") and sttResult.get("text"):
            text = sttResult["text"].strip()
            if text:
                # Attribute the chunk to the most recent caption speaker
                resolvedSpeaker = self._resolveSpeakerForAudioCapture()
                fromCaption = resolvedSpeaker.get("speakerResolvedFromHint", False)
                logger.info(
                    f"[AudioChunk] STT result: speaker={resolvedSpeaker.get('speaker', 'Meeting Audio')} "
                    f"(fromCaption={fromCaption}), text={text[:80]}..."
                )
                await self._processTranscript(
                    sessionId=sessionId,
                    speaker=resolvedSpeaker["speaker"],
                    text=text,
                    isFinal=True,
                    interface=interface,
                    voiceInterface=voiceInterface,
                    websocket=websocket,
                    source="audioCapture",
                    speakerResolvedFromHint=resolvedSpeaker["speakerResolvedFromHint"],
                )
    except Exception as e:
        # STT failures only drop this chunk; the capture stream continues
        logger.error(f"[AudioChunk] STT error for session {sessionId}: {type(e).__name__}: {e}")
|
|
|
|
def _registerSpeakerHint(self, speaker: str, text: str, sessionId: str = ""):
|
|
"""Track current speaker from captions for STT attribution.
|
|
When the first non-bot caption arrives, retroactively attributes
|
|
any STT segments that were created before a speaker was known."""
|
|
if not speaker:
|
|
return
|
|
normalizedSpeaker = speaker.strip()
|
|
if not normalizedSpeaker or self._isBotSpeaker(normalizedSpeaker):
|
|
return
|
|
|
|
prevSpeaker = self._lastCaptionSpeaker
|
|
self._lastCaptionSpeaker = normalizedSpeaker
|
|
self._knownSpeakers.add(normalizedSpeaker)
|
|
|
|
if prevSpeaker is None and self._unattributedTranscriptIds:
|
|
from . import interfaceFeatureTeamsbot as interfaceDb
|
|
interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
|
|
for tid in self._unattributedTranscriptIds:
|
|
interface.updateTranscript(tid, {"speaker": normalizedSpeaker})
|
|
for seg in self._contextBuffer:
|
|
if seg.get("speaker") == "Unknown" and seg.get("source") == "audioCapture":
|
|
seg["speaker"] = normalizedSpeaker
|
|
if self._lastTranscriptSpeaker == "Unknown":
|
|
self._lastTranscriptSpeaker = normalizedSpeaker
|
|
logger.info(
|
|
f"Session {sessionId}: Retroactive speaker attribution: "
|
|
f"{len(self._unattributedTranscriptIds)} segments -> {normalizedSpeaker}"
|
|
)
|
|
self._unattributedTranscriptIds.clear()
|
|
|
|
if self._pendingNameTrigger:
|
|
self._pendingNameTrigger["lastActivity"] = time.time()
|
|
|
|
def _resolveSpeakerForAudioCapture(self) -> Dict[str, Any]:
|
|
"""Speaker name for audio chunks — uses the last caption speaker."""
|
|
if self._lastCaptionSpeaker:
|
|
return {"speaker": self._lastCaptionSpeaker, "speakerResolvedFromHint": True}
|
|
return {"speaker": "Unknown", "speakerResolvedFromHint": False}
|
|
|
|
async def _processTranscript(
    self,
    sessionId: str,
    speaker: str,
    text: str,
    isFinal: bool,
    interface,
    voiceInterface,
    websocket: WebSocket,
    source: str = "caption",
    speakerResolvedFromHint: Optional[bool] = None,
):
    """Process a transcript segment from captions or chat messages.

    Differential writing: When the same speaker continues (text grows
    incrementally as captions stream), we UPDATE the existing DB record
    instead of creating a cascade of near-duplicate rows. A new record
    is only created when the speaker changes or the text is not a
    continuation of the previous segment.

    Args:
        sessionId: Session the segment belongs to.
        speaker: Speaker name (may be "Unknown" for unattributed STT).
        text: Segment text; blank input is ignored.
        isFinal: Whether this is a finalized segment.
        interface: Feature DB interface (transcript/session persistence).
        voiceInterface: Voice services interface, forwarded to AI/TTS helpers.
        websocket: Bot WebSocket, forwarded for response playback.
        source: One of "caption", "speakerHint", "chat", "chatHistory",
            "audioCapture" — controls routing below.
        speakerResolvedFromHint: Whether the speaker came from a caption hint
            (audioCapture only); surfaced to the frontend.
    """

    text = text.strip()
    if not text:
        return

    # Captions are used ONLY for speaker name resolution (never as transcript).
    # Transcript text comes exclusively from audio STT or chat.
    # Address detection (bot name in caption) still triggers AI analysis
    # using existing audio-based context — but caption text itself is NOT
    # added to the context buffer.
    if source in ("caption", "speakerHint"):
        self._registerSpeakerHint(speaker, text, sessionId)

        if (
            source == "speakerHint"
            and isFinal
            and not self._isBotSpeaker(speaker)
            and self.config.responseMode != TeamsbotResponseMode.TRANSCRIBE_ONLY
            and self._detectBotName(text)
        ):
            # id=None: the trigger references no persisted transcript row
            triggerTranscript = {"id": None, "speaker": speaker, "text": text, "source": source}
            isNew = self._setPendingNameTrigger(sessionId, interface, voiceInterface, websocket, triggerTranscript)
            if isNew:
                logger.info(f"Session {sessionId}: Bot name in caption, debounce trigger started")
                asyncio.create_task(self._checkPendingNameTrigger())
        return

    # Chat history: messages sent before the bot joined the meeting.
    # Stored in DB for reference but NOT added to the AI context buffer,
    # because old messages (e.g. "nyla, summarize the protocol") would
    # be treated as current requests when AI analysis is triggered.
    if source == "chatHistory":
        transcriptData = TeamsbotTranscript(
            sessionId=sessionId,
            speaker=speaker,
            text=text,
            timestamp=getIsoTimestamp(),
            confidence=1.0,
            language=self.config.language,
            isFinal=True,
            source="chatHistory",
        ).model_dump()
        createdTranscript = interface.createTranscript(transcriptData)

        await _emitSessionEvent(sessionId, "transcript", {
            "id": createdTranscript.get("id"),
            "speaker": speaker,
            "text": text,
            "confidence": 1.0,
            "timestamp": getIsoTimestamp(),
            "isContinuation": False,
            "source": "chatHistory",
            "isHistory": True,
        })
        logger.debug(f"Session {sessionId}: Chat history stored (no AI trigger): [{speaker}] {text[:60]}")
        return

    # Filter out the bot's own speech (caption/audioCapture) — garbled text
    # pollutes context. Chat from the bot is clean text and must appear in
    # the transcript for all participants.
    isBotSpeaker = self._isBotSpeaker(speaker)
    if isBotSpeaker and source != "chat":
        logger.debug(f"Session {sessionId}: Ignoring own bot caption from: [{speaker}] {text[:80]}...")
        return

    # Differential transcript writing:
    # audioCapture from same speaker → append text (merge STT chunks into one block)
    # Start a new block after a pause (>5s gap between STT results)
    sttPauseThreshold = 5.0
    isMerge = (
        source == "audioCapture"
        and self._lastTranscriptSpeaker == speaker
        and self._lastTranscriptText is not None
        and self._lastTranscriptId is not None
        and (time.time() - self._lastSttTime) < sttPauseThreshold
    )

    if isMerge:
        # Extend the previous DB row instead of creating a new one
        mergedText = f"{self._lastTranscriptText} {text}"
        interface.updateTranscript(self._lastTranscriptId, {
            "text": mergedText,
            "isFinal": isFinal,
        })
        self._lastTranscriptText = mergedText
        createdTranscript = {"id": self._lastTranscriptId}

        # Keep the AI context buffer's tail in sync with the merged row
        if self._contextBuffer and self._contextBuffer[-1].get("speaker") == speaker:
            self._contextBuffer[-1]["text"] = mergedText
    else:
        transcriptData = TeamsbotTranscript(
            sessionId=sessionId,
            speaker=speaker,
            text=text,
            timestamp=getIsoTimestamp(),
            confidence=1.0,
            language=self.config.language,
            isFinal=isFinal,
            source=source,
        ).model_dump()

        createdTranscript = interface.createTranscript(transcriptData)

        self._lastTranscriptSpeaker = speaker
        self._lastTranscriptText = text
        self._lastTranscriptId = createdTranscript.get("id")

        # Remember unattributed STT rows for retroactive speaker attribution
        if source == "audioCapture" and speaker == "Unknown":
            self._unattributedTranscriptIds.append(createdTranscript.get("id"))

        self._contextBuffer.append({
            "speaker": speaker or "Unknown",
            "text": text,
            "timestamp": getUtcTimestamp(),
            "source": source,
        })

        # Trim the rolling context window; summarize once it overflows badly
        maxSegments = self.config.contextWindowSegments
        if len(self._contextBuffer) > maxSegments:
            if not self._contextSummary and len(self._contextBuffer) > maxSegments * 1.5:
                asyncio.create_task(self._summarizeContextBuffer(sessionId))
            self._contextBuffer = self._contextBuffer[-maxSegments:]

        session = interface.getSession(sessionId)
        if session:
            count = session.get("transcriptSegmentCount", 0) + 1
            interface.updateSession(sessionId, {"transcriptSegmentCount": count})

    if source == "audioCapture":
        self._lastSttTime = time.time()

    # Frontend sees the full merged text for continuations
    displayText = self._lastTranscriptText if isMerge else text
    await _emitSessionEvent(sessionId, "transcript", {
        "id": createdTranscript.get("id"),
        "speaker": speaker,
        "text": displayText,
        "confidence": 1.0,
        "timestamp": getIsoTimestamp(),
        "isContinuation": isMerge,
        "source": source,
        "speakerResolvedFromHint": (
            speakerResolvedFromHint
            if speakerResolvedFromHint is not None
            else False
        ),
    })

    if not isFinal:
        return

    if self.config.responseMode == TeamsbotResponseMode.TRANSCRIBE_ONLY:
        return

    # Bot's own chat: stored for display only, never trigger AI
    if source == "chat" and isBotSpeaker:
        return

    # Stop phrases: trigger immediately without debounce (root cause: 3s debounce delayed stop)
    if self._isStopPhrase(text):
        logger.info(f"Session {sessionId}: Stop phrase detected, triggering analysis immediately")
        await self._analyzeAndRespond(sessionId, interface, voiceInterface, websocket, createdTranscript)
        return

    # Update activity for any pending debounced trigger
    if self._pendingNameTrigger:
        self._pendingNameTrigger["lastActivity"] = time.time()

    # Bot name detection → debounced trigger (wait for speaker to finish)
    if self._detectBotName(text):
        isNew = self._setPendingNameTrigger(sessionId, interface, voiceInterface, websocket, createdTranscript)
        if isNew:
            asyncio.create_task(self._checkPendingNameTrigger())
        return

    # Follow-up window: after a bot response, trigger AI for any human speech
    # without requiring the bot name — the AI decides via shouldRespond
    if (
        source == "audioCapture"
        and not self._isBotSpeaker(speaker)
        and time.time() < self._followUpWindowEnd
        and not self._pendingNameTrigger
    ):
        isNew = self._setPendingNameTrigger(sessionId, interface, voiceInterface, websocket, createdTranscript)
        if isNew:
            logger.info(f"Session {sessionId}: Follow-up window trigger (no name needed)")
            asyncio.create_task(self._checkPendingNameTrigger())
        return

    # Periodic trigger (only when no debounce pending)
    if not self._pendingNameTrigger:
        shouldTrigger = self._shouldTriggerAnalysis(text)
        if shouldTrigger:
            logger.info(f"Session {sessionId}: Periodic trigger (buffer: {len(self._contextBuffer)} segments)")
            await self._analyzeAndRespond(sessionId, interface, voiceInterface, websocket, createdTranscript)
|
|
|
|
def _isBotSpeaker(self, speaker: str) -> bool:
|
|
"""Check if a transcript speaker is the bot itself.
|
|
|
|
Teams captions show the bot as e.g. "BotName (Unverified)" or
|
|
"Nyla Larsson" depending on auth/anonymous join. We match against:
|
|
- The configured/derived bot name
|
|
- The bot account display name if authenticated
|
|
"""
|
|
if not speaker:
|
|
return False
|
|
|
|
speakerLower = speaker.lower().strip()
|
|
|
|
# Match against configured bot name
|
|
botName = self.config.botName.lower().strip()
|
|
if botName and botName in speakerLower:
|
|
return True
|
|
|
|
# Match against bot account email prefix (e.g. "nyla.larsson" from "nyla.larsson@poweron.swiss")
|
|
botAccountEmail = getattr(self, '_botAccountEmail', None) or getattr(self.config, 'botAccountEmail', None)
|
|
if botAccountEmail:
|
|
emailPrefix = botAccountEmail.split("@")[0].lower().replace(".", " ")
|
|
if emailPrefix in speakerLower:
|
|
return True
|
|
|
|
return False
|
|
|
|
def _shouldTriggerAnalysis(self, transcriptText: str, allowPeriodic: bool = True) -> bool:
|
|
"""
|
|
Decide whether to trigger AI analysis based on the latest transcript.
|
|
Bot name detection is handled separately via debounce.
|
|
This method only checks periodic/cooldown triggers.
|
|
"""
|
|
now = time.time()
|
|
timeSinceLastCall = now - self._lastAiCallTime
|
|
|
|
if timeSinceLastCall < self.config.triggerCooldownSeconds:
|
|
return False
|
|
|
|
if allowPeriodic and timeSinceLastCall >= self.config.triggerIntervalSeconds:
|
|
logger.info(f"Trigger: Periodic interval ({self.config.triggerIntervalSeconds}s) elapsed ({timeSinceLastCall:.1f}s)")
|
|
return True
|
|
|
|
return False
|
|
|
|
def _isStopPhrase(self, text: str) -> bool:
|
|
"""Check if text is a stop command (stop, halt, be quiet, etc.). Triggers immediate analysis."""
|
|
if not text or len(text.strip()) < 2:
|
|
return False
|
|
t = text.strip().lower()
|
|
words = [w.strip(".,!?:;\"'()[]") for w in t.split() if w.strip()]
|
|
wordSet = set(words)
|
|
stopWords = {"stop", "stopp", "halt", "ruhe", "stille", "schweig", "arrete", "quiet", "shut"}
|
|
if wordSet & stopWords:
|
|
return True
|
|
if "sei still" in t or "be quiet" in t or "shut up" in t or "aufhoeren" in t or "aufhören" in t:
|
|
return True
|
|
return False
|
|
|
|
def _detectBotName(self, text: str) -> bool:
|
|
"""Check if text contains the bot's name (exact or phonetically similar)."""
|
|
botNameLower = self.config.botName.lower()
|
|
textLower = text.lower()
|
|
|
|
if botNameLower in textLower:
|
|
return True
|
|
|
|
botFirstName = botNameLower.split()[0] if " " in botNameLower else botNameLower
|
|
if len(botFirstName) >= 3:
|
|
for word in textLower.split():
|
|
cleanWord = word.strip(".,!?:;\"'()[]")
|
|
if not cleanWord or len(cleanWord) < 3:
|
|
continue
|
|
if cleanWord == botFirstName:
|
|
return True
|
|
if cleanWord[0] == botFirstName[0] and abs(len(cleanWord) - len(botFirstName)) <= 2:
|
|
common = sum(1 for c in set(botFirstName) if c in cleanWord)
|
|
similarity = common / max(len(set(botFirstName)), len(set(cleanWord)))
|
|
if similarity >= 0.6:
|
|
return True
|
|
return False
|
|
|
|
def _setPendingNameTrigger(self, sessionId, interface, voiceInterface, websocket, triggerTranscript) -> bool:
|
|
"""Set or update a debounced name trigger. Returns True if newly set."""
|
|
if self._pendingNameTrigger:
|
|
self._pendingNameTrigger["lastActivity"] = time.time()
|
|
return False
|
|
self._pendingNameTrigger = {
|
|
"sessionId": sessionId,
|
|
"interface": interface,
|
|
"voiceInterface": voiceInterface,
|
|
"websocket": websocket,
|
|
"triggerTranscript": triggerTranscript,
|
|
"detectedAt": time.time(),
|
|
"lastActivity": time.time(),
|
|
}
|
|
return True
|
|
|
|
async def _checkPendingNameTrigger(self, delaySec: float = 3.0):
|
|
"""Async loop: fire the pending name trigger once the speaker is quiet."""
|
|
await asyncio.sleep(delaySec)
|
|
if not self._pendingNameTrigger:
|
|
return
|
|
|
|
now = time.time()
|
|
lastActivity = self._pendingNameTrigger.get("lastActivity", 0)
|
|
detectedAt = self._pendingNameTrigger.get("detectedAt", 0)
|
|
quietSec = now - lastActivity
|
|
totalWaitSec = now - detectedAt
|
|
|
|
if quietSec >= 3.0 or totalWaitSec >= 15.0:
|
|
trigger = self._pendingNameTrigger
|
|
self._pendingNameTrigger = None
|
|
logger.info(
|
|
f"Session {trigger['sessionId']}: Debounced name trigger fires "
|
|
f"(quiet={quietSec:.1f}s, totalWait={totalWaitSec:.1f}s)"
|
|
)
|
|
await self._analyzeAndRespond(
|
|
trigger["sessionId"],
|
|
trigger["interface"],
|
|
trigger["voiceInterface"],
|
|
trigger["websocket"],
|
|
trigger["triggerTranscript"],
|
|
)
|
|
else:
|
|
remaining = max(0.5, 3.0 - quietSec)
|
|
asyncio.create_task(self._checkPendingNameTrigger(remaining))
|
|
|
|
    async def _analyzeAndRespond(
        self,
        sessionId: str,
        interface,
        voiceInterface,
        websocket: WebSocket,
        triggerTranscript: Dict[str, Any],
    ):
        """Run SPEECH_TEAMS AI analysis and respond if needed.

        Pipeline: build a transcript context from the ring buffer, call the
        SPEECH_TEAMS AI operation, parse its structured decision, then —
        depending on the AI verdict and the configured response mode/channel —
        play a TTS answer, post a chat message, persist the response, and
        execute any AI-issued follow-up commands.

        A simple in-progress flag serializes concurrent triggers; the cooldown
        timestamp is updated up front so parallel trigger sources back off.
        """
        # Re-entrancy guard: multiple trigger paths may fire close together.
        if self._aiAnalysisInProgress:
            logger.info(f"Session {sessionId}: AI analysis already in progress, skipping duplicate trigger")
            return
        self._aiAnalysisInProgress = True
        self._lastAiCallTime = time.time()

        # Step 1: Build transcript context from buffer.
        # Mark bot's own utterances and chat messages for the AI.
        contextLines = []
        for segment in self._contextBuffer:
            speaker = segment.get("speaker", "Unknown")
            text = segment.get("text", "")
            segSource = segment.get("source", "caption")
            prefix = "Chat" if segSource == "chat" else ""
            if self._isBotSpeaker(speaker):
                # Label the bot's own lines so the AI does not answer itself.
                contextLines.append(f"[YOU ({self.config.botName})]: {text}")
            elif prefix:
                contextLines.append(f"[{prefix}: {speaker}]: {text}")
            else:
                contextLines.append(f"[{speaker}]: {text}")

        # Include session context if provided by the user at session start
        sessionContextStr = ""
        if self._sessionContext:
            sessionContextStr = f"\nSESSION_CONTEXT (background knowledge provided by the user):\n{self._sessionContext}\n"

        # Include summary of earlier conversation if available
        summaryStr = ""
        if self._contextSummary:
            summaryStr = f"\nEARLIER_CONVERSATION_SUMMARY:\n{self._contextSummary}\n"

        transcriptContext = f"BOT_NAME:{self.config.botName}{sessionContextStr}{summaryStr}\nRECENT_TRANSCRIPT:\n" + "\n".join(contextLines)

        # Step 2: Call SPEECH_TEAMS
        try:
            from modules.services.serviceAi.mainServiceAi import AiService

            # Create minimal service context for AI billing
            serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
            aiService = AiService(serviceCenter=serviceContext)
            await aiService.ensureAiObjectsInitialized()

            request = AiCallRequest(
                prompt=self.config.aiSystemPrompt,
                context=transcriptContext,
                options=AiCallOptions(
                    operationType=OperationTypeEnum.SPEECH_TEAMS,
                    priority=PriorityEnum.SPEED,
                )
            )

            response = await aiService.callAi(request)

            # Step 3: Parse structured response
            try:
                speechResult = SpeechTeamsResponse.model_validate_json(response.content)
            except Exception:
                # Model may have wrapped the JSON in a markdown code fence —
                # try to extract JSON from response content before giving up.
                try:
                    jsonStr = response.content
                    if "```json" in jsonStr:
                        jsonStr = jsonStr.split("```json")[1].split("```")[0]
                    elif "```" in jsonStr:
                        jsonStr = jsonStr.split("```")[1].split("```")[0]
                    speechResult = SpeechTeamsResponse.model_validate_json(jsonStr.strip())
                except Exception as parseErr:
                    # Unparseable output: fall back to a "do nothing" decision.
                    logger.warning(f"Failed to parse SPEECH_TEAMS response: {parseErr}")
                    speechResult = SpeechTeamsResponse(
                        shouldRespond=False,
                        reasoning=f"Parse error: {str(parseErr)[:100]}",
                        detectedIntent="none"
                    )

            logger.info(
                f"SPEECH_TEAMS result: shouldRespond={speechResult.shouldRespond}, "
                f"intent={speechResult.detectedIntent}, "
                f"reasoning={speechResult.reasoning[:80]}..."
            )

            # Emit analysis event (always, for debug/UI)
            await _emitSessionEvent(sessionId, "analysis", {
                "shouldRespond": speechResult.shouldRespond,
                "detectedIntent": speechResult.detectedIntent,
                "reasoning": speechResult.reasoning,
                "modelName": response.modelName,
                "processingTime": response.processingTime,
                "priceCHF": response.priceCHF,
            })

            # Step 4a: Handle STOP intent -- stop audio immediately
            if speechResult.detectedIntent == "stop":
                logger.info(f"Session {sessionId}: AI detected STOP intent: {speechResult.reasoning}")
                if websocket:
                    try:
                        await websocket.send_text(json.dumps({
                            "type": "stopAudio",
                            "sessionId": sessionId,
                        }))
                    except Exception as stopErr:
                        logger.warning(f"Failed to send stop command: {stopErr}")
                return

            # Step 4b: Respond if AI decided to.
            # NOTE(review): this gate requires responseText; a payload carrying
            # only responseTextForVoice/responseTextForChat is skipped here —
            # confirm that is intended.
            if speechResult.shouldRespond and speechResult.responseText:

                if self.config.responseMode == TeamsbotResponseMode.MANUAL:
                    # In manual mode, suggest but don't send
                    await _emitSessionEvent(sessionId, "suggestedResponse", {
                        "responseText": speechResult.responseText,
                        "detectedIntent": speechResult.detectedIntent,
                        "reasoning": speechResult.reasoning,
                    })
                    return

                # Determine response channel: per-request (AI) overrides config
                channels = speechResult.responseChannels
                if channels and isinstance(channels, list):
                    channelStr = ",".join(str(c).lower().strip() for c in channels)
                    sendVoice = "voice" in channelStr
                    sendChat = "chat" in channelStr
                    logger.info(f"Response channel (from AI): voice={sendVoice}, chat={sendChat}")
                else:
                    # Config value may be an enum or a plain string.
                    channelRaw = self.config.responseChannel
                    channelStr = (channelRaw.value if hasattr(channelRaw, 'value') else str(channelRaw)).lower().strip()
                    sendVoice = channelStr in ("voice", "both")
                    sendChat = channelStr in ("chat", "both")
                    logger.info(f"Response channel (from config): '{channelStr}'")

                if sendVoice and sendChat:
                    responseType = TeamsbotResponseType.BOTH
                elif sendVoice:
                    responseType = TeamsbotResponseType.AUDIO
                else:
                    responseType = TeamsbotResponseType.CHAT

                # Suppress duplicate responses in short windows ("repeat loop" protection).
                canonicalText = (
                    speechResult.responseText
                    or speechResult.responseTextForVoice
                    or speechResult.responseTextForChat
                    or ""
                )
                normalizedResponse = (canonicalText or "").strip().lower()
                nowTs = time.time()
                if (
                    normalizedResponse
                    and self._lastBotResponseText == normalizedResponse
                    and (nowTs - self._lastBotResponseTs) < 90
                ):
                    logger.info(f"Session {sessionId}: Suppressing duplicate bot response within 90s window")
                    await _emitSessionEvent(sessionId, "analysis", {
                        "shouldRespond": False,
                        "detectedIntent": speechResult.detectedIntent,
                        "reasoning": "Suppressed duplicate response within 90s",
                        "modelName": response.modelName,
                        "processingTime": response.processingTime,
                        "priceCHF": response.priceCHF,
                    })
                    return

                # Resolve text per channel (AI can send different content to voice vs chat)
                textForVoice = speechResult.responseTextForVoice or speechResult.responseText
                textForChat = speechResult.responseTextForChat or speechResult.responseText
                storedText = textForChat or textForVoice or speechResult.responseText

                # Step 5a: Voice response (TTS -> Audio to bot)
                if sendVoice and textForVoice:
                    try:
                        await _emitSessionEvent(sessionId, "ttsDeliveryStatus", {
                            "status": "requested",
                            "hasWebSocket": websocket is not None,
                            "message": "TTS generation requested",
                            "timestamp": getIsoTimestamp(),
                        })
                        logger.info(
                            f"Session {sessionId}: TTS requested (websocket_available={websocket is not None})"
                        )
                        ttsResult = await voiceInterface.textToSpeech(
                            text=textForVoice,
                            languageCode=self.config.language,
                            voiceName=self.config.voiceId
                        )

                        # Validate the TTS payload before touching the websocket.
                        if not ttsResult or not isinstance(ttsResult, dict):
                            raise RuntimeError("TTS returned invalid result payload")

                        if ttsResult.get("success") is False:
                            raise RuntimeError(f"TTS backend error: {ttsResult.get('error', 'unknown')}")

                        audioContent = ttsResult.get("audioContent")
                        if not audioContent:
                            raise RuntimeError("TTS returned no audioContent")

                        if websocket:
                            # Audio is base64-wrapped for the JSON transport to the browser bot.
                            await websocket.send_text(json.dumps({
                                "type": "playAudio",
                                "sessionId": sessionId,
                                "audio": {
                                    "data": base64.b64encode(audioContent if isinstance(audioContent, bytes) else audioContent.encode()).decode(),
                                    "format": "mp3",
                                },
                            }))
                            logger.info(f"Session {sessionId}: TTS audio dispatched to bot")
                            await _emitSessionEvent(sessionId, "ttsDeliveryStatus", {
                                "status": "dispatched",
                                "hasWebSocket": True,
                                "message": "TTS audio dispatched to bot",
                                "timestamp": getIsoTimestamp(),
                            })
                        else:
                            logger.warning(
                                f"Session {sessionId}: TTS audio generated but cannot be played (bot websocket unavailable, likely fallback mode)"
                            )
                            await _emitSessionEvent(sessionId, "ttsDeliveryStatus", {
                                "status": "unavailable",
                                "hasWebSocket": False,
                                "message": "TTS audio generated but bot websocket unavailable",
                                "timestamp": getIsoTimestamp(),
                            })
                    except Exception as ttsErr:
                        logger.warning(f"TTS failed for session {sessionId}: {ttsErr}")
                        await _emitSessionEvent(sessionId, "ttsDeliveryStatus", {
                            "status": "failed",
                            "hasWebSocket": websocket is not None,
                            "message": str(ttsErr),
                            "timestamp": getIsoTimestamp(),
                        })
                        if not sendChat:
                            sendChat = True  # Fallback to chat if voice-only and TTS failed

                # Step 5b: Chat response (send text message to meeting chat)
                if sendChat and textForChat:
                    try:
                        if websocket:
                            await websocket.send_text(json.dumps({
                                "type": "sendChatMessage",
                                "sessionId": sessionId,
                                "text": textForChat,
                            }))
                            logger.info(f"Chat response sent for session {sessionId}")
                    except Exception as chatErr:
                        logger.warning(f"Chat message send failed for session {sessionId}: {chatErr}")

                # Step 6: Store bot response in the DB
                botResponseData = TeamsbotBotResponse(
                    sessionId=sessionId,
                    responseText=storedText,
                    responseType=responseType,
                    detectedIntent=speechResult.detectedIntent,
                    reasoning=speechResult.reasoning,
                    triggeredByTranscriptId=triggerTranscript.get("id"),
                    modelName=response.modelName,
                    processingTime=response.processingTime,
                    priceCHF=response.priceCHF,
                    timestamp=getIsoTimestamp(),
                ).model_dump()

                createdResponse = interface.createBotResponse(botResponseData)

                # Emit SSE event so the frontend shows the response
                await _emitSessionEvent(sessionId, "botResponse", {
                    "id": createdResponse.get("id"),
                    "responseText": storedText,
                    "responseType": responseType.value,
                    "detectedIntent": speechResult.detectedIntent,
                    "reasoning": speechResult.reasoning,
                    "modelName": response.modelName,
                    "processingTime": response.processingTime,
                    "priceCHF": response.priceCHF,
                    "timestamp": botResponseData.get("timestamp"),
                })

                # Update session response count
                session = interface.getSession(sessionId)
                if session:
                    count = session.get("botResponseCount", 0) + 1
                    interface.updateSession(sessionId, {"botResponseCount": count})

                # Remember the last response for the 90s duplicate-suppression check.
                self._lastBotResponseText = normalizedResponse
                self._lastBotResponseTs = nowTs

                # Record bot response in transcript (exactly once, regardless of channel)
                botTranscriptData = TeamsbotTranscript(
                    sessionId=sessionId,
                    speaker=self.config.botName,
                    text=storedText,
                    timestamp=getIsoTimestamp(),
                    confidence=1.0,
                    language=self.config.language,
                    isFinal=True,
                ).model_dump()
                botTranscript = interface.createTranscript(botTranscriptData)

                self._contextBuffer.append({
                    "speaker": self.config.botName,
                    "text": storedText,
                    "timestamp": getUtcTimestamp(),
                    "source": "botResponse",
                })

                await _emitSessionEvent(sessionId, "transcript", {
                    "id": botTranscript.get("id"),
                    "speaker": self.config.botName,
                    "text": storedText,
                    "confidence": 1.0,
                    "timestamp": getIsoTimestamp(),
                    "isContinuation": False,
                    "source": "botResponse",
                    "speakerResolvedFromHint": False,
                })

                # Reset differential writing tracker so next STT creates a new block
                self._lastTranscriptSpeaker = self.config.botName
                self._lastTranscriptText = storedText
                self._lastTranscriptId = botTranscript.get("id")

                # Open the follow-up window: human speech within the next 15s
                # triggers the AI again without needing the bot name.
                self._followUpWindowEnd = time.time() + 15.0
                logger.info(f"Bot responded in session {sessionId}: intent={speechResult.detectedIntent}, follow-up window open for 15s")

                # Step 7: Execute AI-issued commands (if any)
                if speechResult.commands:
                    await self._executeCommands(sessionId, speechResult.commands, voiceInterface, websocket)

        except Exception as e:
            logger.error(f"SPEECH_TEAMS analysis failed for session {sessionId}: {type(e).__name__}: {e}", exc_info=True)
            await _emitSessionEvent(sessionId, "error", {"message": f"AI analysis failed: {type(e).__name__}: {str(e)}"})
        finally:
            # Always release the re-entrancy guard.
            self._aiAnalysisInProgress = False
|
|
|
# =========================================================================
|
|
# AI Command Execution
|
|
# =========================================================================
|
|
|
|
async def _executeCommands(
|
|
self,
|
|
sessionId: str,
|
|
commands: List[TeamsbotCommand],
|
|
voiceInterface,
|
|
websocket: WebSocket,
|
|
):
|
|
"""Execute structured commands returned by the AI.
|
|
Each command is dispatched to a dedicated handler function."""
|
|
for cmd in commands:
|
|
action = cmd.action
|
|
params = cmd.params or {}
|
|
logger.info(f"Session {sessionId}: Executing command '{action}' with params {params}")
|
|
try:
|
|
if action == "toggleTranscript":
|
|
await self._cmdToggleTranscript(sessionId, params, websocket)
|
|
elif action == "toggleChat":
|
|
await self._cmdToggleChat(sessionId, params, websocket)
|
|
elif action == "sendChat":
|
|
await self._cmdSendChat(sessionId, params, websocket)
|
|
elif action == "readChat":
|
|
await self._cmdReadChat(sessionId, params, voiceInterface, websocket)
|
|
elif action == "readAloud":
|
|
await self._cmdReadAloud(sessionId, params, voiceInterface, websocket)
|
|
elif action == "changeLanguage":
|
|
await self._cmdChangeLanguage(sessionId, params)
|
|
elif action in ("toggleMic", "toggleCamera"):
|
|
await self._cmdToggleMicOrCamera(sessionId, action, params, websocket)
|
|
elif action == "sendMail":
|
|
await self._cmdSendMail(sessionId, params)
|
|
elif action == "storeDocument":
|
|
await self._cmdStoreDocument(sessionId, params)
|
|
else:
|
|
logger.warning(f"Session {sessionId}: Unknown command '{action}'")
|
|
except Exception as cmdErr:
|
|
logger.warning(f"Session {sessionId}: Command '{action}' failed: {cmdErr}")
|
|
|
|
async def _cmdToggleTranscript(self, sessionId: str, params: dict, websocket: WebSocket):
|
|
"""Caption on/off - toggle Teams live transcript capture."""
|
|
enable = params.get("enable", True)
|
|
if websocket:
|
|
await websocket.send_text(json.dumps({
|
|
"type": "botCommand",
|
|
"sessionId": sessionId,
|
|
"command": "toggleTranscript",
|
|
"params": {"enable": enable},
|
|
}))
|
|
|
|
async def _cmdToggleChat(self, sessionId: str, params: dict, websocket: WebSocket):
|
|
"""Chat on/off - enable/disable meeting chat monitoring."""
|
|
enable = params.get("enable", True)
|
|
if websocket:
|
|
await websocket.send_text(json.dumps({
|
|
"type": "botCommand",
|
|
"sessionId": sessionId,
|
|
"command": "toggleChat",
|
|
"params": {"enable": enable},
|
|
}))
|
|
|
|
async def _cmdSendChat(self, sessionId: str, params: dict, websocket: WebSocket):
|
|
"""Send a message to the meeting chat."""
|
|
chatText = params.get("text", "")
|
|
if chatText and websocket:
|
|
await websocket.send_text(json.dumps({
|
|
"type": "sendChatMessage",
|
|
"sessionId": sessionId,
|
|
"text": chatText,
|
|
}))
|
|
|
|
async def _cmdReadChat(
|
|
self,
|
|
sessionId: str,
|
|
params: dict,
|
|
voiceInterface,
|
|
websocket: WebSocket,
|
|
):
|
|
"""Read chat messages (from DB) with optional fromdatetime/todatetime, then speak or send to chat."""
|
|
from . import interfaceFeatureTeamsbot as interfaceDb
|
|
interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
|
|
transcripts = interface.getTranscripts(sessionId)
|
|
fromDt = params.get("fromdatetime") or params.get("fromDateTime")
|
|
toDt = params.get("todatetime") or params.get("toDateTime")
|
|
chatOnly = [t for t in transcripts if t.get("source") in ("chat", "chatHistory")]
|
|
if fromDt:
|
|
chatOnly = [t for t in chatOnly if (t.get("timestamp") or "") >= fromDt]
|
|
if toDt:
|
|
chatOnly = [t for t in chatOnly if (t.get("timestamp") or "") <= toDt]
|
|
summary = "\n".join(f"[{t.get('speaker', '?')}]: {t.get('text', '')}" for t in chatOnly[-20:])
|
|
if not summary:
|
|
summary = "Keine Chat-Nachrichten im angegebenen Zeitraum."
|
|
if voiceInterface and websocket:
|
|
ttsResult = await voiceInterface.textToSpeech(
|
|
text=summary[:2000],
|
|
languageCode=self.config.language,
|
|
voiceName=self.config.voiceId,
|
|
)
|
|
if ttsResult and isinstance(ttsResult, dict) and ttsResult.get("audioContent"):
|
|
audioContent = ttsResult["audioContent"]
|
|
await websocket.send_text(json.dumps({
|
|
"type": "playAudio",
|
|
"sessionId": sessionId,
|
|
"audio": {
|
|
"data": base64.b64encode(
|
|
audioContent if isinstance(audioContent, bytes) else audioContent.encode()
|
|
).decode(),
|
|
"format": "mp3",
|
|
},
|
|
}))
|
|
|
|
async def _cmdReadAloud(
|
|
self,
|
|
sessionId: str,
|
|
params: dict,
|
|
voiceInterface,
|
|
websocket: WebSocket,
|
|
):
|
|
"""Read text aloud via TTS and play in meeting."""
|
|
readText = params.get("text", "")
|
|
if readText and voiceInterface:
|
|
ttsResult = await voiceInterface.textToSpeech(
|
|
text=readText,
|
|
languageCode=self.config.language,
|
|
voiceName=self.config.voiceId,
|
|
)
|
|
if ttsResult and isinstance(ttsResult, dict):
|
|
audioContent = ttsResult.get("audioContent")
|
|
if audioContent and websocket:
|
|
await websocket.send_text(json.dumps({
|
|
"type": "playAudio",
|
|
"sessionId": sessionId,
|
|
"audio": {
|
|
"data": base64.b64encode(
|
|
audioContent if isinstance(audioContent, bytes) else audioContent.encode()
|
|
).decode(),
|
|
"format": "mp3",
|
|
},
|
|
}))
|
|
|
|
async def _cmdChangeLanguage(self, sessionId: str, params: dict):
|
|
"""Change bot language."""
|
|
newLang = params.get("language", "")
|
|
if newLang:
|
|
self.config = self.config.model_copy(update={"language": newLang})
|
|
logger.info(f"Session {sessionId}: Language changed to '{newLang}'")
|
|
await _emitSessionEvent(sessionId, "languageChanged", {"language": newLang})
|
|
|
|
async def _cmdToggleMicOrCamera(
|
|
self,
|
|
sessionId: str,
|
|
action: str,
|
|
params: dict,
|
|
websocket: WebSocket,
|
|
):
|
|
"""Toggle mic or camera in the meeting."""
|
|
if websocket:
|
|
await websocket.send_text(json.dumps({
|
|
"type": "botCommand",
|
|
"sessionId": sessionId,
|
|
"command": action,
|
|
"params": params,
|
|
}))
|
|
|
|
async def _cmdSendMail(self, sessionId: str, params: dict):
|
|
"""Send email via Service Center MessagingService."""
|
|
recipient = params.get("recipient") or params.get("to", "")
|
|
subject = params.get("subject", "")
|
|
message = params.get("message") or params.get("body", "")
|
|
if not recipient or not subject:
|
|
logger.warning(f"Session {sessionId}: sendMail requires recipient and subject")
|
|
return
|
|
try:
|
|
from modules.serviceCenter import ServiceCenterContext, getService
|
|
ctx = ServiceCenterContext(
|
|
user=self.currentUser,
|
|
mandate_id=self.mandateId,
|
|
feature_instance_id=self.instanceId,
|
|
)
|
|
messaging = getService("messaging", ctx)
|
|
success = messaging.sendEmailDirect(
|
|
recipient=recipient,
|
|
subject=subject,
|
|
message=message,
|
|
userId=str(self.currentUser.id) if self.currentUser else None,
|
|
)
|
|
if success:
|
|
logger.info(f"Session {sessionId}: Email sent to {recipient}")
|
|
else:
|
|
logger.warning(f"Session {sessionId}: Email send failed for {recipient}")
|
|
except Exception as e:
|
|
logger.warning(f"Session {sessionId}: sendMail failed: {e}")
|
|
|
|
async def _cmdStoreDocument(self, sessionId: str, params: dict):
|
|
"""Store document via Service Center SharepointService."""
|
|
sitePath = params.get("sitePath") or params.get("site", "")
|
|
folderPath = params.get("folderPath") or params.get("folder", "")
|
|
fileName = params.get("fileName", "document.txt")
|
|
content = params.get("content", "")
|
|
if isinstance(content, str):
|
|
content = content.encode("utf-8")
|
|
if not sitePath or not folderPath:
|
|
logger.warning(f"Session {sessionId}: storeDocument requires sitePath and folderPath")
|
|
return
|
|
try:
|
|
from modules.serviceCenter import ServiceCenterContext, getService
|
|
ctx = ServiceCenterContext(
|
|
user=self.currentUser,
|
|
mandate_id=self.mandateId,
|
|
feature_instance_id=self.instanceId,
|
|
)
|
|
sharepoint = getService("sharepoint", ctx)
|
|
if not sharepoint.setAccessTokenFromConnection(self.currentUser):
|
|
logger.warning(f"Session {sessionId}: SharePoint connection not configured")
|
|
return
|
|
site = await sharepoint.getSiteByStandardPath(sitePath)
|
|
if not site:
|
|
logger.warning(f"Session {sessionId}: SharePoint site not found: {sitePath}")
|
|
return
|
|
result = await sharepoint.uploadFile(
|
|
siteId=site["id"],
|
|
folderPath=folderPath,
|
|
fileName=fileName,
|
|
content=content,
|
|
)
|
|
if "error" in result:
|
|
logger.warning(f"Session {sessionId}: storeDocument failed: {result['error']}")
|
|
else:
|
|
logger.info(f"Session {sessionId}: Document stored: {fileName}")
|
|
except Exception as e:
|
|
logger.warning(f"Session {sessionId}: storeDocument failed: {e}")
|
|
|
|
# =========================================================================
|
|
# Context Summarization (for long sessions)
|
|
# =========================================================================
|
|
|
|
async def _summarizeSessionContext(self, sessionId: str, rawContext: str) -> str:
|
|
"""Summarize a long user-provided session context to its essential points.
|
|
This reduces token usage in every subsequent AI call."""
|
|
try:
|
|
from modules.services.serviceAi.mainServiceAi import AiService
|
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
|
|
|
serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
|
|
aiService = AiService(serviceCenter=serviceContext)
|
|
await aiService.ensureAiObjectsInitialized()
|
|
|
|
request = AiCallRequest(
|
|
prompt=(
|
|
"Fasse den folgenden Kontext auf die wesentlichen Punkte zusammen. "
|
|
"Behalte alle wichtigen Fakten, Namen, Zahlen, Entscheidungen und Aktionspunkte. "
|
|
"Entferne Fuelltext und Wiederholungen. "
|
|
"Antworte NUR mit der Zusammenfassung, keine Erklaerungen oder Einleitungen."
|
|
),
|
|
context=rawContext,
|
|
options=AiCallOptions(
|
|
operationType=OperationTypeEnum.DATA_ANALYSE,
|
|
priority=PriorityEnum.SPEED,
|
|
)
|
|
)
|
|
|
|
response = await aiService.callAi(request)
|
|
if response and response.errorCount == 0 and response.content:
|
|
summary = response.content.strip()
|
|
logger.info(f"Session {sessionId}: Context summarized from {len(rawContext)} to {len(summary)} chars")
|
|
return summary
|
|
except Exception as e:
|
|
logger.warning(f"Session context summarization failed for {sessionId}: {e}")
|
|
|
|
# Fallback: return original (truncated if very long)
|
|
return rawContext[:2000] if len(rawContext) > 2000 else rawContext
|
|
|
|
async def _summarizeContextBuffer(self, sessionId: str):
|
|
"""Summarize the older part of the context buffer to preserve information
|
|
without exceeding the context window. This runs in the background."""
|
|
try:
|
|
if self._contextSummary:
|
|
return # Already summarized recently
|
|
|
|
# Take the older half of the buffer for summarization
|
|
halfPoint = len(self._contextBuffer) // 2
|
|
oldSegments = self._contextBuffer[:halfPoint]
|
|
|
|
if len(oldSegments) < 10:
|
|
return # Not enough to summarize
|
|
|
|
# Build text to summarize
|
|
lines = []
|
|
for seg in oldSegments:
|
|
speaker = seg.get("speaker", "Unknown")
|
|
text = seg.get("text", "")
|
|
lines.append(f"[{speaker}]: {text}")
|
|
textToSummarize = "\n".join(lines)
|
|
|
|
from modules.services.serviceAi.mainServiceAi import AiService
|
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
|
|
|
serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
|
|
aiService = AiService(serviceCenter=serviceContext)
|
|
await aiService.ensureAiObjectsInitialized()
|
|
|
|
request = AiCallRequest(
|
|
prompt="Fasse das folgende Meeting-Transkript in 3-5 Saetzen zusammen. Nenne die wichtigsten Themen, Entscheidungen und offene Fragen. Antworte NUR mit der Zusammenfassung, keine Erklaerungen.",
|
|
context=textToSummarize,
|
|
options=AiCallOptions(
|
|
operationType=OperationTypeEnum.DATA_ANALYSE,
|
|
priority=PriorityEnum.SPEED,
|
|
)
|
|
)
|
|
|
|
response = await aiService.callAi(request)
|
|
if response and response.errorCount == 0:
|
|
self._contextSummary = response.content.strip()
|
|
logger.info(f"Session {sessionId}: Context summarized ({len(oldSegments)} segments -> {len(self._contextSummary)} chars)")
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Context summarization failed for session {sessionId}: {e}")
|
|
|
|
# =========================================================================
|
|
# Meeting Summary
|
|
# =========================================================================
|
|
|
|
async def _generateMeetingSummary(self, sessionId: str):
|
|
"""Generate an AI summary of the meeting after it ends."""
|
|
try:
|
|
from . import interfaceFeatureTeamsbot as interfaceDb
|
|
|
|
interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
|
|
transcripts = interface.getTranscripts(sessionId)
|
|
|
|
if not transcripts or len(transcripts) < 5:
|
|
return # Not enough content for a summary
|
|
|
|
# Build full transcript
|
|
fullTranscript = "\n".join(
|
|
f"[{t.get('speaker', 'Unknown')}]: {t.get('text', '')}"
|
|
for t in transcripts
|
|
)
|
|
|
|
from modules.services.serviceAi.mainServiceAi import AiService
|
|
|
|
serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
|
|
aiService = AiService(serviceCenter=serviceContext)
|
|
await aiService.ensureAiObjectsInitialized()
|
|
|
|
request = AiCallRequest(
|
|
prompt="Erstelle eine kurze Zusammenfassung dieses Meeting-Transkripts. Nenne die wichtigsten Punkte, Entscheidungen und offene Aktionspunkte.",
|
|
context=fullTranscript,
|
|
options=AiCallOptions(
|
|
operationType=OperationTypeEnum.DATA_ANALYSE,
|
|
priority=PriorityEnum.BALANCED,
|
|
)
|
|
)
|
|
|
|
response = await aiService.callAi(request)
|
|
if response and response.errorCount == 0:
|
|
interface.updateSession(sessionId, {"summary": response.content})
|
|
logger.info(f"Meeting summary generated for session {sessionId}")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to generate meeting summary for session {sessionId}: {e}")
|