# File viewer metadata: 678 lines, 29 KiB, Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""
|
|
Teamsbot Service - Pipeline Orchestrator.
|
|
Manages the audio processing pipeline: STT -> Context Buffer -> SPEECH_TEAMS -> TTS -> Bridge.
|
|
"""
|
|
|
|
import logging
|
|
import json
|
|
import asyncio
|
|
import time
|
|
import base64
|
|
from typing import Optional, Dict, Any, List
|
|
|
|
from fastapi import WebSocket
|
|
|
|
from modules.datamodels.datamodelUam import User
|
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
|
from modules.shared.timeUtils import getUtcTimestamp, getIsoTimestamp
|
|
|
|
from .datamodelTeamsbot import (
|
|
TeamsbotSessionStatus,
|
|
TeamsbotTranscript,
|
|
TeamsbotBotResponse,
|
|
TeamsbotResponseType,
|
|
TeamsbotConfig,
|
|
TeamsbotResponseMode,
|
|
TeamsbotResponseChannel,
|
|
SpeechTeamsResponse,
|
|
)
|
|
from .browserBotConnector import BrowserBotConnector
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# =========================================================================
|
|
# Minimal Service Context (for AI billing in bridge callbacks)
|
|
# =========================================================================
|
|
|
|
class _ServiceContext:
|
|
"""Minimal context providing user/mandate info for AiService billing.
|
|
Used by bridge callbacks where a full Services instance is not available."""
|
|
|
|
def __init__(self, user, mandateId, featureInstanceId=None):
|
|
self.user = user
|
|
self.mandateId = mandateId
|
|
self.featureInstanceId = featureInstanceId
|
|
self.featureCode = "teamsbot"
|
|
|
|
|
|
# =========================================================================
|
|
# Session Event Queues (for SSE streaming to frontend)
|
|
# =========================================================================
|
|
_sessionEvents: Dict[str, asyncio.Queue] = {}
|
|
|
|
|
|
async def _emitSessionEvent(sessionId: str, eventType: str, data: Any):
|
|
"""Emit an event to the session's SSE stream."""
|
|
eventQueue = _sessionEvents.get(sessionId)
|
|
if eventQueue:
|
|
await eventQueue.put({"type": eventType, "data": data, "timestamp": getIsoTimestamp()})
|
|
|
|
|
|
class TeamsbotService:
|
|
"""
|
|
Pipeline Orchestrator for Teams Bot sessions.
|
|
Coordinates VoiceObjects (STT/TTS), AiService (SPEECH_TEAMS), and Bridge communication.
|
|
"""
|
|
|
|
def __init__(self, currentUser: User, mandateId: str, instanceId: str, config: TeamsbotConfig):
    """Bind the service to one user, mandate and feature instance.

    Stores the given values, builds the BrowserBotConnector from the
    config's effective browser-bot URL, and initialises the per-session
    trigger timestamp and transcript context buffer.
    """
    # Identity and configuration
    self.currentUser, self.mandateId, self.instanceId = currentUser, mandateId, instanceId
    self.config = config

    # Connector towards the Browser Bot service
    browserBotUrl = config._getEffectiveBrowserBotUrl()
    self.browserBotConnector = BrowserBotConnector(browserBotUrl)

    # State
    self._lastAiCallTime: float = 0.0  # 0.0 means no AI call has been made yet
    self._contextBuffer: List[Dict[str, Any]] = []  # most recent transcript segments
|
|
|
|
# =========================================================================
|
|
# Session Lifecycle
|
|
# =========================================================================
|
|
|
|
async def joinMeeting(
    self,
    sessionId: str,
    meetingLink: str,
    connectionId: Optional[str] = None,
    gatewayBaseUrl: str = "",
):
    """Send the join command for a session to the Browser Bot service.

    According to the original description, the browser bot will then:
    1. Launch a headless browser
    2. Navigate to Teams web app
    3. Join the meeting as anonymous guest
    4. Enable captions and start scraping
    5. Connect back via WebSocket to send transcripts

    Args:
        sessionId: Session to join; also the key of the SSE queue created here.
        meetingLink: Meeting URL, forwarded to the bot as ``meetingUrl``.
        connectionId: Accepted for interface compatibility; not used here.
        gatewayBaseUrl: http(s) base URL of this gateway. It is rewritten to
            the ws(s) URL the bot uses to connect back.

    Errors raised inside the join sequence are not propagated: they are
    logged, written to the session as ERROR and emitted as a
    ``statusChange`` event.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb

    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)

    # Initialize SSE event queue
    _sessionEvents[sessionId] = asyncio.Queue()

    async def _reportError(errorMsg: str):
        # Persist the failure and notify SSE listeners (shared by both error paths).
        interface.updateSession(sessionId, {
            "status": TeamsbotSessionStatus.ERROR.value,
            "errorMessage": errorMsg,
        })
        await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": errorMsg})

    try:
        # Update status to JOINING
        interface.updateSession(sessionId, {"status": TeamsbotSessionStatus.JOINING.value})
        await _emitSessionEvent(sessionId, "statusChange", {"status": "joining"})

        session = interface.getSession(sessionId)
        if not session:
            raise ValueError(f"Session {sessionId} not found")

        # Build the full WebSocket URL for the bot to connect back to this gateway instance.
        # gatewayBaseUrl is passed from the route handler (derived from request.base_url).
        if not gatewayBaseUrl:
            logger.warning(f"No gatewayBaseUrl given for session {sessionId}; bot callback URL will have no host")
        wsScheme = "wss" if gatewayBaseUrl.startswith("https") else "ws"
        gatewayHost = gatewayBaseUrl.replace("https://", "").replace("http://", "").rstrip("/")
        fullGatewayWsUrl = f"{wsScheme}://{gatewayHost}/api/teamsbot/{self.instanceId}/bot/ws/{sessionId}"

        result = await self.browserBotConnector.joinMeeting(
            sessionId=sessionId,
            meetingUrl=meetingLink,
            # Use `or` so a stored None/empty botName falls back to the configured
            # name, mirroring the backgroundImageUrl fallback below.
            botName=session.get("botName") or self.config.botName,
            instanceId=self.instanceId,
            gatewayWsUrl=fullGatewayWsUrl,
            language=self.config.language,
            botAccountEmail=self.config.botAccountEmail,
            botAccountPassword=self.config.botAccountPassword,
            backgroundImageUrl=session.get("backgroundImageUrl") or self.config.backgroundImageUrl,
        )

        if result.get("success"):
            # The session is already JOINING (set above) and becomes ACTIVE when the
            # bot reports in over the WebSocket. Do not write JOINING again here:
            # the bot may already have connected while the call above was awaited,
            # and a second write would revert an ACTIVE session.
            logger.info(f"Browser bot deployment started for session {sessionId}")
        else:
            errorMsg = result.get("error", "Unknown error joining meeting")
            await _reportError(errorMsg)
            logger.error(f"Failed to deploy browser bot for session {sessionId}: {errorMsg}")

    except Exception as e:
        logger.error(f"Error joining meeting for session {sessionId}: {e}", exc_info=True)
        await _reportError(str(e))
|
|
|
|
async def leaveMeeting(self, sessionId: str):
    """Send the leave command to the Browser Bot service and close the session.

    Sequence: mark the session LEAVING, ask the browser bot to leave, mark
    the session ENDED with an ``endedAt`` timestamp, then start the meeting
    summary as a background task. Each status change is also emitted as a
    ``statusChange`` event.

    Errors in that sequence are not propagated: they are logged, written to
    the session as ERROR and emitted as a ``statusChange`` event. The
    session's SSE queue is removed from the registry in every case.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb

    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)

    try:
        interface.updateSession(sessionId, {"status": TeamsbotSessionStatus.LEAVING.value})
        await _emitSessionEvent(sessionId, "statusChange", {"status": "leaving"})

        await self.browserBotConnector.leaveMeeting(sessionId)

        interface.updateSession(sessionId, {
            "status": TeamsbotSessionStatus.ENDED.value,
            "endedAt": getIsoTimestamp(),
        })
        await _emitSessionEvent(sessionId, "statusChange", {"status": "ended"})

        # Generate meeting summary in background.
        # NOTE(review): the task reference is not retained; confirm the task cannot
        # be garbage-collected before it finishes.
        asyncio.create_task(self._generateMeetingSummary(sessionId))

        logger.info(f"Bot left meeting for session {sessionId}")

    except Exception as e:
        logger.error(f"Error leaving meeting for session {sessionId}: {e}", exc_info=True)
        interface.updateSession(sessionId, {
            "status": TeamsbotSessionStatus.ERROR.value,
            "errorMessage": str(e),
            "endedAt": getIsoTimestamp(),
        })
        # Tell SSE listeners about the failure, as joinMeeting does.
        await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)})

    finally:
        # Cleanup event queue, even if the error handling above raised.
        _sessionEvents.pop(sessionId, None)
|
|
|
|
# =========================================================================
|
|
# Browser Bot WebSocket Communication
|
|
# =========================================================================
|
|
|
|
async def handleBotWebSocket(self, websocket: WebSocket, sessionId: str):
    """
    Main WebSocket handler for Browser Bot communication.

    Receives (JSON text frames, dispatched on their ``type`` field):
    - transcript: Caption text scraped from Teams
    - chatMessage: Meeting chat text, processed like a final transcript with source "chat"
    - status: Bot state changes (joined, in_lobby, left, error)
    - ping: answered with a ``pong`` frame

    Sends:
    - pong directly; playAudio / sendChatMessage frames are sent by the
      transcript processing this handler calls.

    The loop ends when the peer disconnects or an unexpected error occurs.
    Non-text frames and frames that are not a JSON object are logged and
    skipped so that one bad frame does not end the session's connection.
    """
    from fastapi import WebSocketDisconnect

    from . import interfaceFeatureTeamsbot as interfaceDb
    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface

    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
    voiceInterface = getVoiceInterface(self.currentUser, self.mandateId)

    logger.info(f"[WS-DEBUG] WebSocket handler started for session {sessionId}")

    msgCount = 0
    try:
        while True:
            data = await websocket.receive()

            # receive() hands back the disconnect message instead of raising;
            # stop here rather than calling receive() again on a closed socket.
            if data.get("type") == "websocket.disconnect":
                logger.info(f"[WS-DEBUG] session={sessionId}: peer disconnected (code={data.get('code')})")
                break

            msgCount += 1

            # The "text" key may be absent or None for binary frames.
            rawText = data.get("text")
            if rawText is None:
                logger.debug(f"[WS-DEBUG] session={sessionId} msg #{msgCount}: non-text data received (keys: {list(data.keys())})")
                continue

            try:
                message = json.loads(rawText)
            except json.JSONDecodeError as parseErr:
                logger.warning(f"[WS-DEBUG] session={sessionId} msg #{msgCount}: invalid JSON ignored: {parseErr}")
                continue
            if not isinstance(message, dict):
                logger.warning(f"[WS-DEBUG] session={sessionId} msg #{msgCount}: non-object JSON ignored")
                continue

            msgType = message.get("type")
            logger.info(f"[WS-DEBUG] session={sessionId} msg #{msgCount}: type={msgType}")

            if msgType == "transcript":
                transcript = message.get("transcript") or {}
                transcriptText = transcript.get("text") or ""
                logger.info(f"[WS-DEBUG] Transcript received: speaker={transcript.get('speaker')}, text={transcriptText[:60]}...")
                await self._processTranscript(
                    sessionId=sessionId,
                    speaker=transcript.get("speaker", "Unknown"),
                    text=transcriptText,
                    isFinal=transcript.get("isFinal", True),
                    interface=interface,
                    voiceInterface=voiceInterface,
                    websocket=websocket,
                )

            elif msgType == "chatMessage":
                chat = message.get("chat") or {}
                chatText = chat.get("text") or ""
                logger.info(f"[WS-DEBUG] Chat message received: speaker={chat.get('speaker')}, text={chatText[:60]}...")
                await self._processTranscript(
                    sessionId=sessionId,
                    speaker=chat.get("speaker", "Unknown"),
                    text=chatText,
                    isFinal=True,
                    interface=interface,
                    voiceInterface=voiceInterface,
                    websocket=websocket,
                    source="chat",
                )

            elif msgType == "status":
                status = message.get("status")
                errorMessage = message.get("message")
                logger.info(f"[WS-DEBUG] Status received: status={status}, message={errorMessage}")
                await self._handleBotStatus(sessionId, status, errorMessage, interface)

            elif msgType == "ping":
                await websocket.send_text(json.dumps({"type": "pong"}))

    except WebSocketDisconnect as e:
        # A normal end of the connection, not an error.
        logger.info(f"[WS-DEBUG] session={sessionId}: WebSocket disconnected (code={getattr(e, 'code', None)})")

    except Exception as e:
        # Fallback for disconnects that surface as other exception types.
        if "disconnect" not in str(e).lower():
            logger.error(f"[WS-DEBUG] WebSocket error for session {sessionId}: {type(e).__name__}: {e}")

    logger.info(f"[WS-DEBUG] WebSocket handler ended for session {sessionId} after {msgCount} messages")
|
|
|
|
async def _handleBotStatus(
    self,
    sessionId: str,
    status: str,
    errorMessage: Optional[str],
    interface,
):
    """Handle status updates from the browser bot.

    Maps the bot's status string to a session status, writes it to the
    session, and emits a ``statusChange`` event carrying the bot's original
    status string. Statuses not in the map are treated as ACTIVE (and
    logged). ``startedAt`` / ``endedAt`` are stamped only when the session
    actually changes into ACTIVE / ENDED, so repeated reports ("joined"
    followed by "in_meeting", or a second "left") do not overwrite the
    timestamps or start a second meeting summary.

    Args:
        sessionId: Session the status belongs to.
        status: Status string reported by the bot.
        errorMessage: Optional message sent with the status; stored as
            ``errorMessage`` when non-empty.
        interface: Database interface used to read and update the session.
    """
    logger.info(f"Bot status update for session {sessionId}: {status}")

    statusMap = {
        "connecting": TeamsbotSessionStatus.JOINING.value,
        "launching": TeamsbotSessionStatus.JOINING.value,
        "navigating": TeamsbotSessionStatus.JOINING.value,
        "in_lobby": TeamsbotSessionStatus.JOINING.value,
        "joined": TeamsbotSessionStatus.ACTIVE.value,
        "in_meeting": TeamsbotSessionStatus.ACTIVE.value,
        "left": TeamsbotSessionStatus.ENDED.value,
        "error": TeamsbotSessionStatus.ERROR.value,
    }

    if status not in statusMap:
        logger.warning(f"Unknown bot status '{status}' for session {sessionId}; treating session as active")
    dbStatus = statusMap.get(status, TeamsbotSessionStatus.ACTIVE.value)

    # Detect a repeated ACTIVE/ENDED report by comparing with the stored status.
    isRepeat = False
    if dbStatus in (TeamsbotSessionStatus.ACTIVE.value, TeamsbotSessionStatus.ENDED.value):
        session = interface.getSession(sessionId)
        isRepeat = bool(session) and session.get("status") == dbStatus

    updates = {"status": dbStatus}
    if errorMessage:
        updates["errorMessage"] = errorMessage
    if dbStatus == TeamsbotSessionStatus.ACTIVE.value:
        if not isRepeat:
            updates["startedAt"] = getIsoTimestamp()
    elif dbStatus == TeamsbotSessionStatus.ENDED.value:
        if not isRepeat:
            updates["endedAt"] = getIsoTimestamp()
    elif dbStatus == TeamsbotSessionStatus.ERROR.value:
        updates["endedAt"] = getIsoTimestamp()

    interface.updateSession(sessionId, updates)
    await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage})

    # Generate summary when the session ends (once per transition into ENDED).
    if dbStatus == TeamsbotSessionStatus.ENDED.value and not isRepeat:
        asyncio.create_task(self._generateMeetingSummary(sessionId))
|
|
|
|
async def _processTranscript(
    self,
    sessionId: str,
    speaker: str,
    text: str,
    isFinal: bool,
    interface,
    voiceInterface,
    websocket: WebSocket,
    source: str = "caption",
):
    """Process a transcript segment from captions or chat messages.

    Steps: store the segment, append it to the context buffer (trimmed to
    ``config.contextWindowSegments`` entries), emit a ``transcript`` event,
    increment the session's ``transcriptSegmentCount``, and finally decide
    whether to run AI analysis.

    AI analysis is skipped for empty text, the bot's own speech, non-final
    segments, TRANSCRIBE_ONLY mode, and when the trigger rules say no.

    Args:
        sessionId: Session the segment belongs to.
        speaker: Speaker name as reported by the bot.
        text: Segment text; ``None`` or whitespace-only text is ignored.
        isFinal: Whether the segment is final; only final segments can trigger AI.
        interface: Database interface for transcripts and the session.
        voiceInterface: Voice interface passed on to the response step.
        websocket: Bot connection passed on to the response step.
        source: ``"caption"`` (default) or ``"chat"``; stored in the context buffer.
    """

    # The bot may send a null text field; treat it like an empty segment.
    text = (text or "").strip()
    if not text:
        return

    # Filter out the bot's own speech from AI triggering.
    # The bot hears itself via captions — these should be stored in the
    # transcript for the record, but must NOT trigger AI analysis (feedback loop).
    isBotSpeaker = self._isBotSpeaker(speaker)

    # Store transcript segment
    transcriptData = TeamsbotTranscript(
        sessionId=sessionId,
        speaker=speaker,
        text=text,
        timestamp=getIsoTimestamp(),
        confidence=1.0,  # Captions don't have confidence scores
        language=self.config.language,
        isFinal=isFinal,
    ).model_dump()

    createdTranscript = interface.createTranscript(transcriptData)

    # Update context buffer (mark source for chat messages)
    self._contextBuffer.append({
        "speaker": speaker or "Unknown",
        "text": text,
        "timestamp": getUtcTimestamp(),
        "source": source,
    })
    # Keep only the last N segments. Drop by overflow count rather than
    # slicing with [-N:], because [-0:] would keep the whole list when N is 0.
    overflow = len(self._contextBuffer) - self.config.contextWindowSegments
    if overflow > 0:
        self._contextBuffer = self._contextBuffer[overflow:]

    # Emit SSE event for live transcript
    await _emitSessionEvent(sessionId, "transcript", {
        "id": createdTranscript.get("id"),
        "speaker": speaker,
        "text": text,
        "confidence": 1.0,
        "timestamp": getIsoTimestamp(),
    })

    # Update session transcript count
    session = interface.getSession(sessionId)
    if session:
        count = session.get("transcriptSegmentCount", 0) + 1
        interface.updateSession(sessionId, {"transcriptSegmentCount": count})

    # Skip AI analysis for bot's own speech (prevents feedback loop)
    if isBotSpeaker:
        logger.debug(f"Session {sessionId}: Skipping AI trigger for bot's own speech: [{speaker}] {text[:60]}...")
        return

    # Check if AI analysis should be triggered (only for final transcripts)
    if not isFinal:
        return

    if self.config.responseMode == TeamsbotResponseMode.TRANSCRIBE_ONLY:
        logger.debug(f"Session {sessionId}: responseMode=TRANSCRIBE_ONLY, skipping AI analysis")
        return

    shouldTrigger = self._shouldTriggerAnalysis(text)
    logger.debug(f"Session {sessionId}: shouldTriggerAnalysis={shouldTrigger}, bufferSize={len(self._contextBuffer)}, responseMode={self.config.responseMode}")

    if not shouldTrigger:
        return

    # SPEECH_TEAMS AI analysis
    logger.info(f"Session {sessionId}: Triggering AI analysis (buffer: {len(self._contextBuffer)} segments)")
    await self._analyzeAndRespond(sessionId, interface, voiceInterface, websocket, createdTranscript)
|
|
|
|
def _isBotSpeaker(self, speaker: str) -> bool:
|
|
"""Check if a transcript speaker is the bot itself.
|
|
|
|
Teams captions show the bot as e.g. "Shelly Miller (Unverified)" or
|
|
"Nyla Larsson" depending on auth/anonymous join. We match against:
|
|
- The configured bot name (e.g. "Shelly Miller")
|
|
- The bot account display name if authenticated
|
|
"""
|
|
if not speaker:
|
|
return False
|
|
|
|
speakerLower = speaker.lower().strip()
|
|
|
|
# Match against configured bot name
|
|
botName = self.config.botName.lower().strip()
|
|
if botName and botName in speakerLower:
|
|
return True
|
|
|
|
# Match against bot account email prefix (e.g. "nyla.larsson" from "nyla.larsson@poweron.swiss")
|
|
if self.config.botAccountEmail:
|
|
emailPrefix = self.config.botAccountEmail.split("@")[0].lower().replace(".", " ")
|
|
if emailPrefix in speakerLower:
|
|
return True
|
|
|
|
return False
|
|
|
|
def _shouldTriggerAnalysis(self, transcriptText: str) -> bool:
|
|
"""
|
|
Decide whether to trigger AI analysis based on the latest transcript.
|
|
Triggers:
|
|
- Bot name mentioned (immediate)
|
|
- Periodic interval elapsed
|
|
- Cooldown respected
|
|
"""
|
|
now = time.time()
|
|
timeSinceLastCall = now - self._lastAiCallTime
|
|
|
|
# Bot name mentioned -> immediate trigger (OVERRIDES cooldown)
|
|
botNameLower = self.config.botName.lower()
|
|
if botNameLower in transcriptText.lower():
|
|
logger.info(f"Trigger: Bot name '{self.config.botName}' detected in transcript (overrides cooldown): '{transcriptText[:60]}...'")
|
|
return True
|
|
|
|
# Cooldown check (only for non-name triggers)
|
|
if timeSinceLastCall < self.config.triggerCooldownSeconds:
|
|
logger.debug(f"Trigger: Cooldown active ({timeSinceLastCall:.1f}s < {self.config.triggerCooldownSeconds}s)")
|
|
return False
|
|
|
|
# Periodic trigger
|
|
if timeSinceLastCall >= self.config.triggerIntervalSeconds:
|
|
logger.info(f"Trigger: Periodic interval ({self.config.triggerIntervalSeconds}s) elapsed ({timeSinceLastCall:.1f}s since last call)")
|
|
return True
|
|
|
|
logger.debug(f"Trigger: No trigger ({timeSinceLastCall:.1f}s / {self.config.triggerIntervalSeconds}s interval)")
|
|
return False
|
|
|
|
async def _analyzeAndRespond(
    self,
    sessionId: str,
    interface,
    voiceInterface,
    websocket: WebSocket,
    triggerTranscript: Dict[str, Any],
):
    """Run SPEECH_TEAMS AI analysis and respond if needed.

    Steps:
    1. Build the transcript context from the context buffer.
    2. Call the AI service with the SPEECH_TEAMS operation.
    3. Parse the structured result and emit an ``analysis`` event (always).
    4. If the AI wants to respond: in MANUAL mode only emit a
       ``suggestedResponse`` event; otherwise deliver via voice and/or chat,
       store the bot response, emit a ``botResponse`` event and increment
       the session's ``botResponseCount``.

    When the configured channel is voice-only and voice delivery fails, the
    response is sent as a chat message instead and recorded as CHAT.

    Errors are not propagated: they are logged and emitted as an ``error`` event.

    Args:
        sessionId: Session being analysed.
        interface: Database interface for bot responses and the session.
        voiceInterface: Voice interface providing ``textToSpeech``.
        websocket: Bot connection used for playback/chat; may be falsy.
        triggerTranscript: Stored transcript that triggered the analysis;
            its ``id`` is recorded on the bot response.
    """
    self._lastAiCallTime = time.time()

    transcriptContext = self._buildTranscriptContext()

    # Call SPEECH_TEAMS
    try:
        from modules.services.serviceAi.mainServiceAi import AiService

        # Create minimal service context for AI billing
        serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
        aiService = AiService(serviceCenter=serviceContext)
        await aiService.ensureAiObjectsInitialized()

        request = AiCallRequest(
            prompt=self.config.aiSystemPrompt,
            context=transcriptContext,
            options=AiCallOptions(
                operationType=OperationTypeEnum.SPEECH_TEAMS,
                priority=PriorityEnum.SPEED,
            )
        )

        response = await aiService.callAi(request)

        # Parse structured response
        speechResult = self._parseSpeechTeamsResponse(response.content)

        logger.info(
            f"SPEECH_TEAMS result: shouldRespond={speechResult.shouldRespond}, "
            f"intent={speechResult.detectedIntent}, "
            f"reasoning={(speechResult.reasoning or '')[:80]}..."
        )

        # Emit analysis event (always, for debug/UI)
        await _emitSessionEvent(sessionId, "analysis", {
            "shouldRespond": speechResult.shouldRespond,
            "detectedIntent": speechResult.detectedIntent,
            "reasoning": speechResult.reasoning,
            "modelName": response.modelName,
            "processingTime": response.processingTime,
            "priceCHF": response.priceCHF,
        })

        # Step 4: Respond if AI decided to
        if not (speechResult.shouldRespond and speechResult.responseText):
            return

        if self.config.responseMode == TeamsbotResponseMode.MANUAL:
            # In manual mode, suggest but don't send
            await _emitSessionEvent(sessionId, "suggestedResponse", {
                "responseText": speechResult.responseText,
                "detectedIntent": speechResult.detectedIntent,
                "reasoning": speechResult.reasoning,
            })
            return

        # Determine response channel (voice, chat, or both)
        channel = self.config.responseChannel
        if channel == TeamsbotResponseChannel.VOICE:
            responseType = TeamsbotResponseType.AUDIO
        elif channel == TeamsbotResponseChannel.CHAT:
            responseType = TeamsbotResponseType.CHAT
        else:
            responseType = TeamsbotResponseType.BOTH

        wantsVoice = channel in (TeamsbotResponseChannel.VOICE, TeamsbotResponseChannel.BOTH)
        wantsChat = channel in (TeamsbotResponseChannel.CHAT, TeamsbotResponseChannel.BOTH)

        # 4a: Voice response (TTS -> Audio to bot)
        if wantsVoice:
            voiceOk = await self._sendVoiceResponse(
                sessionId, speechResult.responseText, voiceInterface, websocket
            )
            if not voiceOk and responseType == TeamsbotResponseType.AUDIO:
                # Fallback to chat only: actually send the chat message, so the
                # recorded CHAT response type matches what was delivered.
                responseType = TeamsbotResponseType.CHAT
                wantsChat = True

        # 4b: Chat response (send text message to meeting chat)
        if wantsChat:
            try:
                if websocket:
                    await websocket.send_text(json.dumps({
                        "type": "sendChatMessage",
                        "sessionId": sessionId,
                        "text": speechResult.responseText,
                    }))
            except Exception as chatErr:
                logger.warning(f"Chat message send failed for session {sessionId}: {chatErr}")

        # 4c: Store bot response
        botResponseData = TeamsbotBotResponse(
            sessionId=sessionId,
            responseText=speechResult.responseText,
            responseType=responseType,
            detectedIntent=speechResult.detectedIntent,
            reasoning=speechResult.reasoning,
            triggeredByTranscriptId=triggerTranscript.get("id"),
            modelName=response.modelName,
            processingTime=response.processingTime,
            priceCHF=response.priceCHF,
            timestamp=getIsoTimestamp(),
        ).model_dump()

        createdResponse = interface.createBotResponse(botResponseData)

        # 4d: Emit SSE event
        await _emitSessionEvent(sessionId, "botResponse", {
            "id": createdResponse.get("id"),
            "responseText": speechResult.responseText,
            "responseType": responseType.value,
            "detectedIntent": speechResult.detectedIntent,
            "reasoning": speechResult.reasoning,
            "modelName": response.modelName,
            "processingTime": response.processingTime,
            "priceCHF": response.priceCHF,
        })

        # Update session response count
        session = interface.getSession(sessionId)
        if session:
            count = session.get("botResponseCount", 0) + 1
            interface.updateSession(sessionId, {"botResponseCount": count})

        logger.info(f"Bot responded in session {sessionId}: intent={speechResult.detectedIntent}")

    except Exception as e:
        logger.error(f"SPEECH_TEAMS analysis failed for session {sessionId}: {type(e).__name__}: {e}", exc_info=True)
        await _emitSessionEvent(sessionId, "error", {"message": f"AI analysis failed: {type(e).__name__}: {str(e)}"})


def _buildTranscriptContext(self) -> str:
    """Render the context buffer as the text context for the AI call.

    The first line is ``BOT_NAME:<name>``. Each segment follows on its own
    line; the bot's own utterances are marked ``[YOU (<name>)]`` and chat
    messages ``[Chat: <speaker>]``.
    """
    contextLines = []
    for segment in self._contextBuffer:
        speaker = segment.get("speaker", "Unknown")
        text = segment.get("text", "")
        isChat = segment.get("source", "caption") == "chat"
        if self._isBotSpeaker(speaker):
            contextLines.append(f"[YOU ({self.config.botName})]: {text}")
        elif isChat:
            contextLines.append(f"[Chat: {speaker}]: {text}")
        else:
            contextLines.append(f"[{speaker}]: {text}")

    return f"BOT_NAME:{self.config.botName}\n" + "\n".join(contextLines)


def _parseSpeechTeamsResponse(self, content) -> SpeechTeamsResponse:
    """Parse the AI response content into a SpeechTeamsResponse.

    Tries the content as plain JSON first, then the content of the first
    fenced code block. If both fail, returns a result with
    ``shouldRespond=False`` that carries the parse error in ``reasoning``.
    """
    try:
        return SpeechTeamsResponse.model_validate_json(content)
    except Exception:
        # Not plain JSON; try to extract JSON from a fenced block below.
        pass

    try:
        jsonStr = content
        if "```json" in jsonStr:
            jsonStr = jsonStr.split("```json")[1].split("```")[0]
        elif "```" in jsonStr:
            jsonStr = jsonStr.split("```")[1].split("```")[0]
        return SpeechTeamsResponse.model_validate_json(jsonStr.strip())
    except Exception as parseErr:
        logger.warning(f"Failed to parse SPEECH_TEAMS response: {parseErr}")
        return SpeechTeamsResponse(
            shouldRespond=False,
            reasoning=f"Parse error: {str(parseErr)[:100]}",
            detectedIntent="none"
        )


async def _sendVoiceResponse(self, sessionId: str, responseText: str, voiceInterface, websocket) -> bool:
    """Synthesize ``responseText`` and send it to the bot as a ``playAudio`` frame.

    Returns:
        False if TTS or sending raised, or TTS returned no audio content;
        True otherwise (including the case where audio was generated but
        there is no WebSocket to play it on).
    """
    try:
        ttsResult = await voiceInterface.textToSpeech(
            text=responseText,
            languageCode=self.config.language,
            voiceName=self.config.voiceId
        )

        audioContent = ttsResult.get("audioContent") if isinstance(ttsResult, dict) else None
        if not audioContent:
            logger.warning(f"TTS returned no audio for session {sessionId}")
            return False

        if not websocket:
            logger.info(f"TTS audio generated for session {sessionId} (HTTP mode - no WebSocket for playback)")
            return True

        # NOTE(review): a str audioContent is UTF-8 encoded and then base64-encoded;
        # confirm the TTS interface never returns already-base64 text here.
        audioBytes = audioContent if isinstance(audioContent, bytes) else audioContent.encode()
        await websocket.send_text(json.dumps({
            "type": "playAudio",
            "sessionId": sessionId,
            "audio": {
                "data": base64.b64encode(audioBytes).decode(),
                "format": "mp3",
            },
        }))
        return True
    except Exception as ttsErr:
        logger.warning(f"TTS failed for session {sessionId}: {ttsErr}")
        return False
|
|
|
|
# =========================================================================
|
|
# Meeting Summary
|
|
# =========================================================================
|
|
|
|
async def _generateMeetingSummary(self, sessionId: str):
    """Create an AI summary of a finished meeting and store it on the session.

    Loads the session's transcripts, joins them into ``[speaker]: text``
    lines and sends them to the AI service with a German summary prompt. The
    result is written to the session's ``summary`` field when the call
    reports no errors. Sessions with fewer than five transcript segments are
    skipped. Any exception is logged and swallowed.
    """
    try:
        from . import interfaceFeatureTeamsbot as interfaceDb

        dbInterface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
        segments = dbInterface.getTranscripts(sessionId)

        # Not enough content for a summary
        if not segments or len(segments) < 5:
            return

        # Build full transcript, one "[speaker]: text" line per segment
        transcriptLines = []
        for segment in segments:
            who = segment.get('speaker', 'Unknown')
            said = segment.get('text', '')
            transcriptLines.append(f"[{who}]: {said}")
        fullTranscript = "\n".join(transcriptLines)

        from modules.services.serviceAi.mainServiceAi import AiService

        # Minimal context so the AI call can be billed to this user/mandate
        billingContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
        aiService = AiService(serviceCenter=billingContext)
        await aiService.ensureAiObjectsInitialized()

        summaryOptions = AiCallOptions(
            operationType=OperationTypeEnum.DATA_ANALYSE,
            priority=PriorityEnum.BALANCED,
        )
        summaryRequest = AiCallRequest(
            prompt="Erstelle eine kurze Zusammenfassung dieses Meeting-Transkripts. Nenne die wichtigsten Punkte, Entscheidungen und offene Aktionspunkte.",
            context=fullTranscript,
            options=summaryOptions,
        )

        response = await aiService.callAi(summaryRequest)
        if not response or response.errorCount != 0:
            return

        dbInterface.updateSession(sessionId, {"summary": response.content})
        logger.info(f"Meeting summary generated for session {sessionId}")

    except Exception as e:
        logger.error(f"Failed to generate meeting summary for session {sessionId}: {e}")
|