# Copyright (c) 2025 Patrick Motsch # All rights reserved. """ Teamsbot Service - Pipeline Orchestrator. Manages the audio processing pipeline: STT -> Context Buffer -> SPEECH_TEAMS -> TTS -> Bridge. """ import logging import json import asyncio import time import base64 from typing import Optional, Dict, Any, List from fastapi import WebSocket from modules.datamodels.datamodelUam import User from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum from modules.shared.timeUtils import getUtcTimestamp, getIsoTimestamp from .datamodelTeamsbot import ( TeamsbotSessionStatus, TeamsbotTranscript, TeamsbotBotResponse, TeamsbotResponseType, TeamsbotConfig, TeamsbotResponseMode, TeamsbotResponseChannel, SpeechTeamsResponse, ) from .browserBotConnector import BrowserBotConnector logger = logging.getLogger(__name__) # ========================================================================= # Minimal Service Context (for AI billing in bridge callbacks) # ========================================================================= class _ServiceContext: """Minimal context providing user/mandate info for AiService billing. Used by bridge callbacks where a full Services instance is not available.""" def __init__(self, user, mandateId, featureInstanceId=None): self.user = user self.mandateId = mandateId self.featureInstanceId = featureInstanceId self.featureCode = "teamsbot" # ========================================================================= # Session Event Queues (for SSE streaming to frontend) # ========================================================================= _sessionEvents: Dict[str, asyncio.Queue] = {} async def _emitSessionEvent(sessionId: str, eventType: str, data: Any): """Emit an event to the session's SSE stream.""" eventQueue = _sessionEvents.get(sessionId) if eventQueue: await eventQueue.put({"type": eventType, "data": data, "timestamp": getIsoTimestamp()}) class TeamsbotService: """ Pipeline Orchestrator for Teams Bot sessions. Coordinates VoiceObjects (STT/TTS), AiService (SPEECH_TEAMS), and Bridge communication. """ def __init__(self, currentUser: User, mandateId: str, instanceId: str, config: TeamsbotConfig): self.currentUser = currentUser self.mandateId = mandateId self.instanceId = instanceId self.config = config self.browserBotConnector = BrowserBotConnector(config._getEffectiveBrowserBotUrl()) # State self._lastAiCallTime: float = 0.0 self._contextBuffer: List[Dict[str, Any]] = [] self._sessionContext: Optional[str] = None # User-provided background context self._contextSummary: Optional[str] = None # AI-generated summary of long context # ========================================================================= # Session Lifecycle # ========================================================================= async def joinMeeting( self, sessionId: str, meetingLink: str, connectionId: Optional[str] = None, gatewayBaseUrl: str = "", ): """Send join command to the Browser Bot service. The browser bot will: 1. Launch a headless browser 2. Navigate to Teams web app 3. Join the meeting as anonymous guest 4. Enable captions and start scraping 5. Connect back via WebSocket to send transcripts """ from . import interfaceFeatureTeamsbot as interfaceDb interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) # Initialize SSE event queue _sessionEvents[sessionId] = asyncio.Queue() try: # Update status to JOINING interface.updateSession(sessionId, {"status": TeamsbotSessionStatus.JOINING.value}) await _emitSessionEvent(sessionId, "statusChange", {"status": "joining"}) # Send join command to browser bot session = interface.getSession(sessionId) if not session: raise ValueError(f"Session {sessionId} not found") # Build the full WebSocket URL for the bot to connect back to this gateway instance # gatewayBaseUrl is passed from the route handler (derived from request.base_url) wsScheme = "wss" if gatewayBaseUrl.startswith("https") else "ws" gatewayHost = gatewayBaseUrl.replace("https://", "").replace("http://", "").rstrip("/") fullGatewayWsUrl = f"{wsScheme}://{gatewayHost}/api/teamsbot/{self.instanceId}/bot/ws/{sessionId}" result = await self.browserBotConnector.joinMeeting( sessionId=sessionId, meetingUrl=meetingLink, botName=session.get("botName", self.config.botName), instanceId=self.instanceId, gatewayWsUrl=fullGatewayWsUrl, language=self.config.language, botAccountEmail=self.config.botAccountEmail if hasattr(self.config, 'botAccountEmail') else None, botAccountPassword=self.config.botAccountPassword if hasattr(self.config, 'botAccountPassword') else None, transferMode=self.config.transferMode if hasattr(self.config, 'transferMode') else "auto", ) if result.get("success"): interface.updateSession(sessionId, { "status": TeamsbotSessionStatus.JOINING.value, # Will become ACTIVE when bot connects via WS }) logger.info(f"Browser bot deployment started for session {sessionId}") else: errorMsg = result.get("error", "Unknown error joining meeting") interface.updateSession(sessionId, { "status": TeamsbotSessionStatus.ERROR.value, "errorMessage": errorMsg, }) await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": errorMsg}) logger.error(f"Failed to deploy browser bot for session {sessionId}: {errorMsg}") except Exception as e: logger.error(f"Error joining meeting for session {sessionId}: {e}") interface.updateSession(sessionId, { "status": TeamsbotSessionStatus.ERROR.value, "errorMessage": str(e), }) await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)}) async def leaveMeeting(self, sessionId: str): """Send leave command to the Browser Bot service.""" from . import interfaceFeatureTeamsbot as interfaceDb interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) try: interface.updateSession(sessionId, {"status": TeamsbotSessionStatus.LEAVING.value}) await _emitSessionEvent(sessionId, "statusChange", {"status": "leaving"}) await self.browserBotConnector.leaveMeeting(sessionId) interface.updateSession(sessionId, { "status": TeamsbotSessionStatus.ENDED.value, "endedAt": getIsoTimestamp(), }) await _emitSessionEvent(sessionId, "statusChange", {"status": "ended"}) # Generate meeting summary in background asyncio.create_task(self._generateMeetingSummary(sessionId)) logger.info(f"Bot left meeting for session {sessionId}") except Exception as e: logger.error(f"Error leaving meeting for session {sessionId}: {e}") interface.updateSession(sessionId, { "status": TeamsbotSessionStatus.ERROR.value, "errorMessage": str(e), "endedAt": getIsoTimestamp(), }) # Cleanup event queue _sessionEvents.pop(sessionId, None) # ========================================================================= # Browser Bot WebSocket Communication # ========================================================================= async def handleBotWebSocket(self, websocket: WebSocket, sessionId: str): """ Main WebSocket handler for Browser Bot communication. Receives: - transcript: Caption text scraped from Teams - status: Bot state changes (joined, in_lobby, left, error) Sends: - playAudio: TTS audio for the bot to play in the meeting """ from . import interfaceFeatureTeamsbot as interfaceDb from modules.interfaces.interfaceVoiceObjects import getVoiceInterface interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) voiceInterface = getVoiceInterface(self.currentUser, self.mandateId) # Load session context (user-provided background knowledge) # If the context is long (>500 chars), summarize it to reduce token usage session = interface.getSession(sessionId) if session: rawContext = session.get("sessionContext") if rawContext and len(rawContext) > 500: logger.info(f"Session {sessionId}: Summarizing long session context ({len(rawContext)} chars)...") self._sessionContext = await self._summarizeSessionContext(sessionId, rawContext) elif rawContext: self._sessionContext = rawContext if self._sessionContext: logger.info(f"Session {sessionId}: Session context ready ({len(self._sessionContext)} chars)") logger.info(f"[WS-DEBUG] WebSocket handler started for session {sessionId}") try: msgCount = 0 while True: data = await websocket.receive() msgCount += 1 if "text" not in data: logger.debug(f"[WS-DEBUG] session={sessionId} msg #{msgCount}: non-text data received (keys: {list(data.keys())})") continue message = json.loads(data["text"]) msgType = message.get("type") logger.info(f"[WS-DEBUG] session={sessionId} msg #{msgCount}: type={msgType}") if msgType == "transcript": transcript = message.get("transcript", {}) logger.info(f"[WS-DEBUG] Transcript received: speaker={transcript.get('speaker')}, text={transcript.get('text', '')[:60]}...") await self._processTranscript( sessionId=sessionId, speaker=transcript.get("speaker", "Unknown"), text=transcript.get("text", ""), isFinal=transcript.get("isFinal", True), interface=interface, voiceInterface=voiceInterface, websocket=websocket, ) elif msgType == "chatMessage": chat = message.get("chat", {}) logger.info(f"[WS-DEBUG] Chat message received: speaker={chat.get('speaker')}, text={chat.get('text', '')[:60]}...") await self._processTranscript( sessionId=sessionId, speaker=chat.get("speaker", "Unknown"), text=chat.get("text", ""), isFinal=True, interface=interface, voiceInterface=voiceInterface, websocket=websocket, source="chat", ) elif msgType == "status": status = message.get("status") errorMessage = message.get("message") logger.info(f"[WS-DEBUG] Status received: status={status}, message={errorMessage}") await self._handleBotStatus(sessionId, status, errorMessage, interface) elif msgType == "audioChunk": audioData = message.get("audio", {}) audioBase64 = audioData.get("data", "") sampleRate = audioData.get("sampleRate", 16000) if audioBase64: await self._processAudioChunk( sessionId=sessionId, audioBase64=audioBase64, sampleRate=sampleRate, interface=interface, voiceInterface=voiceInterface, websocket=websocket, ) elif msgType == "ping": await websocket.send_text(json.dumps({"type": "pong"})) except Exception as e: if "disconnect" not in str(e).lower(): logger.error(f"[WS-DEBUG] WebSocket error for session {sessionId}: {type(e).__name__}: {e}") logger.info(f"[WS-DEBUG] WebSocket handler ended for session {sessionId} after {msgCount} messages") async def _handleBotStatus( self, sessionId: str, status: str, errorMessage: Optional[str], interface, ): """Handle status updates from the browser bot.""" logger.info(f"Bot status update for session {sessionId}: {status}") statusMap = { "connecting": TeamsbotSessionStatus.JOINING.value, "launching": TeamsbotSessionStatus.JOINING.value, "navigating": TeamsbotSessionStatus.JOINING.value, "in_lobby": TeamsbotSessionStatus.JOINING.value, "joined": TeamsbotSessionStatus.ACTIVE.value, "in_meeting": TeamsbotSessionStatus.ACTIVE.value, "left": TeamsbotSessionStatus.ENDED.value, "error": TeamsbotSessionStatus.ERROR.value, } dbStatus = statusMap.get(status, TeamsbotSessionStatus.ACTIVE.value) updates = {"status": dbStatus} if errorMessage: updates["errorMessage"] = errorMessage if dbStatus == TeamsbotSessionStatus.ACTIVE.value: updates["startedAt"] = getIsoTimestamp() elif dbStatus in [TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value]: updates["endedAt"] = getIsoTimestamp() interface.updateSession(sessionId, updates) await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage}) # Generate summary when session ends if dbStatus == TeamsbotSessionStatus.ENDED.value: asyncio.create_task(self._generateMeetingSummary(sessionId)) async def _processAudioChunk( self, sessionId: str, audioBase64: str, sampleRate: int, interface, voiceInterface, websocket: WebSocket, ): """Process an audio chunk from WebRTC capture — run STT and feed into transcript pipeline.""" import base64 try: audioBytes = base64.b64decode(audioBase64) if len(audioBytes) < 1000: return # Use the existing Google Cloud Speech connector for STT speechConnector = voiceInterface.getSpeechConnector() if voiceInterface else None if not speechConnector or not hasattr(speechConnector, 'speech_client'): logger.warning(f"[AudioChunk] No speech client available for session {sessionId}") return from google.cloud import speech config = speech.RecognitionConfig( encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, sample_rate_hertz=sampleRate, language_code=self.config.language or "de-DE", enable_automatic_punctuation=True, ) audio = speech.RecognitionAudio(content=audioBytes) response = speechConnector.speech_client.recognize(config=config, audio=audio) for result in response.results: if result.alternatives: text = result.alternatives[0].transcript.strip() if text: logger.info(f"[AudioChunk] STT result: {text[:80]}...") await self._processTranscript( sessionId=sessionId, speaker="Meeting Audio", text=text, isFinal=True, interface=interface, voiceInterface=voiceInterface, websocket=websocket, source="audioCapture", ) except Exception as e: logger.error(f"[AudioChunk] STT error for session {sessionId}: {type(e).__name__}: {e}") async def _processTranscript( self, sessionId: str, speaker: str, text: str, isFinal: bool, interface, voiceInterface, websocket: WebSocket, source: str = "caption", ): """Process a transcript segment from captions or chat messages.""" text = text.strip() if not text: return # Filter out the bot's own speech from AI triggering. # The bot hears itself via captions — these should be stored in the # transcript for the record, but must NOT trigger AI analysis (feedback loop). isBotSpeaker = self._isBotSpeaker(speaker) # Store transcript segment transcriptData = TeamsbotTranscript( sessionId=sessionId, speaker=speaker, text=text, timestamp=getIsoTimestamp(), confidence=1.0, # Captions don't have confidence scores language=self.config.language, isFinal=isFinal, ).model_dump() createdTranscript = interface.createTranscript(transcriptData) # Update context buffer (mark source for chat messages) self._contextBuffer.append({ "speaker": speaker or "Unknown", "text": text, "timestamp": getUtcTimestamp(), "source": source, }) # Keep only last N segments maxSegments = self.config.contextWindowSegments if len(self._contextBuffer) > maxSegments: # When buffer overflows, summarize the older half to preserve context # without losing information. The summary replaces the old segments. if not self._contextSummary and len(self._contextBuffer) > maxSegments * 1.5: asyncio.create_task(self._summarizeContextBuffer(sessionId)) self._contextBuffer = self._contextBuffer[-maxSegments:] # Emit SSE event for live transcript await _emitSessionEvent(sessionId, "transcript", { "id": createdTranscript.get("id"), "speaker": speaker, "text": text, "confidence": 1.0, "timestamp": getIsoTimestamp(), }) # Update session transcript count session = interface.getSession(sessionId) if session: count = session.get("transcriptSegmentCount", 0) + 1 interface.updateSession(sessionId, {"transcriptSegmentCount": count}) # Skip AI analysis for bot's own speech (prevents feedback loop) if isBotSpeaker: logger.debug(f"Session {sessionId}: Skipping AI trigger for bot's own speech: [{speaker}] {text[:60]}...") return # Check if AI analysis should be triggered (only for final transcripts) if not isFinal: return if self.config.responseMode == TeamsbotResponseMode.TRANSCRIBE_ONLY: logger.debug(f"Session {sessionId}: responseMode=TRANSCRIBE_ONLY, skipping AI analysis") return shouldTrigger = self._shouldTriggerAnalysis(text) logger.debug(f"Session {sessionId}: shouldTriggerAnalysis={shouldTrigger}, bufferSize={len(self._contextBuffer)}, responseMode={self.config.responseMode}") if not shouldTrigger: return # SPEECH_TEAMS AI analysis logger.info(f"Session {sessionId}: Triggering AI analysis (buffer: {len(self._contextBuffer)} segments)") await self._analyzeAndRespond(sessionId, interface, voiceInterface, websocket, createdTranscript) def _isBotSpeaker(self, speaker: str) -> bool: """Check if a transcript speaker is the bot itself. Teams captions show the bot as e.g. "Shelly Miller (Unverified)" or "Nyla Larsson" depending on auth/anonymous join. We match against: - The configured bot name (e.g. "Shelly Miller") - The bot account display name if authenticated """ if not speaker: return False speakerLower = speaker.lower().strip() # Match against configured bot name botName = self.config.botName.lower().strip() if botName and botName in speakerLower: return True # Match against bot account email prefix (e.g. "nyla.larsson" from "nyla.larsson@poweron.swiss") if self.config.botAccountEmail: emailPrefix = self.config.botAccountEmail.split("@")[0].lower().replace(".", " ") if emailPrefix in speakerLower: return True return False def _shouldTriggerAnalysis(self, transcriptText: str) -> bool: """ Decide whether to trigger AI analysis based on the latest transcript. Triggers: - Bot name mentioned (immediate) - Periodic interval elapsed - Cooldown respected """ now = time.time() timeSinceLastCall = now - self._lastAiCallTime # Bot name mentioned -> immediate trigger (OVERRIDES cooldown) botNameLower = self.config.botName.lower() if botNameLower in transcriptText.lower(): logger.info(f"Trigger: Bot name '{self.config.botName}' detected in transcript (overrides cooldown): '{transcriptText[:60]}...'") return True # Cooldown check (only for non-name triggers) if timeSinceLastCall < self.config.triggerCooldownSeconds: logger.debug(f"Trigger: Cooldown active ({timeSinceLastCall:.1f}s < {self.config.triggerCooldownSeconds}s)") return False # Periodic trigger if timeSinceLastCall >= self.config.triggerIntervalSeconds: logger.info(f"Trigger: Periodic interval ({self.config.triggerIntervalSeconds}s) elapsed ({timeSinceLastCall:.1f}s since last call)") return True logger.debug(f"Trigger: No trigger ({timeSinceLastCall:.1f}s / {self.config.triggerIntervalSeconds}s interval)") return False async def _analyzeAndRespond( self, sessionId: str, interface, voiceInterface, websocket: WebSocket, triggerTranscript: Dict[str, Any], ): """Run SPEECH_TEAMS AI analysis and respond if needed.""" self._lastAiCallTime = time.time() # Build transcript context from buffer. # Mark bot's own utterances and chat messages for the AI. contextLines = [] for segment in self._contextBuffer: speaker = segment.get("speaker", "Unknown") text = segment.get("text", "") segSource = segment.get("source", "caption") prefix = "Chat" if segSource == "chat" else "" if self._isBotSpeaker(speaker): contextLines.append(f"[YOU ({self.config.botName})]: {text}") elif prefix: contextLines.append(f"[{prefix}: {speaker}]: {text}") else: contextLines.append(f"[{speaker}]: {text}") # Include session context if provided by the user at session start sessionContextStr = "" if self._sessionContext: sessionContextStr = f"\nSESSION_CONTEXT (background knowledge provided by the user):\n{self._sessionContext}\n" # Include summary of earlier conversation if available summaryStr = "" if self._contextSummary: summaryStr = f"\nEARLIER_CONVERSATION_SUMMARY:\n{self._contextSummary}\n" transcriptContext = f"BOT_NAME:{self.config.botName}{sessionContextStr}{summaryStr}\nRECENT_TRANSCRIPT:\n" + "\n".join(contextLines) # Call SPEECH_TEAMS try: from modules.services.serviceAi.mainServiceAi import AiService # Create minimal service context for AI billing serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId) aiService = AiService(serviceCenter=serviceContext) await aiService.ensureAiObjectsInitialized() request = AiCallRequest( prompt=self.config.aiSystemPrompt, context=transcriptContext, options=AiCallOptions( operationType=OperationTypeEnum.SPEECH_TEAMS, priority=PriorityEnum.SPEED, ) ) response = await aiService.callAi(request) # Parse structured response try: speechResult = SpeechTeamsResponse.model_validate_json(response.content) except Exception: # Try to extract JSON from response content try: jsonStr = response.content if "```json" in jsonStr: jsonStr = jsonStr.split("```json")[1].split("```")[0] elif "```" in jsonStr: jsonStr = jsonStr.split("```")[1].split("```")[0] speechResult = SpeechTeamsResponse.model_validate_json(jsonStr.strip()) except Exception as parseErr: logger.warning(f"Failed to parse SPEECH_TEAMS response: {parseErr}") speechResult = SpeechTeamsResponse( shouldRespond=False, reasoning=f"Parse error: {str(parseErr)[:100]}", detectedIntent="none" ) logger.info( f"SPEECH_TEAMS result: shouldRespond={speechResult.shouldRespond}, " f"intent={speechResult.detectedIntent}, " f"reasoning={speechResult.reasoning[:80]}..." ) # Emit analysis event (always, for debug/UI) await _emitSessionEvent(sessionId, "analysis", { "shouldRespond": speechResult.shouldRespond, "detectedIntent": speechResult.detectedIntent, "reasoning": speechResult.reasoning, "modelName": response.modelName, "processingTime": response.processingTime, "priceCHF": response.priceCHF, }) # Step 4a: Handle STOP intent -- stop audio immediately if speechResult.detectedIntent == "stop": logger.info(f"Session {sessionId}: AI detected STOP intent: {speechResult.reasoning}") if websocket: try: await websocket.send_text(json.dumps({ "type": "stopAudio", "sessionId": sessionId, })) except Exception as stopErr: logger.warning(f"Failed to send stop command: {stopErr}") return # Step 4b: Respond if AI decided to if speechResult.shouldRespond and speechResult.responseText: if self.config.responseMode == TeamsbotResponseMode.MANUAL: # In manual mode, suggest but don't send await _emitSessionEvent(sessionId, "suggestedResponse", { "responseText": speechResult.responseText, "detectedIntent": speechResult.detectedIntent, "reasoning": speechResult.reasoning, }) return # Determine response channel (voice, chat, or both) channel = self.config.responseChannel responseType = TeamsbotResponseType.BOTH if channel == TeamsbotResponseChannel.VOICE: responseType = TeamsbotResponseType.AUDIO elif channel == TeamsbotResponseChannel.CHAT: responseType = TeamsbotResponseType.CHAT else: responseType = TeamsbotResponseType.BOTH # 4a: Voice response (TTS -> Audio to bot) if channel in (TeamsbotResponseChannel.VOICE, TeamsbotResponseChannel.BOTH): try: ttsResult = await voiceInterface.textToSpeech( text=speechResult.responseText, languageCode=self.config.language, voiceName=self.config.voiceId ) if ttsResult and isinstance(ttsResult, dict): audioContent = ttsResult.get("audioContent") if audioContent and websocket: await websocket.send_text(json.dumps({ "type": "playAudio", "sessionId": sessionId, "audio": { "data": base64.b64encode(audioContent if isinstance(audioContent, bytes) else audioContent.encode()).decode(), "format": "mp3", }, })) elif audioContent and not websocket: logger.info(f"TTS audio generated for session {sessionId} (HTTP mode - no WebSocket for playback)") except Exception as ttsErr: logger.warning(f"TTS failed for session {sessionId}: {ttsErr}") if responseType == TeamsbotResponseType.AUDIO: responseType = TeamsbotResponseType.CHAT # Fallback to chat only # 4b: Chat response (send text message to meeting chat) if channel in (TeamsbotResponseChannel.CHAT, TeamsbotResponseChannel.BOTH): try: if websocket: await websocket.send_text(json.dumps({ "type": "sendChatMessage", "sessionId": sessionId, "text": speechResult.responseText, })) except Exception as chatErr: logger.warning(f"Chat message send failed for session {sessionId}: {chatErr}") # 4b: Store bot response botResponseData = TeamsbotBotResponse( sessionId=sessionId, responseText=speechResult.responseText, responseType=responseType, detectedIntent=speechResult.detectedIntent, reasoning=speechResult.reasoning, triggeredByTranscriptId=triggerTranscript.get("id"), modelName=response.modelName, processingTime=response.processingTime, priceCHF=response.priceCHF, timestamp=getIsoTimestamp(), ).model_dump() createdResponse = interface.createBotResponse(botResponseData) # 4c: Emit SSE event await _emitSessionEvent(sessionId, "botResponse", { "id": createdResponse.get("id"), "responseText": speechResult.responseText, "responseType": responseType.value, "detectedIntent": speechResult.detectedIntent, "reasoning": speechResult.reasoning, "modelName": response.modelName, "processingTime": response.processingTime, "priceCHF": response.priceCHF, }) # Update session response count session = interface.getSession(sessionId) if session: count = session.get("botResponseCount", 0) + 1 interface.updateSession(sessionId, {"botResponseCount": count}) logger.info(f"Bot responded in session {sessionId}: intent={speechResult.detectedIntent}") except Exception as e: logger.error(f"SPEECH_TEAMS analysis failed for session {sessionId}: {type(e).__name__}: {e}", exc_info=True) await _emitSessionEvent(sessionId, "error", {"message": f"AI analysis failed: {type(e).__name__}: {str(e)}"}) # ========================================================================= # Context Summarization (for long sessions) # ========================================================================= async def _summarizeSessionContext(self, sessionId: str, rawContext: str) -> str: """Summarize a long user-provided session context to its essential points. This reduces token usage in every subsequent AI call.""" try: from modules.services.serviceAi.mainServiceAi import AiService from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId) aiService = AiService(serviceCenter=serviceContext) await aiService.ensureAiObjectsInitialized() request = AiCallRequest( prompt=( "Fasse den folgenden Kontext auf die wesentlichen Punkte zusammen. " "Behalte alle wichtigen Fakten, Namen, Zahlen, Entscheidungen und Aktionspunkte. " "Entferne Fuelltext und Wiederholungen. " "Antworte NUR mit der Zusammenfassung, keine Erklaerungen oder Einleitungen." ), context=rawContext, options=AiCallOptions( operationType=OperationTypeEnum.DATA_ANALYSE, priority=PriorityEnum.SPEED, ) ) response = await aiService.callAi(request) if response and response.errorCount == 0 and response.content: summary = response.content.strip() logger.info(f"Session {sessionId}: Context summarized from {len(rawContext)} to {len(summary)} chars") return summary except Exception as e: logger.warning(f"Session context summarization failed for {sessionId}: {e}") # Fallback: return original (truncated if very long) return rawContext[:2000] if len(rawContext) > 2000 else rawContext async def _summarizeContextBuffer(self, sessionId: str): """Summarize the older part of the context buffer to preserve information without exceeding the context window. This runs in the background.""" try: if self._contextSummary: return # Already summarized recently # Take the older half of the buffer for summarization halfPoint = len(self._contextBuffer) // 2 oldSegments = self._contextBuffer[:halfPoint] if len(oldSegments) < 10: return # Not enough to summarize # Build text to summarize lines = [] for seg in oldSegments: speaker = seg.get("speaker", "Unknown") text = seg.get("text", "") lines.append(f"[{speaker}]: {text}") textToSummarize = "\n".join(lines) from modules.services.serviceAi.mainServiceAi import AiService from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId) aiService = AiService(serviceCenter=serviceContext) await aiService.ensureAiObjectsInitialized() request = AiCallRequest( prompt="Fasse das folgende Meeting-Transkript in 3-5 Saetzen zusammen. Nenne die wichtigsten Themen, Entscheidungen und offene Fragen. Antworte NUR mit der Zusammenfassung, keine Erklaerungen.", context=textToSummarize, options=AiCallOptions( operationType=OperationTypeEnum.DATA_ANALYSE, priority=PriorityEnum.SPEED, ) ) response = await aiService.callAi(request) if response and response.errorCount == 0: self._contextSummary = response.content.strip() logger.info(f"Session {sessionId}: Context summarized ({len(oldSegments)} segments -> {len(self._contextSummary)} chars)") except Exception as e: logger.warning(f"Context summarization failed for session {sessionId}: {e}") # ========================================================================= # Meeting Summary # ========================================================================= async def _generateMeetingSummary(self, sessionId: str): """Generate an AI summary of the meeting after it ends.""" try: from . import interfaceFeatureTeamsbot as interfaceDb interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) transcripts = interface.getTranscripts(sessionId) if not transcripts or len(transcripts) < 5: return # Not enough content for a summary # Build full transcript fullTranscript = "\n".join( f"[{t.get('speaker', 'Unknown')}]: {t.get('text', '')}" for t in transcripts ) from modules.services.serviceAi.mainServiceAi import AiService serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId) aiService = AiService(serviceCenter=serviceContext) await aiService.ensureAiObjectsInitialized() request = AiCallRequest( prompt="Erstelle eine kurze Zusammenfassung dieses Meeting-Transkripts. Nenne die wichtigsten Punkte, Entscheidungen und offene Aktionspunkte.", context=fullTranscript, options=AiCallOptions( operationType=OperationTypeEnum.DATA_ANALYSE, priority=PriorityEnum.BALANCED, ) ) response = await aiService.callAi(request) if response and response.errorCount == 0: interface.updateSession(sessionId, {"summary": response.content}) logger.info(f"Meeting summary generated for session {sessionId}") except Exception as e: logger.error(f"Failed to generate meeting summary for session {sessionId}: {e}")