# Copyright (c) 2025 Patrick Motsch # All rights reserved. """ Teamsbot Service - Pipeline Orchestrator. Manages the audio processing pipeline: STT -> Context Buffer -> SPEECH_TEAMS -> TTS -> Bridge. """ import logging import json import re import asyncio import time import base64 from datetime import datetime, timezone from typing import Optional, Dict, Any, List, Callable from fastapi import WebSocket from modules.datamodels.datamodelUam import User from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum from modules.shared.timeUtils import getUtcTimestamp from modules.serviceCenter import getService as _getServiceCenterService from modules.serviceCenter.context import ServiceCenterContext from .datamodelTeamsbot import ( TeamsbotSessionStatus, TeamsbotTranscript, TeamsbotBotResponse, TeamsbotResponseType, TeamsbotConfig, TeamsbotResponseMode, TeamsbotResponseChannel, TeamsbotDetectedIntent, SpeechTeamsResponse, TeamsbotCommand, TeamsbotDirectorPrompt, TeamsbotDirectorPromptStatus, TeamsbotDirectorPromptMode, DIRECTOR_PROMPT_TEXT_LIMIT, DIRECTOR_PROMPT_FILE_LIMIT, ) from .browserBotConnector import BrowserBotConnector logger = logging.getLogger(__name__) # Agent run limits for director prompts / speech escalation (meeting context). TEAMSBOT_AGENT_MAX_ROUNDS = 8 TEAMSBOT_AGENT_MAX_COST_CHF = 0.12 # How many recent director-prompt briefings we keep in session memory. _RECENT_DIRECTOR_BRIEFINGS_MAX = 6 # Quick-ack throttle interval. _QUICK_ACK_MIN_INTERVAL_SEC = 25.0 # Number of phrase variants we generate per kind. _EPHEMERAL_PHRASE_VARIANTS = 4 # Localisation INTENTS for ephemeral phrases. _EPHEMERAL_PHRASE_INTENTS: Dict[str, str] = { "quickAck": ( "Very short verbal acknowledgment (1 to 4 words) the assistant says " "the moment its name is recognised, BEFORE it has formulated a full " "answer. The intent is purely 'I heard you, I'm thinking' — natural, " "conversational, never a complete sentence." ), "agentBusy": ( "One short sentence (max ~12 words) the assistant says BEFORE starting " "a longer research / tool-call task, so the audience knows the answer " "will take a few seconds. Polite, professional, calm." ), "agentRound": ( "One short sentence (max ~14 words) the assistant says BETWEEN rounds " "of a longer agent task to update the audience on what it is doing. " "Include the placeholder token '{activity}' which will be filled with " "the current activity — e.g. 'I am {activity}, one moment...' or " "'Currently {activity}, almost there...'. Do NOT include step numbers." ), } # ========================================================================= # Module-level utility functions (used by extracted modules too) # ========================================================================= def _voiceLineLooksLikeBillingOrMeta(line: str) -> bool: """Heuristic: trailing lines that are separators or billing/usage footers.""" s = line.strip() if not s: return True lower = s.lower() if re.match(r"^[-=*_]{3,}\s*$", s): return True if re.match(r"^#{1,6}\s*(usage|billing|costs?|meta|technical|statistics)\b", lower): return True if "chf" in lower and re.search(r"\d", s): if re.search( r"\b(total|usage|cost|billing|token|spent|used|price|estimate|" r"rounds?|calls?|duration|processing\s*time|model\s*calls?)\b", lower, ): return True if "token" in lower and re.search(r"\d", s): if re.search(r"\b(total|usage|prompt|completion)\b", lower): return True pl = lower.replace(" ", "") if "progressafter" in pl and ("aicalls:" in pl or "toolcalls:" in pl): return True return False _EMOJI_PATTERN = re.compile( "[" "\U0001F300-\U0001FAFF" "\U00002600-\U000027BF" "\U0001F1E6-\U0001F1FF" "\U00002B00-\U00002BFF" "\U0001F900-\U0001F9FF" "\U0000FE0F" "]+", flags=re.UNICODE, ) def _voiceFriendlyMeetingText(raw: str) -> str: """Sanitise a chat/markdown response so it can be SPOKEN naturally.""" if not raw: return "" low = raw.lower() if "maximum rounds reached" in low: m = re.search(r"(?is)maximum\s+rounds\s+reached", raw) if m: head = raw[: m.start()].strip() raw = head or ( "Die Abklaerung brauchte mehr Schritte als vorgesehen; Details stehen im Chat." ) if "budget exceeded" in low: m = re.search(r"(?is)budget\s+exceeded", raw) if m: head = raw[: m.start()].strip() raw = head or "Das eingestellte Kostenlimit ist erreicht; Details stehen im Chat." lines = raw.strip().split("\n") while lines and _voiceLineLooksLikeBillingOrMeta(lines[-1]): lines.pop() t = "\n".join(lines).strip() if not t: t = raw.strip() t = re.sub(r"```[\s\S]*?```", " ", t) t = re.sub(r"`([^`]+)`", r"\1", t) cleanedLines: List[str] = [] for ln in t.split("\n"): stripped = ln.strip() if stripped.count("|") >= 2: continue if re.fullmatch(r"\s*\|?[\s\-:|]+\|?\s*", stripped) and "-" in stripped: continue cleanedLines.append(ln) t = "\n".join(cleanedLines) t = re.sub(r"(?m)^\s*([-*_])\s*\1\s*\1[\s\1]*$", "", t) t = re.sub(r"(?m)^\s*#{1,6}\s+", "", t) t = re.sub(r"(?m)^\s*[-*•·]\s+", "", t) t = re.sub(r"(?m)^\s*\d+[\.\)]\s+", "", t) t = re.sub(r"\*\*([^*]+)\*\*", r"\1", t) t = re.sub(r"\*([^*\n]+)\*", r"\1", t) t = re.sub(r"__([^_]+)__", r"\1", t) t = re.sub(r"(?{}\[\]()_&@$%`]+", " ", t) t = t.replace('"', "") t = re.sub(r"(?<=\w)\s*/\s*(?=\w)", " oder ", t) t = t.replace("/", " ") t = re.sub(r"([\.,;:!\?])\1{1,}", r"\1", t) t = re.sub(r"\s+([\.,;:!\?])", r"\1", t) t = re.sub(r":\s*$", ".", t.rstrip()) t = re.sub(r"\s+:\s*$", ":", t, flags=re.MULTILINE) paragraphs = [p.strip() for p in re.split(r"\n\s*\n", t) if p.strip()] rebuilt: List[str] = [] for p in paragraphs: p = re.sub(r"\s+", " ", p).strip() if not p: continue if not re.search(r"[\.!\?\u2026:]\s*$", p): p = p.rstrip() + "." rebuilt.append(p) t = " ".join(rebuilt) t = re.sub(r"\s+", " ", t).strip() return t _TTS_MAX_CHUNK_CHARS = 800 def _splitTextForTts(text: str, maxChars: int = _TTS_MAX_CHUNK_CHARS) -> List[str]: """Split a long voice line into TTS-safe chunks at sentence/paragraph boundaries.""" cleaned = (text or "").strip() if not cleaned: return [] if len(cleaned) <= maxChars: return [cleaned] sentencePattern = re.compile(r"(?<=[\.!\?\u2026])\s+|\n+") rawSentences = [s.strip() for s in sentencePattern.split(cleaned) if s and s.strip()] if not rawSentences: rawSentences = [cleaned] chunks: List[str] = [] buffer = "" for sentence in rawSentences: if len(sentence) > maxChars: if buffer: chunks.append(buffer.strip()) buffer = "" words = sentence.split(" ") current = "" for word in words: candidate = (current + " " + word).strip() if current else word if len(candidate) > maxChars and current: chunks.append(current.strip()) current = word else: current = candidate if current: if not re.search(r"[\.!\?\u2026]\s*$", current): current = current.rstrip() + "." chunks.append(current.strip()) continue candidate = (buffer + " " + sentence).strip() if buffer else sentence if len(candidate) > maxChars and buffer: chunks.append(buffer.strip()) buffer = sentence else: buffer = candidate if buffer: chunks.append(buffer.strip()) finalized: List[str] = [] for c in chunks: if not c: continue if not re.search(r"[\.!\?\u2026]\s*$", c): c = c.rstrip() + "." finalized.append(c) return finalized async def _speakTextChunked( websocket: Optional[WebSocket], voiceInterface: Any, sessionId: str, voiceText: str, languageCode: str, voiceName: Optional[str], isCancelled: Optional[Callable[[], bool]] = None, ) -> Dict[str, Any]: """Run TTS in chunks and dispatch each ``playAudio`` over the websocket.""" chunks = _splitTextForTts(voiceText) result: Dict[str, Any] = {"success": False, "chunks": len(chunks), "played": 0, "error": None, "cancelled": False} if not chunks: result["error"] = "no text" return result if voiceInterface is None: result["error"] = "no voice interface" return result lastError: Optional[str] = None for idx, chunk in enumerate(chunks, start=1): if isCancelled is not None and isCancelled(): result["cancelled"] = True logger.info( f"Session {sessionId}: TTS chunk loop cancelled before chunk " f"{idx}/{len(chunks)} (user stop or newer answer in flight)" ) break try: ttsResult = await voiceInterface.textToSpeech( text=chunk, languageCode=languageCode, voiceName=voiceName, ) except Exception as ttsErr: lastError = f"chunk {idx}/{len(chunks)} raised: {ttsErr}" logger.warning(f"Session {sessionId}: TTS {lastError}") continue if not isinstance(ttsResult, dict) or ttsResult.get("success") is False: err = (ttsResult or {}).get("error", "unknown") if isinstance(ttsResult, dict) else "no result" lastError = f"chunk {idx}/{len(chunks)} failed: {err}" logger.warning(f"Session {sessionId}: TTS {lastError}") continue audioContent = ttsResult.get("audioContent") if not audioContent: lastError = f"chunk {idx}/{len(chunks)} returned no audioContent" logger.warning(f"Session {sessionId}: TTS {lastError}") continue if websocket is None: lastError = "websocket unavailable" break if isCancelled is not None and isCancelled(): result["cancelled"] = True logger.info( f"Session {sessionId}: TTS chunk loop cancelled before " f"sending chunk {idx}/{len(chunks)} (audio dropped)" ) break try: await websocket.send_text(json.dumps({ "type": "playAudio", "sessionId": sessionId, "audio": { "data": base64.b64encode( audioContent if isinstance(audioContent, bytes) else audioContent.encode() ).decode(), "format": "mp3", }, })) result["played"] += 1 except Exception as wsErr: lastError = f"chunk {idx}/{len(chunks)} websocket send failed: {wsErr}" logger.warning(f"Session {sessionId}: TTS {lastError}") break result["success"] = result["played"] > 0 if lastError: result["error"] = lastError return result def _coercePersistedDetectedIntent(raw: Optional[str]) -> tuple: """Map free-form intent labels to TeamsbotDetectedIntent for DB persistence.""" if not raw or not str(raw).strip(): return TeamsbotDetectedIntent.NONE, None s = str(raw).strip().lower() for member in TeamsbotDetectedIntent: if member.value == s: return member, None if s.startswith("agent:"): return TeamsbotDetectedIntent.PROACTIVE, str(raw).strip()[:120] return TeamsbotDetectedIntent.NONE, str(raw).strip()[:120] _DIRECTOR_REPLY_PATTERN = re.compile( r"^\s*(MEETING_REPLY|MEETING|REPLY|SAY|SPEAK)\s*:\s*", re.IGNORECASE, ) _DIRECTOR_SILENT_PATTERN = re.compile( r"^\s*(SILENT|INTERNAL(?:_ONLY)?|NOTE|NO_MEETING_OUTPUT|ACK(?:NOWLEDGE)?)\s*:\s*", re.IGNORECASE, ) def _parseDirectorPromptFinal(finalText: str) -> Dict[str, Any]: """Parse the agent's final answer for a director prompt.""" text = (finalText or "").strip() if not text: return {"kind": "silent", "meetingText": "", "internalNote": ""} meetingMatch = _DIRECTOR_REPLY_PATTERN.match(text) if meetingMatch: body = text[meetingMatch.end():].strip() return {"kind": "meeting", "meetingText": body, "internalNote": ""} silentMatch = _DIRECTOR_SILENT_PATTERN.match(text) if silentMatch: body = text[silentMatch.end():].strip() return {"kind": "silent", "meetingText": "", "internalNote": body} return {"kind": "silent", "meetingText": "", "internalNote": text} # ========================================================================= # Active Service Registry (sessionId -> running TeamsbotService instance) # ========================================================================= _activeServices: Dict[str, "TeamsbotService"] = {} def getActiveService(sessionId: str) -> Optional["TeamsbotService"]: """Return the running TeamsbotService for a session, or None if not active.""" return _activeServices.get(sessionId) # ========================================================================= # AI Service Factory # ========================================================================= def createAiService(user, mandateId, featureInstanceId=None): """Create a properly wired AiService via the service center.""" ctx = ServiceCenterContext( user=user, mandate_id=mandateId, feature_instance_id=featureInstanceId, feature_code="teamsbot", ) return _getServiceCenterService("ai", ctx) # ========================================================================= # Session Event Queues (for SSE streaming to frontend) # ========================================================================= sessionEvents: Dict[str, asyncio.Queue] = {} async def _emitSessionEvent(sessionId: str, eventType: str, data: Any): """Emit an event to the session's SSE stream.""" if sessionId not in sessionEvents: sessionEvents[sessionId] = asyncio.Queue() await sessionEvents[sessionId].put({"type": eventType, "data": data, "timestamp": getUtcTimestamp()}) def _normalizeGatewayHostForBotWs(host: str) -> str: """Use IPv4 loopback for local dev WebSocket URLs passed to the Node browser-bot.""" h = host.strip() lower = h.lower() if lower == "localhost": return "127.0.0.1" if lower.startswith("localhost:"): return "127.0.0.1" + h[len("localhost"):] if lower.startswith("[::1]:"): return "127.0.0.1" + h.partition("]")[2] if lower in ("[::1]", "::1"): return "127.0.0.1" return h # ========================================================================= # TeamsbotService Class # ========================================================================= class TeamsbotService: """ Pipeline Orchestrator for Teams Bot sessions. Coordinates VoiceObjects (STT/TTS), AiService (SPEECH_TEAMS), and Bridge communication. """ def __init__(self, currentUser: User, mandateId: str, instanceId: str, config: TeamsbotConfig): self.currentUser = currentUser self.mandateId = mandateId self.instanceId = instanceId self.config = config self.browserBotConnector = BrowserBotConnector(config._getEffectiveBrowserBotUrl()) # State self._lastAiCallTime: float = 0.0 self._aiAnalysisInProgress: bool = False self._contextBuffer: List[Dict[str, Any]] = [] self._sessionContext: Optional[str] = None self._contextSummary: Optional[str] = None # Differential transcript tracking self._lastTranscriptSpeaker: Optional[str] = None self._lastTranscriptText: Optional[str] = None self._lastTranscriptId: Optional[str] = None self._lastSttTime: float = 0.0 # Audio chunk aggregation self._audioBuffer: bytes = b"" self._audioBufferStartTime: float = 0.0 self._audioBufferLastChunkTime: float = 0.0 self._audioBufferSampleRate: int = 16000 self._lastBotResponseText: Optional[str] = None self._lastBotResponseTs: float = 0.0 # Speaker attribution self._lastCaptionSpeaker: Optional[str] = None self._unattributedTranscriptIds: List[str] = [] self._knownSpeakers: set = set() # Debounced name trigger self._pendingNameTrigger: Optional[Dict[str, Any]] = None self._followUpWindowEnd: float = 0.0 # Quick-ack throttle self._lastQuickAckTs: float = 0.0 # Ephemeral phrase pool self._phrasePool: Dict[str, List[str]] = {} self._phrasePoolIdx: Dict[str, int] = {} self._phrasePoolLock: asyncio.Lock = asyncio.Lock() # Voice pipeline serialisation self._meetingTtsLock: asyncio.Lock = asyncio.Lock() self._answerGenerationCounter: int = 0 self._currentEscalationTask: Optional[asyncio.Task] = None self._currentQuickAckTask: Optional[asyncio.Task] = None self._agentEscalationInFlight: bool = False # Live transport handles self._activeSessionId: Optional[str] = None self._websocket: Optional[WebSocket] = None self._voiceInterface = None # Director prompts self._activePersistentPrompts: List[Dict[str, Any]] = [] self._recentDirectorBriefings: List[Dict[str, Any]] = [] # ========================================================================= # Session Lifecycle # ========================================================================= async def joinMeeting( self, sessionId: str, meetingLink: str, connectionId: Optional[str] = None, gatewayBaseUrl: str = "", botAccountEmail: Optional[str] = None, botAccountPassword: Optional[str] = None, ): """Send join command to the Browser Bot service.""" from . import interfaceFeatureTeamsbot as interfaceDb interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) if sessionId not in sessionEvents: sessionEvents[sessionId] = asyncio.Queue() try: interface.updateSession(sessionId, {"status": TeamsbotSessionStatus.JOINING.value}) await _emitSessionEvent(sessionId, "statusChange", {"status": "joining"}) session = interface.getSession(sessionId) if not session: raise ValueError(f"Session {sessionId} not found") wsScheme = "wss" if gatewayBaseUrl.startswith("https") else "ws" gatewayHost = gatewayBaseUrl.replace("https://", "").replace("http://", "").rstrip("/") gatewayHost = _normalizeGatewayHostForBotWs(gatewayHost) fullGatewayWsUrl = f"{wsScheme}://{gatewayHost}/api/teamsbot/{self.instanceId}/bot/ws/{sessionId}" hasAuth = bool(botAccountEmail and botAccountPassword) logger.info(f"Joining meeting for session {sessionId}: auth={hasAuth}, email={botAccountEmail or 'N/A'}, transferMode={self.config.transferMode}") avatarMediaData = None avatarMediaType = None avatarFileId = self._resolveAvatarFileId(session, interface) if avatarFileId: avatarMediaData, avatarMediaType = self._loadAvatarFileData(avatarFileId, interface) result = await self.browserBotConnector.joinMeeting( sessionId=sessionId, meetingUrl=meetingLink, botName=session.get("botName", self.config.botName), instanceId=self.instanceId, gatewayWsUrl=fullGatewayWsUrl, language=self.config.language, botAccountEmail=botAccountEmail, botAccountPassword=botAccountPassword, transferMode=self.config.transferMode if hasattr(self.config, 'transferMode') else "auto", debugMode=self.config.debugMode if hasattr(self.config, 'debugMode') else False, avatarMediaData=avatarMediaData, avatarMediaType=avatarMediaType, ) if result.get("success"): interface.updateSession(sessionId, { "status": TeamsbotSessionStatus.JOINING.value, }) logger.info(f"Browser bot deployment started for session {sessionId}") else: errorMsg = result.get("error", "Unknown error joining meeting") interface.updateSession(sessionId, { "status": TeamsbotSessionStatus.ERROR.value, "errorMessage": errorMsg, }) await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": errorMsg}) logger.error(f"Failed to deploy browser bot for session {sessionId}: {errorMsg}") except Exception as e: logger.error(f"Error joining meeting for session {sessionId}: {e}") interface.updateSession(sessionId, { "status": TeamsbotSessionStatus.ERROR.value, "errorMessage": str(e), }) await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)}) def _resolveAvatarFileId(self, session, interface): """Resolve avatarFileId: module override > config default.""" moduleId = session.get("moduleId") if moduleId: module = interface.getModule(moduleId) if module and module.get("defaultAvatarFileId"): return module["defaultAvatarFileId"] return getattr(self.config, "avatarFileId", None) def _loadAvatarFileData(self, fileId, _teamsbotInterface): """Load avatar file as base64 data + mime type.""" from modules.interfaces import interfaceDbManagement try: mgmt = interfaceDbManagement.getInterface(self.currentUser, self.mandateId, featureInstanceId=self.instanceId) fileRecord = mgmt.getFile(fileId) if not fileRecord: logger.warning(f"Avatar file {fileId} not found") return None, None mimeType = getattr(fileRecord, "mimeType", None) or "image/png" rawBytes = mgmt.getFileData(fileId) if not rawBytes: logger.warning(f"Avatar file {fileId} has no data") return None, None b64 = base64.b64encode(rawBytes).decode("ascii") logger.info(f"Avatar file loaded: {fileId}, {mimeType}, {len(b64)} chars base64") return b64, mimeType except Exception as e: logger.error(f"Failed to load avatar file {fileId}: {e}") return None, None async def leaveMeeting(self, sessionId: str): """Send leave command to the Browser Bot service.""" from . import interfaceFeatureTeamsbot as interfaceDb interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) try: interface.updateSession(sessionId, {"status": TeamsbotSessionStatus.LEAVING.value}) await _emitSessionEvent(sessionId, "statusChange", {"status": "leaving"}) await self.browserBotConnector.leaveMeeting(sessionId) interface.updateSession(sessionId, { "status": TeamsbotSessionStatus.ENDED.value, "endedAt": getUtcTimestamp(), }) await _emitSessionEvent(sessionId, "statusChange", {"status": "ended"}) asyncio.create_task(self._generateMeetingSummary(sessionId)) logger.info(f"Bot left meeting for session {sessionId}") except Exception as e: logger.error(f"Error leaving meeting for session {sessionId}: {e}") interface.updateSession(sessionId, { "status": TeamsbotSessionStatus.ERROR.value, "errorMessage": str(e), "endedAt": getUtcTimestamp(), }) sessionEvents.pop(sessionId, None) # ========================================================================= # WebSocket — delegates to serviceWebSocket module # ========================================================================= async def handleBotWebSocket(self, websocket: WebSocket, sessionId: str): """Main WebSocket handler — delegates to serviceWebSocket.""" from .serviceWebSocket import handleBotWebSocket as _handleBotWebSocket await _handleBotWebSocket(self, websocket, sessionId) # ========================================================================= # Conversation — delegates to serviceConversation module # ========================================================================= async def _processTranscript(self, *args, **kwargs): from .serviceConversation import _processTranscript await _processTranscript(self, *args, **kwargs) async def _analyzeAndRespond(self, *args, **kwargs): from .serviceConversation import _analyzeAndRespond await _analyzeAndRespond(self, *args, **kwargs) async def _summarizeForVoice(self, sessionId: str, rawAnswer: str) -> str: from .serviceConversation import _summarizeForVoice return await _summarizeForVoice(self, sessionId, rawAnswer) async def _pickQuickAckText(self) -> Optional[str]: from .serviceConversation import _pickQuickAckText return await _pickQuickAckText(self) async def _pickEphemeralPhrase(self, kind: str, substitutions=None) -> Optional[str]: from .serviceConversation import _pickEphemeralPhrase return await _pickEphemeralPhrase(self, kind, substitutions) async def _getEphemeralPhrases(self, kind: str) -> List[str]: from .serviceConversation import _getEphemeralPhrases return await _getEphemeralPhrases(self, kind) async def _generateEphemeralPhrases(self, kind: str, count: int) -> List[str]: from .serviceConversation import _generateEphemeralPhrases return await _generateEphemeralPhrases(self, kind, count) async def _runQuickAck(self, sessionId: str) -> None: from .serviceConversation import _runQuickAck await _runQuickAck(self, sessionId) async def _checkPendingNameTrigger(self, delaySec: float = 3.0): from .serviceConversation import _checkPendingNameTrigger await _checkPendingNameTrigger(self, delaySec) async def _warmEphemeralPhrasePool(self, sessionId: str) -> None: from .serviceConversation import _warmEphemeralPhrasePool await _warmEphemeralPhrasePool(self, sessionId) async def _runEscalationAndRelease(self, *args, **kwargs) -> None: from .serviceConversation import _runEscalationAndRelease await _runEscalationAndRelease(self, *args, **kwargs) # ========================================================================= # Commands — delegates to serviceCommands module # ========================================================================= async def _executeCommands(self, sessionId, commands, voiceInterface, websocket): from .serviceCommands import _executeCommands await _executeCommands(self, sessionId, commands, voiceInterface, websocket) # ========================================================================= # WebSocket helpers (kept on class — used by serviceWebSocket) # ========================================================================= async def _handleBotStatus(self, sessionId, status, errorMessage, interface): from .serviceWebSocket import _handleBotStatus await _handleBotStatus(self, sessionId, status, errorMessage, interface) async def _processAudioChunk(self, *args, **kwargs): from .serviceWebSocket import _processAudioChunk await _processAudioChunk(self, *args, **kwargs) async def _cancelInFlightSpeech(self, sessionId, websocket, reason): from .serviceWebSocket import _cancelInFlightSpeech await _cancelInFlightSpeech(self, sessionId, websocket, reason) # ========================================================================= # Speaker & Trigger detection (kept on class — small, stateful) # ========================================================================= def _registerSpeakerHint(self, speaker: str, text: str, sessionId: str = ""): """Track current speaker from captions for STT attribution.""" if not speaker: return normalizedSpeaker = speaker.strip() if not normalizedSpeaker or self._isBotSpeaker(normalizedSpeaker): return self._lastCaptionSpeaker = normalizedSpeaker self._knownSpeakers.add(normalizedSpeaker) if self._unattributedTranscriptIds: from . import interfaceFeatureTeamsbot as interfaceDb interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) for tid in self._unattributedTranscriptIds: interface.updateTranscript(tid, {"speaker": normalizedSpeaker}) for seg in self._contextBuffer: if seg.get("speaker") == "Unknown" and seg.get("source") == "audioCapture": seg["speaker"] = normalizedSpeaker if self._lastTranscriptSpeaker == "Unknown": self._lastTranscriptSpeaker = normalizedSpeaker logger.info( f"Session {sessionId}: Retroactive speaker attribution: " f"{len(self._unattributedTranscriptIds)} segments -> {normalizedSpeaker}" ) self._unattributedTranscriptIds.clear() if self._pendingNameTrigger: self._pendingNameTrigger["lastActivity"] = time.time() def _resolveSpeakerForAudioCapture(self) -> Dict[str, Any]: """Speaker name for audio chunks — uses the last caption speaker.""" if self._lastCaptionSpeaker: return {"speaker": self._lastCaptionSpeaker, "speakerResolvedFromHint": True} return {"speaker": "Unknown", "speakerResolvedFromHint": False} def _isBotSpeaker(self, speaker: str) -> bool: """Check if a transcript speaker is the bot itself.""" if not speaker: return False speakerLower = speaker.lower().strip() botName = self.config.botName.lower().strip() if botName and botName in speakerLower: return True botAccountEmail = getattr(self, '_botAccountEmail', None) or getattr(self.config, 'botAccountEmail', None) if botAccountEmail: emailPrefix = botAccountEmail.split("@")[0].lower().replace(".", " ") if emailPrefix in speakerLower: return True return False def _shouldTriggerAnalysis(self, transcriptText: str, allowPeriodic: bool = True) -> bool: """Decide whether to trigger AI analysis based on the latest transcript.""" now = time.time() timeSinceLastCall = now - self._lastAiCallTime if timeSinceLastCall < self.config.triggerCooldownSeconds: return False if allowPeriodic and timeSinceLastCall >= self.config.triggerIntervalSeconds: logger.info(f"Trigger: Periodic interval ({self.config.triggerIntervalSeconds}s) elapsed ({timeSinceLastCall:.1f}s)") return True return False def _isStopPhrase(self, text: str) -> bool: """Check if text is an immediate-cancel command from the meeting.""" if not text or len(text.strip()) < 2: return False t = text.strip().lower() words = [w.strip(".,!?:;\"'()[]") for w in t.split() if w.strip()] wordSet = set(words) stopWords = { "stop", "stopp", "halt", "ruhe", "stille", "schweig", "arrete", "quiet", "shut", "silence", "warte", "wait", "moment", "pause", } if wordSet & stopWords: return True if ( "sei still" in t or "be quiet" in t or "shut up" in t or "hold on" in t or "aufhoeren" in t or "aufhören" in t ): return True return False def _makeAnswerCancelHook(self) -> Callable[[], bool]: """Capture the current generation counter and return a cancel predicate.""" snapshot = self._answerGenerationCounter return lambda: self._answerGenerationCounter != snapshot def _detectBotName(self, text: str) -> bool: """Check if text contains the bot's name (exact or phonetically similar).""" botNameLower = self.config.botName.lower() textLower = text.lower() if botNameLower in textLower: return True botFirstName = botNameLower.split()[0] if " " in botNameLower else botNameLower if len(botFirstName) >= 3: for word in textLower.split(): cleanWord = word.strip(".,!?:;\"'()[]") if not cleanWord or len(cleanWord) < 3: continue if cleanWord == botFirstName: return True if cleanWord[0] == botFirstName[0] and abs(len(cleanWord) - len(botFirstName)) <= 2: common = sum(1 for c in set(botFirstName) if c in cleanWord) similarity = common / max(len(set(botFirstName)), len(set(cleanWord))) if similarity >= 0.6: return True return False def _setPendingNameTrigger(self, sessionId, interface, voiceInterface, websocket, triggerTranscript) -> bool: """Set or update a debounced name trigger. Returns True if newly set.""" if self._pendingNameTrigger: self._pendingNameTrigger["lastActivity"] = time.time() return False self._pendingNameTrigger = { "sessionId": sessionId, "interface": interface, "voiceInterface": voiceInterface, "websocket": websocket, "triggerTranscript": triggerTranscript, "detectedAt": time.time(), "lastActivity": time.time(), } return True def _shouldFireQuickAck(self) -> bool: """Centralized gate for quick-ack firing.""" now = time.time() if (now - self._lastQuickAckTs) < _QUICK_ACK_MIN_INTERVAL_SEC: return False if self._aiAnalysisInProgress or self._agentEscalationInFlight: return False channelRaw = self.config.responseChannel channelStr = ( channelRaw.value if hasattr(channelRaw, "value") else str(channelRaw) ).lower().strip() if channelStr not in ("voice", "both"): return False if self.config.responseMode in ( TeamsbotResponseMode.MANUAL, TeamsbotResponseMode.TRANSCRIBE_ONLY, ): return False return True # ========================================================================= # Voice helpers (kept on class) # ========================================================================= _VOICE_DIRECT_MAX_CHARS = 600 _VOICE_SUMMARY_MAX_CHARS = 350 @staticmethod def _looksLikeStructuredText(raw: str) -> bool: """Heuristic: does the original answer have markdown structure?""" if not raw: return False if raw.count("|") >= 4: return True if raw.count("\n#") >= 1: return True if raw.count("\n- ") + raw.count("\n* ") + raw.count("\n• ") >= 3: return True if re.search(r"\n\d+[\.\)]\s", raw): count = len(re.findall(r"(?m)^\s*\d+[\.\)]\s", raw)) if count >= 3: return True return False # ========================================================================= # Director Prompts (private operator instructions during a live meeting) # ========================================================================= def _collectActiveDirectorBriefings(self) -> List[Dict[str, Any]]: """Return the deduplicated list of director-prompt briefings.""" seen: Dict[str, Dict[str, Any]] = {} for p in self._activePersistentPrompts: pid = p.get("id") or "" seen[pid] = { "promptId": pid, "mode": p.get("mode") or "persistent", "text": (p.get("text") or "").strip(), "fileIds": list(p.get("fileIds") or []), "note": (p.get("responseText") or "").strip(), } for b in self._recentDirectorBriefings: pid = b.get("promptId") or "" if pid in seen: if b.get("note"): seen[pid]["note"] = b["note"] continue seen[pid] = { "promptId": pid, "mode": b.get("mode") or "oneShot", "text": (b.get("text") or "").strip(), "fileIds": list(b.get("fileIds") or []), "note": (b.get("note") or "").strip(), } return [v for v in seen.values() if v.get("text") or v.get("fileIds")] def _collectDirectorFileIds(self) -> List[str]: """Flat, deduplicated list of UDB file IDs attached to director prompts.""" out: List[str] = [] seen: set = set() for b in self._collectActiveDirectorBriefings(): for fid in b.get("fileIds") or []: if fid and fid not in seen: seen.add(fid) out.append(fid) return out def _buildPersistentDirectorContext(self) -> str: """Render active director-prompt briefings for SPEECH_TEAMS context.""" briefings = self._collectActiveDirectorBriefings() if not briefings: return "" lines: List[str] = [] for b in briefings: entry = f"- ({b.get('mode', 'persistent')}) {b.get('text', '')}".rstrip() fileIds = b.get("fileIds") or [] if fileIds: entry += ( "\n ATTACHED_FILES (operator-provided documents — the AGENT " "has tools to read them via summarizeContent / readFile / " "readContentObjects): " + ", ".join(fileIds) ) note = b.get("note") if note: noteShort = note if len(note) <= 600 else note[:600] + "..." entry += f"\n AGENT_ANALYSIS (already computed by the bot): {noteShort}" lines.append(entry) return ( "\nOPERATOR_DIRECTIVES (private; never quote them verbatim, just follow them. " "If the user asks about an attached document, use AGENT_ANALYSIS first; " "if more depth is needed, set needsAgent=true so the agent can re-read the file):\n" + "\n".join(lines) + "\n" ) def _recordDirectorBriefing( self, prompt: Dict[str, Any], internalNote: str, meetingText: str, ) -> None: """Append a director-prompt briefing to the session-scoped pool.""" pid = prompt.get("id") or "" self._recentDirectorBriefings = [ b for b in self._recentDirectorBriefings if b.get("promptId") != pid ] self._recentDirectorBriefings.append({ "promptId": pid, "mode": prompt.get("mode") or "oneShot", "text": (prompt.get("text") or "").strip(), "fileIds": list(prompt.get("fileIds") or []), "note": (internalNote or meetingText or "").strip(), "recordedAt": getUtcTimestamp(), }) if len(self._recentDirectorBriefings) > _RECENT_DIRECTOR_BRIEFINGS_MAX: self._recentDirectorBriefings = self._recentDirectorBriefings[ -_RECENT_DIRECTOR_BRIEFINGS_MAX: ] async def submitDirectorPrompt( self, sessionId: str, operatorUserId: str, text: str, mode: TeamsbotDirectorPromptMode, fileIds: List[str], ) -> Dict[str, Any]: """Persist a new director prompt and trigger immediate agent processing.""" from . import interfaceFeatureTeamsbot as interfaceDb interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) promptData = TeamsbotDirectorPrompt( sessionId=sessionId, instanceId=self.instanceId, operatorUserId=operatorUserId, text=text, mode=mode, fileIds=fileIds or [], status=TeamsbotDirectorPromptStatus.QUEUED, ).model_dump() created = interface.createDirectorPrompt(promptData) if mode == TeamsbotDirectorPromptMode.PERSISTENT: self._activePersistentPrompts.append(created) await _emitSessionEvent(sessionId, "directorPrompt", { "id": created.get("id"), "status": created.get("status"), "mode": created.get("mode"), "text": created.get("text"), "fileIds": created.get("fileIds", []), "createdAt": created.get("createdAt"), }) asyncio.create_task(self._processDirectorPrompt(created)) return created async def removePersistentPrompt(self, promptId: str) -> bool: """Remove a persistent director prompt.""" from . import interfaceFeatureTeamsbot as interfaceDb interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) sessionId = self._activeSessionId prompt = interface.getDirectorPrompt(promptId) if not prompt: return False interface.updateDirectorPrompt(promptId, { "status": TeamsbotDirectorPromptStatus.CONSUMED.value, "consumedAt": getUtcTimestamp(), "statusMessage": "Removed by operator", }) self._activePersistentPrompts = [ p for p in self._activePersistentPrompts if p.get("id") != promptId ] self._recentDirectorBriefings = [ b for b in self._recentDirectorBriefings if b.get("promptId") != promptId ] if sessionId: await _emitSessionEvent(sessionId, "directorPrompt", { "id": promptId, "status": TeamsbotDirectorPromptStatus.CONSUMED.value, "mode": prompt.get("mode"), "text": prompt.get("text"), "removed": True, }) return True async def _processDirectorPrompt(self, prompt: Dict[str, Any]) -> None: """Run the agent for a director prompt and deliver the result.""" from . import interfaceFeatureTeamsbot as interfaceDb sessionId = prompt.get("sessionId") promptId = prompt.get("id") interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) interface.updateDirectorPrompt(promptId, { "status": TeamsbotDirectorPromptStatus.RUNNING.value, }) await _emitSessionEvent(sessionId, "directorPrompt", { "id": promptId, "status": TeamsbotDirectorPromptStatus.RUNNING.value, }) recentTranscript = self._renderRecentTranscriptForAgent(maxLines=20) directorText = (prompt.get("text") or "").strip() attachedFileIds = list(prompt.get("fileIds") or []) promptMode = (prompt.get("mode") or "").lower() isPersistentPrompt = promptMode == TeamsbotDirectorPromptMode.PERSISTENT.value.lower() filesBlock = "" if attachedFileIds: filesBlock = ( "\nANGEHAENGTE DOKUMENTE (UDB-File-IDs): " + ", ".join(attachedFileIds) + "\nDu MUSST diese Dokumente VOR der finalen Antwort lesen / zusammenfassen " "(z.B. summarizeContent, readFile, readContentObjects, describeImage). " "Beziehe Fakten und Zitate aus den Dokumenten in deine Notiz / dein " "Meeting-Reply ein, statt allgemein zu antworten.\n" ) persistentNoteHint = "" if isPersistentPrompt and attachedFileIds: persistentNoteHint = ( "\nSPEZIAL fuer PERSISTENT + Dokumente: Wenn die Anweisung KEIN explizites " "Meeting-Statement verlangt, antworte mit 'SILENT:' und liefere als interne " "Notiz eine STRUKTURIERTE, faktendichte Briefing-Zusammenfassung der Dokumente " "(Stichpunkte, Kennzahlen, Aussagen, die fuer Folgefragen im Meeting relevant " "sein koennen). Diese Notiz wird spaeteren Meeting-Antworten als Wissensbasis " "vorgelegt — schreibe sie also so, dass der Bot daraus zitieren kann.\n" ) taskText = ( f"Du bist der KI-Assistent in einem laufenden Teams-Meeting (Bot-Name: {self.config.botName}).\n" f"Der Operator hat dir folgende PRIVATE Regieanweisung gegeben (die anderen Teilnehmer im " f"Meeting sehen sie NICHT). Sie ist KEINE Frage an das Meeting, sondern eine interne " f"Anweisung an dich:\n\n" f"{directorText}\n" f"{filesBlock}" f"{persistentNoteHint}\n" f"AKTUELLER MEETING-KONTEXT (juengste Aussagen):\n{recentTranscript}\n\n" "ANTWORT-PROTOKOLL — Beginne deine FINALE Antwort mit GENAU EINEM dieser Marker:\n" " • 'MEETING_REPLY:' gefolgt vom Text, der im Meeting gesprochen / in den Meeting-Chat " "gepostet werden soll. Verwende diesen Marker NUR, wenn die Regieanweisung dich explizit " "auffordert, jetzt etwas im Meeting zu sagen oder zu schreiben (Beispiele: 'stell dich vor', " "'fasse zusammen', 'stelle Person X eine Frage', 'beantworte die letzte Frage'). Halte den " "Text kurz, sprachlich passend zur Stimme und ohne Marker oder Meta-Kommentare.\n" " • 'SILENT:' gefolgt von einer internen Notiz fuer das Operator-UI. " "Verwende diesen Marker fuer interne Direktiven und Wissens-Briefings (Beispiele: " "'achte ab jetzt auf X', 'merke dir Y', 'studiere Dokument Z'). " "Dieser Text wird NICHT ins Meeting gegeben, dient aber spaeteren Meeting-Antworten " "als Wissensbasis. Wenn Dokumente angehaengt sind, MUSS die Notiz konkrete, " "zitierfaehige Fakten aus den Dokumenten enthalten.\n\n" "Standard ist SILENT, wenn nicht eindeutig zur Meeting-Interaktion aufgefordert wurde. " "Wiederhole NIEMALS die Regieanweisung selbst im MEETING_REPLY-Text." ) try: finalText = await self._runAgentForMeeting( sessionId=sessionId, taskText=taskText, fileIds=attachedFileIds, sourceLabel="directorPrompt", triggerTranscriptId=None, promptId=promptId, directorPromptMode=True, ) isPersistent = prompt.get("mode") == TeamsbotDirectorPromptMode.PERSISTENT.value updates: Dict[str, Any] = { "status": TeamsbotDirectorPromptStatus.SUCCEEDED.value, "responseText": finalText or "", } if not isPersistent: updates["status"] = TeamsbotDirectorPromptStatus.CONSUMED.value updates["consumedAt"] = getUtcTimestamp() interface.updateDirectorPrompt(promptId, updates) await _emitSessionEvent(sessionId, "directorPrompt", { "id": promptId, "status": updates["status"], "responseText": finalText, }) except Exception as e: logger.error( f"Session {sessionId}: Director prompt {promptId} failed: {type(e).__name__}: {e}", exc_info=True, ) interface.updateDirectorPrompt(promptId, { "status": TeamsbotDirectorPromptStatus.FAILED.value, "statusMessage": f"{type(e).__name__}: {str(e)[:300]}", }) await _emitSessionEvent(sessionId, "directorPrompt", { "id": promptId, "status": TeamsbotDirectorPromptStatus.FAILED.value, "error": f"{type(e).__name__}: {str(e)[:300]}", }) self._activePersistentPrompts = [ p for p in self._activePersistentPrompts if p.get("id") != promptId ] def _renderRecentTranscriptForAgent(self, maxLines: int = 20) -> str: """Render the most recent context buffer entries for agent task brief.""" if not self._contextBuffer: return "(noch keine Aussagen erfasst)" recent = self._contextBuffer[-maxLines:] lines = [] for seg in recent: speaker = seg.get("speaker", "Unknown") text = seg.get("text", "") segSource = seg.get("source", "caption") prefix = "Chat: " if segSource == "chat" else "" if self._isBotSpeaker(speaker): lines.append(f"[YOU ({self.config.botName})]: {text}") else: lines.append(f"[{prefix}{speaker}]: {text}") return "\n".join(lines) async def _interimAgentBusyMessage(self) -> Optional[str]: """Short spoken/chat line before a potentially long agent run.""" return await self._pickEphemeralPhrase("agentBusy") async def _interimAgentRoundMessage( self, lastToolLabel: Optional[str] = None ) -> Optional[str]: """Per-round progress notice for long agent runs.""" targetLang = (self.config.language or "").strip() or "en-US" botName = (self.config.botName or "the assistant").strip() activityHint = lastToolLabel or "working on the task" prompt = ( f"You are a meeting assistant named '{botName}'.\n" f"Target spoken language (BCP-47): {targetLang}\n\n" f"The assistant is currently busy with: {activityHint}\n\n" f"Generate ONE short sentence (max 12 words) in {targetLang} " f"that tells the audience what the assistant is doing right now. " f"Natural, spoken style. No step numbers. No quotes around the output.\n" f"Output ONLY the sentence, nothing else." ) try: aiService = createAiService( self.currentUser, self.mandateId, self.instanceId ) await aiService.ensureAiObjectsInitialized() request = AiCallRequest( prompt=prompt, context="", options=AiCallOptions( operationType=OperationTypeEnum.DATA_ANALYSE, priority=PriorityEnum.SPEED, ), ) response = await aiService.callAi(request) except Exception as aiErr: logger.debug(f"Agent round phrase generation failed: {aiErr}") return None if not response or response.errorCount != 0 or not response.content: return None result = response.content.strip().strip('"').strip("'") if len(result) > 200: result = result[:200] return result async def _notifyMeetingEphemeral(self, sessionId: str, text: str) -> None: """Deliver a short line to the meeting without persisting botResponses.""" websocket = self._websocket voiceInterface = self._voiceInterface if not websocket: logger.warning(f"Session {sessionId}: Interim notice skipped — no WebSocket") return channelRaw = self.config.responseChannel channelStr = ( channelRaw.value if hasattr(channelRaw, "value") else str(channelRaw) ).lower().strip() sendVoice = channelStr in ("voice", "both") sendChat = channelStr in ("chat", "both") if sendVoice and voiceInterface: cancelHook = self._makeAnswerCancelHook() async with self._meetingTtsLock: outcome = await _speakTextChunked( websocket=websocket, voiceInterface=voiceInterface, sessionId=sessionId, voiceText=_voiceFriendlyMeetingText(text), languageCode=self.config.language, voiceName=self.config.voiceId, isCancelled=cancelHook, ) if not outcome.get("success"): logger.warning( f"Session {sessionId}: Interim TTS failed ({outcome.get('error')}) — falling back to chat" ) if not sendChat: sendChat = True if sendChat: try: await websocket.send_text(json.dumps({ "type": "sendChatMessage", "sessionId": sessionId, "text": text, })) except Exception as chatErr: logger.warning(f"Session {sessionId}: Interim chat failed: {chatErr}") await _emitSessionEvent(sessionId, "agentRun", { "status": "interimNotice", "message": text, "timestamp": getUtcTimestamp(), }) async def _runAgentForMeeting( self, sessionId: str, taskText: str, fileIds: List[str], sourceLabel: str, triggerTranscriptId: Optional[str] = None, promptId: Optional[str] = None, directorPromptMode: bool = False, ) -> str: """Run agentService.runAgent for a meeting context.""" from modules.serviceCenter.services.serviceAgent.datamodelAgent import ( AgentConfig, AgentEventTypeEnum ) ctx = ServiceCenterContext( user=self.currentUser, mandate_id=self.mandateId, feature_instance_id=self.instanceId, feature_code="teamsbot", ) agentService = _getServiceCenterService("agent", ctx) workflowId = f"teamsbot:{sessionId}" agentConfig = AgentConfig( maxRounds=TEAMSBOT_AGENT_MAX_ROUNDS, maxCostCHF=TEAMSBOT_AGENT_MAX_COST_CHF, toolSet="core", initialToolboxes=["core", "web"], excludeActionTools=True, ) await _emitSessionEvent(sessionId, "agentRun", { "source": sourceLabel, "promptId": promptId, "status": "started", "timestamp": getUtcTimestamp(), }) if not directorPromptMode: try: interimText = await self._interimAgentBusyMessage() if interimText: await self._notifyMeetingEphemeral(sessionId, interimText) except Exception as interimErr: logger.warning(f"Session {sessionId}: Interim agent notice failed: {interimErr}") finalText: str = "" rounds = 0 lastToolLabel: Optional[str] = None try: async for event in agentService.runAgent( prompt=taskText, fileIds=fileIds or None, config=agentConfig, toolSet="core", workflowId=workflowId, ): if event.type == AgentEventTypeEnum.AGENT_PROGRESS: rounds += 1 pdata = event.data or {} roundNum = int(pdata.get("round", rounds)) maxR = int(pdata.get("maxRounds", TEAMSBOT_AGENT_MAX_ROUNDS)) await _emitSessionEvent(sessionId, "agentRun", { "source": sourceLabel, "promptId": promptId, "status": "progress", "round": roundNum, "maxRounds": maxR, }) if roundNum >= 2 and not directorPromptMode: try: roundText = await self._interimAgentRoundMessage(lastToolLabel) if roundText: await self._notifyMeetingEphemeral(sessionId, roundText) except Exception as roundNoticeErr: logger.warning( f"Session {sessionId}: Per-round agent notice failed: {roundNoticeErr}" ) elif event.type == AgentEventTypeEnum.TOOL_CALL: evtData = event.data or {} toolName = evtData.get("toolName") lastToolLabel = evtData.get("displayLabel") await _emitSessionEvent(sessionId, "agentRun", { "source": sourceLabel, "promptId": promptId, "status": "toolCall", "toolName": toolName, "displayLabel": lastToolLabel, }) elif event.type == AgentEventTypeEnum.TOOL_RESULT: evtData = event.data or {} resultSnippet = (evtData.get("data") or "")[:200] await _emitSessionEvent(sessionId, "agentRun", { "source": sourceLabel, "promptId": promptId, "status": "toolResult", "toolName": evtData.get("toolName", ""), "success": evtData.get("success", True), "summary": resultSnippet, }) elif event.type == AgentEventTypeEnum.FILE_CREATED: await _emitSessionEvent(sessionId, "documentCreated", event.data or {}) elif event.type == AgentEventTypeEnum.FINAL: finalText = (event.content or "").strip() elif event.type == AgentEventTypeEnum.ERROR: raise RuntimeError(event.content or "Agent error") except Exception as runErr: await _emitSessionEvent(sessionId, "agentRun", { "source": sourceLabel, "promptId": promptId, "status": "error", "error": str(runErr)[:500], }) raise await _emitSessionEvent(sessionId, "agentRun", { "source": sourceLabel, "promptId": promptId, "status": "completed", "rounds": rounds, "hasText": bool(finalText), }) if finalText: if directorPromptMode: decision = _parseDirectorPromptFinal(finalText) kind = decision.get("kind", "silent") meetingText = (decision.get("meetingText") or "").strip() internalNote = (decision.get("internalNote") or "").strip() logger.info( f"Session {sessionId}: Director prompt {promptId} -> kind={kind}, " f"meetingChars={len(meetingText)}, noteChars={len(internalNote)}" ) await _emitSessionEvent(sessionId, "directorPrompt", { "id": promptId, "status": "decision", "decision": kind, "meetingText": meetingText, "internalNote": internalNote, }) try: promptRecord: Dict[str, Any] = {} if promptId: try: from . import interfaceFeatureTeamsbot as _ifaceDb _iface = _ifaceDb.getInterface( self.currentUser, self.mandateId, self.instanceId ) promptRecord = _iface.getDirectorPrompt(promptId) or {} except Exception as _lookupErr: logger.debug( f"Briefing pool: could not look up prompt {promptId}: {_lookupErr}" ) if promptRecord or promptId: self._recordDirectorBriefing( prompt=promptRecord or {"id": promptId}, internalNote=internalNote, meetingText=meetingText, ) except Exception as briefErr: logger.warning( f"Session {sessionId}: Director briefing pool update failed: {briefErr}" ) if promptId: for p in self._activePersistentPrompts: if p.get("id") == promptId: p["responseText"] = internalNote or meetingText or finalText break if kind == "meeting" and meetingText: await self._deliverTextToMeeting( sessionId=sessionId, text=meetingText, detectedIntent=f"agent:{sourceLabel}", reasoning=f"Agent run ({sourceLabel})", triggerTranscriptId=triggerTranscriptId, ) else: await self._persistInternalDirectorReply( sessionId=sessionId, internalNote=internalNote or finalText, promptId=promptId, triggerTranscriptId=triggerTranscriptId, ) return meetingText if kind == "meeting" else "" await self._deliverTextToMeeting( sessionId=sessionId, text=finalText, detectedIntent=f"agent:{sourceLabel}", reasoning=f"Agent run ({sourceLabel})", triggerTranscriptId=triggerTranscriptId, ) return finalText async def _deliverTextToMeeting( self, sessionId: str, text: str, detectedIntent: str, reasoning: str, triggerTranscriptId: Optional[str] = None, ) -> None: """Send agent text into the meeting via TTS + chat per config.""" from . import interfaceFeatureTeamsbot as interfaceDb interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) websocket = self._websocket voiceInterface = self._voiceInterface channelRaw = self.config.responseChannel channelStr = ( channelRaw.value if hasattr(channelRaw, "value") else str(channelRaw) ).lower().strip() sendVoice = channelStr in ("voice", "both") sendChat = channelStr in ("chat", "both") if sendVoice and sendChat: responseType = TeamsbotResponseType.BOTH elif sendVoice: responseType = TeamsbotResponseType.AUDIO else: responseType = TeamsbotResponseType.CHAT ttsOutcome: Optional[Dict[str, Any]] = None if sendVoice and voiceInterface and websocket: from .serviceConversation import _summarizeForVoice spokenText = await _summarizeForVoice(self, sessionId, text) cancelHook = self._makeAnswerCancelHook() async with self._meetingTtsLock: ttsOutcome = await _speakTextChunked( websocket=websocket, voiceInterface=voiceInterface, sessionId=sessionId, voiceText=spokenText, languageCode=self.config.language, voiceName=self.config.voiceId, isCancelled=cancelHook, ) await _emitSessionEvent(sessionId, "ttsDeliveryStatus", { "status": "dispatched" if ttsOutcome.get("success") else "failed", "hasWebSocket": True, "chunks": ttsOutcome.get("chunks"), "played": ttsOutcome.get("played"), "error": ttsOutcome.get("error"), "timestamp": getUtcTimestamp(), }) if not ttsOutcome.get("success"): logger.warning( f"Session {sessionId}: Agent TTS delivery failed " f"({ttsOutcome.get('error')}) — falling back to meeting chat" ) if not sendChat: sendChat = True if sendChat and websocket: try: await websocket.send_text(json.dumps({ "type": "sendChatMessage", "sessionId": sessionId, "text": text, })) logger.info(f"Session {sessionId}: Agent chat dispatched ({len(text)} chars)") except Exception as chatErr: logger.warning(f"Session {sessionId}: Agent chat delivery failed: {chatErr}") intentEnum, intentMeta = _coercePersistedDetectedIntent(detectedIntent) reasoningForDb = ( f"{reasoning} [{intentMeta}]" if intentMeta else reasoning ) botResponseData = TeamsbotBotResponse( sessionId=sessionId, responseText=text, responseType=responseType, detectedIntent=intentEnum, reasoning=reasoningForDb, triggeredByTranscriptId=triggerTranscriptId, modelName="agent", processingTime=0.0, priceCHF=0.0, timestamp=getUtcTimestamp(), ).model_dump() createdResponse = interface.createBotResponse(botResponseData) await _emitSessionEvent(sessionId, "botResponse", { "id": createdResponse.get("id"), "responseText": text, "responseType": responseType.value, "detectedIntent": intentEnum.value, "reasoning": reasoningForDb, "modelName": "agent", "processingTime": 0.0, "priceCHF": 0.0, "timestamp": botResponseData.get("timestamp"), }) botTranscriptData = TeamsbotTranscript( sessionId=sessionId, speaker=self.config.botName, text=text, timestamp=getUtcTimestamp(), confidence=1.0, language=self.config.language, isFinal=True, source="botResponse", ).model_dump() botTranscript = interface.createTranscript(botTranscriptData) self._contextBuffer.append({ "speaker": self.config.botName, "text": text, "timestamp": getUtcTimestamp(), "source": "botResponse", }) self._lastTranscriptSpeaker = self.config.botName self._lastTranscriptText = text self._lastTranscriptId = botTranscript.get("id") self._lastBotResponseText = text.strip().lower() self._lastBotResponseTs = time.time() self._followUpWindowEnd = time.time() + 15.0 await _emitSessionEvent(sessionId, "transcript", { "id": botTranscript.get("id"), "speaker": self.config.botName, "text": text, "confidence": 1.0, "timestamp": getUtcTimestamp(), "isContinuation": False, "source": "botResponse", "speakerResolvedFromHint": False, }) session = interface.getSession(sessionId) if session: count = session.get("botResponseCount", 0) + 1 interface.updateSession(sessionId, {"botResponseCount": count}) async def _persistInternalDirectorReply( self, sessionId: str, internalNote: str, promptId: Optional[str], triggerTranscriptId: Optional[str] = None, ) -> None: """Record a director-prompt agent reply as INTERNAL (operator-UI only).""" from . import interfaceFeatureTeamsbot as interfaceDb note = (internalNote or "").strip() if not note: return interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) intentEnum, _intentMeta = _coercePersistedDetectedIntent("agent:directorPrompt") reasoningForDb = ( f"Director prompt {promptId or ''} — silent / internal only " f"(not sent to meeting)" ).strip() botResponseData = TeamsbotBotResponse( sessionId=sessionId, responseText=note, responseType=TeamsbotResponseType.CHAT, detectedIntent=intentEnum, reasoning=reasoningForDb, triggeredByTranscriptId=triggerTranscriptId, modelName="agent", processingTime=0.0, priceCHF=0.0, timestamp=getUtcTimestamp(), ).model_dump() createdResponse = interface.createBotResponse(botResponseData) await _emitSessionEvent(sessionId, "botResponse", { "id": createdResponse.get("id"), "responseText": note, "responseType": TeamsbotResponseType.CHAT.value, "detectedIntent": intentEnum.value, "reasoning": reasoningForDb, "modelName": "agent", "processingTime": 0.0, "priceCHF": 0.0, "timestamp": botResponseData.get("timestamp"), "internalOnly": True, "promptId": promptId, }) logger.info( f"Session {sessionId}: Director prompt {promptId} silent reply " f"persisted internally ({len(note)} chars)" ) # ========================================================================= # Greeting (AI-localised) # ========================================================================= async def _generateGreetingText(self, languageCode: str) -> str: """Generate the bot's join greeting via AI.""" targetLang = (languageCode or self.config.language or "").strip() or "en-US" botName = (self.config.botName or "the assistant").strip() firstName = botName.split(" ")[0] if botName else botName persona = (self.config.aiSystemPrompt or "").strip() prompt = ( f"You are localizing the join greeting for a meeting assistant.\n\n" f"Assistant display name (use exactly this, no translation): {firstName}\n\n" f"Persona / style guide for the assistant:\n" f"{persona or '(no persona configured — use a neutral, polite, professional tone)'}\n\n" f"Target spoken language (BCP-47 code): {targetLang}\n\n" f"Generate ONE short greeting (max ~14 words) for the assistant " f"to say AND post in chat the moment it joins a meeting. The " f"greeting MUST:\n" f" - be in the target language\n" f" - introduce the assistant by name ({firstName})\n" f" - signal that it is now present and ready\n" f" - sound natural when spoken aloud (this text is also TTS'd)\n\n" f"Output ONLY the greeting text, no quotes, no markdown, no " f"commentary, no surrounding punctuation beyond what naturally " f"belongs to the sentence." ) try: aiService = createAiService( self.currentUser, self.mandateId, self.instanceId ) await aiService.ensureAiObjectsInitialized() request = AiCallRequest( prompt=prompt, context="", options=AiCallOptions( operationType=OperationTypeEnum.DATA_ANALYSE, priority=PriorityEnum.SPEED, ), ) response = await aiService.callAi(request) except Exception as aiErr: logger.warning( f"Greeting generation crashed (lang={targetLang}): {aiErr}" ) return "" if not response or response.errorCount != 0 or not response.content: logger.warning( f"Greeting generation returned empty/error (lang={targetLang})" ) return "" text = response.content.strip() text = re.sub(r"^```.*?\n", "", text, flags=re.DOTALL) text = re.sub(r"\n```\s*$", "", text) text = text.strip().strip("\"'`").strip() if not text: return "" logger.info( f"Greeting generated (lang={targetLang}, chars={len(text)}): {text[:80]}" ) return text async def _dispatchGreetingToMeeting( self, sessionId: str, greetingText: str, greetingLang: str, sendToChat: bool, interface: Any, voiceInterface: Any, websocket: WebSocket, ) -> None: """Dispatch the bot's join greeting (TTS + optional chat).""" try: await _emitSessionEvent(sessionId, "ttsDeliveryStatus", { "status": "requested", "hasWebSocket": True, "message": "Greeting TTS requested", "timestamp": getUtcTimestamp(), }) cancelHook = self._makeAnswerCancelHook() async with self._meetingTtsLock: ttsOutcome = await _speakTextChunked( websocket=websocket, voiceInterface=voiceInterface, sessionId=sessionId, voiceText=_voiceFriendlyMeetingText(greetingText), languageCode=greetingLang, voiceName=self.config.voiceId, isCancelled=cancelHook, ) if ttsOutcome.get("success"): logger.info( f"Greeting TTS sent for session {sessionId} " f"(chunks={ttsOutcome.get('chunks')})" ) await _emitSessionEvent(sessionId, "ttsDeliveryStatus", { "status": "dispatched", "hasWebSocket": True, "chunks": ttsOutcome.get("chunks"), "played": ttsOutcome.get("played"), "timestamp": getUtcTimestamp(), }) else: logger.warning( f"Greeting TTS failed for session {sessionId}: {ttsOutcome.get('error')}" ) await _emitSessionEvent(sessionId, "ttsDeliveryStatus", { "status": "failed", "hasWebSocket": True, "message": ttsOutcome.get("error"), "timestamp": getUtcTimestamp(), }) if sendToChat: try: await websocket.send_text(json.dumps({ "type": "sendChatMessage", "sessionId": sessionId, "text": greetingText, })) logger.info(f"Greeting chat dispatch queued for session {sessionId}") except Exception as chatErr: logger.warning( f"Greeting chat dispatch failed for session {sessionId}: {chatErr}" ) greetingTranscriptData = TeamsbotTranscript( sessionId=sessionId, speaker=self.config.botName, text=greetingText, timestamp=getUtcTimestamp(), confidence=1.0, language=greetingLang, isFinal=True, source="botResponse", ).model_dump() greetingTranscript = interface.createTranscript(greetingTranscriptData) self._contextBuffer.append({ "speaker": self.config.botName, "text": greetingText, "timestamp": getUtcTimestamp(), "source": "botResponse", }) self._lastTranscriptSpeaker = self.config.botName self._lastTranscriptText = greetingText self._lastTranscriptId = greetingTranscript.get("id") await _emitSessionEvent(sessionId, "botResponse", { "id": greetingTranscript.get("id"), "responseText": greetingText, "responseType": TeamsbotResponseType.AUDIO.value, "detectedIntent": "greeting", "reasoning": "Automatic join greeting", "timestamp": getUtcTimestamp(), }) await _emitSessionEvent(sessionId, "transcript", { "id": greetingTranscript.get("id"), "speaker": self.config.botName, "text": greetingText, "confidence": 1.0, "timestamp": getUtcTimestamp(), "isContinuation": False, "source": "botResponse", "speakerResolvedFromHint": False, }) except Exception as dispatchErr: logger.warning( f"Greeting dispatch failed for session {sessionId}: {dispatchErr}" ) # ========================================================================= # Context Summarization # ========================================================================= async def _summarizeSessionContext(self, sessionId: str, rawContext: str) -> str: """Summarize a long user-provided session context.""" try: aiService = createAiService(self.currentUser, self.mandateId, self.instanceId) await aiService.ensureAiObjectsInitialized() request = AiCallRequest( prompt=( "Fasse den folgenden Kontext auf die wesentlichen Punkte zusammen. " "Behalte alle wichtigen Fakten, Namen, Zahlen, Entscheidungen und Aktionspunkte. " "Entferne Fuelltext und Wiederholungen. " "Antworte NUR mit der Zusammenfassung, keine Erklaerungen oder Einleitungen." ), context=rawContext, options=AiCallOptions( operationType=OperationTypeEnum.DATA_ANALYSE, priority=PriorityEnum.SPEED, ) ) response = await aiService.callAi(request) if response and response.errorCount == 0 and response.content: summary = response.content.strip() logger.info(f"Session {sessionId}: Context summarized from {len(rawContext)} to {len(summary)} chars") return summary except Exception as e: logger.warning(f"Session context summarization failed for {sessionId}: {e}") return rawContext[:2000] if len(rawContext) > 2000 else rawContext async def _summarizeContextBuffer(self, sessionId: str): """Summarize the older part of the context buffer.""" try: if self._contextSummary: return halfPoint = len(self._contextBuffer) // 2 oldSegments = self._contextBuffer[:halfPoint] if len(oldSegments) < 10: return lines = [] for seg in oldSegments: speaker = seg.get("speaker", "Unknown") text = seg.get("text", "") lines.append(f"[{speaker}]: {text}") textToSummarize = "\n".join(lines) aiService = createAiService(self.currentUser, self.mandateId, self.instanceId) await aiService.ensureAiObjectsInitialized() request = AiCallRequest( prompt="Fasse das folgende Meeting-Transkript in 3-5 Saetzen zusammen. Nenne die wichtigsten Themen, Entscheidungen und offene Fragen. Antworte NUR mit der Zusammenfassung, keine Erklaerungen.", context=textToSummarize, options=AiCallOptions( operationType=OperationTypeEnum.DATA_ANALYSE, priority=PriorityEnum.SPEED, ) ) response = await aiService.callAi(request) if response and response.errorCount == 0: self._contextSummary = response.content.strip() logger.info(f"Session {sessionId}: Context summarized ({len(oldSegments)} segments -> {len(self._contextSummary)} chars)") except Exception as e: logger.warning(f"Context summarization failed for session {sessionId}: {e}") # ========================================================================= # Meeting Summary # ========================================================================= async def _generateMeetingSummary(self, sessionId: str): """Generate an AI summary of the meeting after it ends.""" try: from . import interfaceFeatureTeamsbot as interfaceDb interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) transcripts = interface.getTranscripts(sessionId) if not transcripts or len(transcripts) < 5: return fullTranscript = "\n".join( f"[{t.get('speaker', 'Unknown')}]: {t.get('text', '')}" for t in transcripts ) aiService = createAiService(self.currentUser, self.mandateId, self.instanceId) await aiService.ensureAiObjectsInitialized() request = AiCallRequest( prompt="Erstelle eine kurze Zusammenfassung dieses Meeting-Transkripts. Nenne die wichtigsten Punkte, Entscheidungen und offene Aktionspunkte.", context=fullTranscript, options=AiCallOptions( operationType=OperationTypeEnum.DATA_ANALYSE, priority=PriorityEnum.BALANCED, ) ) response = await aiService.callAi(request) if response and response.errorCount == 0: interface.updateSession(sessionId, {"summary": response.content}) logger.info(f"Meeting summary generated for session {sessionId}") except Exception as e: logger.error(f"Failed to generate meeting summary for session {sessionId}: {e}")