platform-core/modules/features/teamsbot/service.py
ValueOn AG 26dd8f6f3f
Some checks failed
Deploy Plattform-Core (Int) / test (push) Failing after 12s
Deploy Plattform-Core (Int) / deploy (push) Has been skipped
cleanup intra referencings in codebase
2026-06-09 07:05:06 +02:00

1993 lines
82 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Teamsbot Service - Pipeline Orchestrator.
Manages the audio processing pipeline: STT -> Context Buffer -> SPEECH_TEAMS -> TTS -> Bridge.
"""
import logging
import json
import re
import asyncio
import time
import base64
from datetime import datetime, timezone
from typing import Optional, Dict, Any, List, Callable
from fastapi import WebSocket
from modules.datamodels.datamodelUam import User
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
from modules.shared.timeUtils import getUtcTimestamp
from modules.serviceCenter import getService as _getServiceCenterService
from modules.serviceCenter.context import ServiceCenterContext
from .datamodelTeamsbot import (
TeamsbotSessionStatus,
TeamsbotTranscript,
TeamsbotBotResponse,
TeamsbotResponseType,
TeamsbotConfig,
TeamsbotResponseMode,
TeamsbotResponseChannel,
TeamsbotDetectedIntent,
SpeechTeamsResponse,
TeamsbotCommand,
TeamsbotDirectorPrompt,
TeamsbotDirectorPromptStatus,
TeamsbotDirectorPromptMode,
DIRECTOR_PROMPT_TEXT_LIMIT,
DIRECTOR_PROMPT_FILE_LIMIT,
)
from .browserBotConnector import BrowserBotConnector
logger = logging.getLogger(__name__)
# Agent run limits for director prompts / speech escalation (meeting context).
# NOTE(review): neither limit is referenced in the visible part of this file;
# presumably the extracted service modules import them — confirm.
TEAMSBOT_AGENT_MAX_ROUNDS = 8
TEAMSBOT_AGENT_MAX_COST_CHF = 0.12
# How many recent director-prompt briefings we keep in session memory
# (the cap applied to TeamsbotService._recentDirectorBriefings).
_RECENT_DIRECTOR_BRIEFINGS_MAX = 6
# Quick-ack throttle: minimum number of seconds that must pass after the last
# quick acknowledgement before _shouldFireQuickAck allows another one.
_QUICK_ACK_MIN_INTERVAL_SEC = 25.0
# Number of phrase variants we generate per kind.
_EPHEMERAL_PHRASE_VARIANTS = 4
# Ephemeral phrase kinds mapped to an English description of what the phrase
# is for. NOTE(review): presumably these descriptions are handed to the AI
# when localised phrase variants are generated — confirm in serviceConversation.
_EPHEMERAL_PHRASE_INTENTS: Dict[str, str] = {
    "quickAck": (
        "Very short verbal acknowledgment (1 to 4 words) the assistant says "
        "the moment its name is recognised, BEFORE it has formulated a full "
        "answer. The intent is purely 'I heard you, I'm thinking' — natural, "
        "conversational, never a complete sentence."
    ),
    "agentBusy": (
        "One short sentence (max ~12 words) the assistant says BEFORE starting "
        "a longer research / tool-call task, so the audience knows the answer "
        "will take a few seconds. Polite, professional, calm."
    ),
    "agentRound": (
        "One short sentence (max ~14 words) the assistant says BETWEEN rounds "
        "of a longer agent task to update the audience on what it is doing. "
        "Include the placeholder token '{activity}' which will be filled with "
        "the current activity — e.g. 'I am {activity}, one moment...' or "
        "'Currently {activity}, almost there...'. Do NOT include step numbers."
    ),
}
# =========================================================================
# Module-level utility functions (used by extracted modules too)
# =========================================================================
def _voiceLineLooksLikeBillingOrMeta(line: str) -> bool:
"""Heuristic: trailing lines that are separators or billing/usage footers."""
s = line.strip()
if not s:
return True
lower = s.lower()
if re.match(r"^[-=*_]{3,}\s*$", s):
return True
if re.match(r"^#{1,6}\s*(usage|billing|costs?|meta|technical|statistics)\b", lower):
return True
if "chf" in lower and re.search(r"\d", s):
if re.search(
r"\b(total|usage|cost|billing|token|spent|used|price|estimate|"
r"rounds?|calls?|duration|processing\s*time|model\s*calls?)\b",
lower,
):
return True
if "token" in lower and re.search(r"\d", s):
if re.search(r"\b(total|usage|prompt|completion)\b", lower):
return True
pl = lower.replace(" ", "")
if "progressafter" in pl and ("aicalls:" in pl or "toolcalls:" in pl):
return True
return False
# Matches runs of emoji / pictographic code points so they can be stripped
# from text before it is spoken (used by _voiceFriendlyMeetingText).
# Ranges covered: U+1F300-U+1FAFF, U+2600-U+27BF, U+1F1E6-U+1F1FF,
# U+2B00-U+2BFF and the single code point U+FE0F.
# NOTE(review): U+1F900-U+1F9FF lies inside the first range, so that entry
# is redundant (harmless).
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F300-\U0001FAFF"
    "\U00002600-\U000027BF"
    "\U0001F1E6-\U0001F1FF"
    "\U00002B00-\U00002BFF"
    "\U0001F900-\U0001F9FF"
    "\U0000FE0F"
    "]+",
    flags=re.UNICODE,
)
def _voiceFriendlyMeetingText(raw: str) -> str:
    """Sanitise a chat/markdown response so it can be SPOKEN naturally.

    The text is passed through an ordered chain of substitutions: agent
    limit notices and trailing billing/meta lines are cut, markdown syntax
    (code, tables, headings, lists, emphasis, links) is removed, emoji and
    typographic symbols are dropped or replaced, punctuation is tidied, and
    the remaining paragraphs are joined into a single line in which every
    paragraph ends with a sentence terminator.

    Args:
        raw: The chat / markdown answer.

    Returns:
        The cleaned single-line text, or "" when ``raw`` is empty.
    """
    if not raw:
        return ""
    low = raw.lower()
    # Agent limit notices: keep only what precedes the notice; if nothing
    # precedes it, speak a fixed German fallback sentence instead.
    if "maximum rounds reached" in low:
        m = re.search(r"(?is)maximum\s+rounds\s+reached", raw)
        if m:
            head = raw[: m.start()].strip()
            raw = head or (
                "Die Abklaerung brauchte mehr Schritte als vorgesehen; Details stehen im Chat."
            )
    # `low` was computed before `raw` may have been shortened above, hence the
    # second search is guarded by `if m`.
    if "budget exceeded" in low:
        m = re.search(r"(?is)budget\s+exceeded", raw)
        if m:
            head = raw[: m.start()].strip()
            raw = head or "Das eingestellte Kostenlimit ist erreicht; Details stehen im Chat."
    # Drop trailing separator / billing / usage lines.
    lines = raw.strip().split("\n")
    while lines and _voiceLineLooksLikeBillingOrMeta(lines[-1]):
        lines.pop()
    t = "\n".join(lines).strip()
    # If every line was classified as meta, fall back to the full text.
    if not t:
        t = raw.strip()
    # Fenced code blocks are replaced by a space; inline code keeps its content.
    t = re.sub(r"```[\s\S]*?```", " ", t)
    t = re.sub(r"`([^`]+)`", r"\1", t)
    # Remove table rows (two or more pipes) and separator-like lines made only
    # of whitespace, '-', ':' and '|' that contain at least one '-'.
    cleanedLines: List[str] = []
    for ln in t.split("\n"):
        stripped = ln.strip()
        if stripped.count("|") >= 2:
            continue
        if re.fullmatch(r"\s*\|?[\s\-:|]+\|?\s*", stripped) and "-" in stripped:
            continue
        cleanedLines.append(ln)
    t = "\n".join(cleanedLines)
    # Horizontal rules such as "***" or "_ _ _".
    # NOTE(review): inside a character class `\1` is a literal character, not
    # a backreference, so `[\s\1]*` only matches trailing whitespace and rules
    # longer than three markers are not matched here. Dash rules are already
    # dropped by the loop above and '*' / '_' are removed further down, so no
    # visible effect was found — confirm before changing.
    t = re.sub(r"(?m)^\s*([-*_])\s*\1\s*\1[\s\1]*$", "", t)
    # Line prefixes: heading markers, bullets, numbered-list markers.
    t = re.sub(r"(?m)^\s*#{1,6}\s+", "", t)
    t = re.sub(r"(?m)^\s*[-*•·]\s+", "", t)
    t = re.sub(r"(?m)^\s*\d+[\.\)]\s+", "", t)
    # Emphasis markers: **bold**, *italic*, __bold__, _italic_.
    t = re.sub(r"\*\*([^*]+)\*\*", r"\1", t)
    t = re.sub(r"\*([^*\n]+)\*", r"\1", t)
    t = re.sub(r"__([^_]+)__", r"\1", t)
    t = re.sub(r"(?<![A-Za-z0-9])_([^_\n]+)_(?![A-Za-z0-9])", r"\1", t)
    # Markdown links keep only their label.
    t = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", t)
    t = _EMOJI_PATTERN.sub("", t)
    # En dash, em dash and minus sign: ", " when surrounded by whitespace,
    # otherwise a plain hyphen.
    t = re.sub(r"\s+[\u2013\u2014\u2212]\s+", ", ", t)
    t = re.sub(r"[\u2013\u2014\u2212]", "-", t)
    # Typographic double quotes / guillemets are removed, typographic single
    # quotes become an ASCII apostrophe.
    t = re.sub(r"[\u201C\u201D\u201E\u201F\u00AB\u00BB\u2039\u203A]", "", t)
    t = re.sub(r"[\u2018\u2019\u201A\u201B]", "'", t)
    # Code points U+2190-U+23FF and several Latin-1 symbols become a space.
    t = re.sub(r"[\u2190-\u23FF]+", " ", t)
    t = re.sub(r"[\u00A1-\u00BF\u00D7\u00F7\u2122]+", " ", t)
    # Remaining markup / symbol characters become a space.
    t = re.sub(r"[*#~^=+|\\<>{}\[\]()_&@$%`]+", " ", t)
    t = t.replace('"', "")
    # A slash between two word characters is spoken as German "oder" ("or");
    # every other slash becomes a space.
    t = re.sub(r"(?<=\w)\s*/\s*(?=\w)", " oder ", t)
    t = t.replace("/", " ")
    # Collapse repeated punctuation and remove whitespace before punctuation.
    t = re.sub(r"([\.,;:!\?])\1{1,}", r"\1", t)
    t = re.sub(r"\s+([\.,;:!\?])", r"\1", t)
    # A colon at the very end of the text becomes a period (no MULTILINE flag
    # here); whitespace before a line-final colon is removed.
    t = re.sub(r":\s*$", ".", t.rstrip())
    t = re.sub(r"\s+:\s*$", ":", t, flags=re.MULTILINE)
    # Paragraphs are separated by blank lines; each one is collapsed to a
    # single line and gets a trailing period unless it already ends with
    # '.', '!', '?', an ellipsis or ':'.
    paragraphs = [p.strip() for p in re.split(r"\n\s*\n", t) if p.strip()]
    rebuilt: List[str] = []
    for p in paragraphs:
        p = re.sub(r"\s+", " ", p).strip()
        if not p:
            continue
        if not re.search(r"[\.!\?\u2026:]\s*$", p):
            p = p.rstrip() + "."
        rebuilt.append(p)
    t = " ".join(rebuilt)
    t = re.sub(r"\s+", " ", t).strip()
    return t
_TTS_MAX_CHUNK_CHARS = 800
def _splitTextForTts(text: str, maxChars: int = _TTS_MAX_CHUNK_CHARS) -> List[str]:
"""Split a long voice line into TTS-safe chunks at sentence/paragraph boundaries."""
cleaned = (text or "").strip()
if not cleaned:
return []
if len(cleaned) <= maxChars:
return [cleaned]
sentencePattern = re.compile(r"(?<=[\.!\?\u2026])\s+|\n+")
rawSentences = [s.strip() for s in sentencePattern.split(cleaned) if s and s.strip()]
if not rawSentences:
rawSentences = [cleaned]
chunks: List[str] = []
buffer = ""
for sentence in rawSentences:
if len(sentence) > maxChars:
if buffer:
chunks.append(buffer.strip())
buffer = ""
words = sentence.split(" ")
current = ""
for word in words:
candidate = (current + " " + word).strip() if current else word
if len(candidate) > maxChars and current:
chunks.append(current.strip())
current = word
else:
current = candidate
if current:
if not re.search(r"[\.!\?\u2026]\s*$", current):
current = current.rstrip() + "."
chunks.append(current.strip())
continue
candidate = (buffer + " " + sentence).strip() if buffer else sentence
if len(candidate) > maxChars and buffer:
chunks.append(buffer.strip())
buffer = sentence
else:
buffer = candidate
if buffer:
chunks.append(buffer.strip())
finalized: List[str] = []
for c in chunks:
if not c:
continue
if not re.search(r"[\.!\?\u2026]\s*$", c):
c = c.rstrip() + "."
finalized.append(c)
return finalized
async def _speakTextChunked(
    websocket: Optional[WebSocket],
    voiceInterface: Any,
    sessionId: str,
    voiceText: str,
    languageCode: str,
    voiceName: Optional[str],
    isCancelled: Optional[Callable[[], bool]] = None,
) -> Dict[str, Any]:
    """Synthesise ``voiceText`` chunk by chunk and send each chunk as ``playAudio``.

    Args:
        websocket: Connection the ``playAudio`` messages are sent on.
        voiceInterface: Object providing an awaitable ``textToSpeech``.
        sessionId: Session identifier, used in messages and logs.
        voiceText: Text to speak; split via ``_splitTextForTts``.
        languageCode: Passed through to ``textToSpeech``.
        voiceName: Passed through to ``textToSpeech``.
        isCancelled: Optional predicate, polled before synthesising and again
            before sending each chunk; a True result stops the loop.

    Returns:
        Dict with ``success`` (at least one chunk sent), ``chunks`` (total),
        ``played`` (sent), ``error`` (last failure text or None) and
        ``cancelled``.
    """
    segments = _splitTextForTts(voiceText)
    total = len(segments)
    outcome: Dict[str, Any] = {
        "success": False,
        "chunks": total,
        "played": 0,
        "error": None,
        "cancelled": False,
    }
    if not segments:
        outcome["error"] = "no text"
        return outcome
    if voiceInterface is None:
        outcome["error"] = "no voice interface"
        return outcome

    def _cancelRequested() -> bool:
        return isCancelled is not None and isCancelled()

    lastError: Optional[str] = None
    for position, segment in enumerate(segments, start=1):
        if _cancelRequested():
            outcome["cancelled"] = True
            logger.info(
                f"Session {sessionId}: TTS chunk loop cancelled before chunk "
                f"{position}/{total} (user stop or newer answer in flight)"
            )
            break

        # A failing chunk is skipped; the remaining chunks are still tried.
        try:
            synthesis = await voiceInterface.textToSpeech(
                text=segment,
                languageCode=languageCode,
                voiceName=voiceName,
            )
        except Exception as ttsErr:
            lastError = f"chunk {position}/{total} raised: {ttsErr}"
            logger.warning(f"Session {sessionId}: TTS {lastError}")
            continue

        if not isinstance(synthesis, dict):
            lastError = f"chunk {position}/{total} failed: no result"
            logger.warning(f"Session {sessionId}: TTS {lastError}")
            continue
        if synthesis.get("success") is False:
            reason = synthesis.get("error", "unknown")
            lastError = f"chunk {position}/{total} failed: {reason}"
            logger.warning(f"Session {sessionId}: TTS {lastError}")
            continue

        audioContent = synthesis.get("audioContent")
        if not audioContent:
            lastError = f"chunk {position}/{total} returned no audioContent"
            logger.warning(f"Session {sessionId}: TTS {lastError}")
            continue

        if websocket is None:
            lastError = "websocket unavailable"
            break

        # Second poll: synthesis took time, the answer may be stale by now.
        if _cancelRequested():
            outcome["cancelled"] = True
            logger.info(
                f"Session {sessionId}: TTS chunk loop cancelled before "
                f"sending chunk {position}/{total} (audio dropped)"
            )
            break

        try:
            rawAudio = audioContent if isinstance(audioContent, bytes) else audioContent.encode()
            message = {
                "type": "playAudio",
                "sessionId": sessionId,
                "audio": {
                    "data": base64.b64encode(rawAudio).decode(),
                    "format": "mp3",
                },
            }
            await websocket.send_text(json.dumps(message))
            outcome["played"] += 1
        except Exception as wsErr:
            # A broken socket will not recover within this loop.
            lastError = f"chunk {position}/{total} websocket send failed: {wsErr}"
            logger.warning(f"Session {sessionId}: TTS {lastError}")
            break

    outcome["success"] = outcome["played"] > 0
    if lastError:
        outcome["error"] = lastError
    return outcome
def _coercePersistedDetectedIntent(raw: Optional[str]) -> tuple:
    """Translate a free-form intent label into a persistable pair.

    Returns ``(intent, label)``: a label equal (case-insensitively) to a
    ``TeamsbotDetectedIntent`` value maps to that member with ``None``;
    labels starting with ``agent:`` map to ``PROACTIVE``; everything else
    maps to ``NONE``. In the last two cases the original label, cut to 120
    characters, is returned alongside. Empty input gives ``(NONE, None)``.
    """
    label = str(raw).strip() if raw else ""
    if not label:
        return TeamsbotDetectedIntent.NONE, None

    folded = label.lower()
    exact = next(
        (member for member in TeamsbotDetectedIntent if member.value == folded),
        None,
    )
    if exact is not None:
        return exact, None

    fallback = (
        TeamsbotDetectedIntent.PROACTIVE
        if folded.startswith("agent:")
        else TeamsbotDetectedIntent.NONE
    )
    return fallback, label[:120]
_DIRECTOR_REPLY_PATTERN = re.compile(
r"^\s*(MEETING_REPLY|MEETING|REPLY|SAY|SPEAK)\s*:\s*",
re.IGNORECASE,
)
_DIRECTOR_SILENT_PATTERN = re.compile(
r"^\s*(SILENT|INTERNAL(?:_ONLY)?|NOTE|NO_MEETING_OUTPUT|ACK(?:NOWLEDGE)?)\s*:\s*",
re.IGNORECASE,
)
def _parseDirectorPromptFinal(finalText: str) -> Dict[str, Any]:
"""Parse the agent's final answer for a director prompt."""
text = (finalText or "").strip()
if not text:
return {"kind": "silent", "meetingText": "", "internalNote": ""}
meetingMatch = _DIRECTOR_REPLY_PATTERN.match(text)
if meetingMatch:
body = text[meetingMatch.end():].strip()
return {"kind": "meeting", "meetingText": body, "internalNote": ""}
silentMatch = _DIRECTOR_SILENT_PATTERN.match(text)
if silentMatch:
body = text[silentMatch.end():].strip()
return {"kind": "silent", "meetingText": "", "internalNote": body}
return {"kind": "silent", "meetingText": "", "internalNote": text}
# =========================================================================
# Active Service Registry (sessionId -> running TeamsbotService instance)
# =========================================================================
# Registry of running services, keyed by session id.
_activeServices: Dict[str, "TeamsbotService"] = {}


def getActiveService(sessionId: str) -> Optional["TeamsbotService"]:
    """Look up the TeamsbotService running for ``sessionId``; None when there is none."""
    if sessionId in _activeServices:
        return _activeServices[sessionId]
    return None
# =========================================================================
# AI Service Factory
# =========================================================================
def createAiService(user, mandateId, featureInstanceId=None):
    """Obtain the "ai" service from the service center for the teamsbot feature.

    The service is requested with a context built from ``user``,
    ``mandateId`` and the optional ``featureInstanceId``; the feature code
    is always "teamsbot".
    """
    return _getServiceCenterService(
        "ai",
        ServiceCenterContext(
            user=user,
            mandateId=mandateId,
            featureInstanceId=featureInstanceId,
            featureCode="teamsbot",
        ),
    )
# =========================================================================
# Session Event Queues (for SSE streaming to frontend)
# =========================================================================
# One event queue per session id.
sessionEvents: Dict[str, asyncio.Queue] = {}


async def _emitSessionEvent(sessionId: str, eventType: str, data: Any):
    """Queue an event for the session, creating the session's queue on first use.

    The queued item is a dict with ``type``, ``data`` and a ``timestamp``
    taken from ``getUtcTimestamp()``.
    """
    queue = sessionEvents.get(sessionId)
    if queue is None:
        queue = asyncio.Queue()
        sessionEvents[sessionId] = queue
    event = {"type": eventType, "data": data, "timestamp": getUtcTimestamp()}
    await queue.put(event)
def _normalizeGatewayHostForBotWs(host: str) -> str:
"""Use IPv4 loopback for local dev WebSocket URLs passed to the Node browser-bot."""
h = host.strip()
lower = h.lower()
if lower == "localhost":
return "127.0.0.1"
if lower.startswith("localhost:"):
return "127.0.0.1" + h[len("localhost"):]
if lower.startswith("[::1]:"):
return "127.0.0.1" + h.partition("]")[2]
if lower in ("[::1]", "::1"):
return "127.0.0.1"
return h
# =========================================================================
# TeamsbotService Class
# =========================================================================
class TeamsbotService:
"""
Pipeline Orchestrator for Teams Bot sessions.
Coordinates VoiceObjects (STT/TTS), AiService (SPEECH_TEAMS), and Bridge communication.
"""
def __init__(self, currentUser: User, mandateId: str, instanceId: str, config: TeamsbotConfig):
    """Store the caller context and initialise all per-session runtime state.

    Args:
        currentUser: User the service acts for.
        mandateId: Mandate identifier handed to the DB interfaces.
        instanceId: Feature instance identifier (also part of the bot
            WebSocket URL built in joinMeeting).
        config: Teamsbot configuration; its effective browser-bot URL is used
            to create the BrowserBotConnector.
    """
    self.currentUser = currentUser
    self.mandateId = mandateId
    self.instanceId = instanceId
    self.config = config
    self.browserBotConnector = BrowserBotConnector(config._getEffectiveBrowserBotUrl())
    # State
    # _lastAiCallTime is compared against the cooldown / interval settings in
    # _shouldTriggerAnalysis; _aiAnalysisInProgress also gates quick-acks.
    self._lastAiCallTime: float = 0.0
    self._aiAnalysisInProgress: bool = False
    self._contextBuffer: List[Dict[str, Any]] = []
    self._sessionContext: Optional[str] = None
    self._contextSummary: Optional[str] = None
    # Differential transcript tracking
    self._lastTranscriptSpeaker: Optional[str] = None
    self._lastTranscriptText: Optional[str] = None
    self._lastTranscriptId: Optional[str] = None
    self._lastSttTime: float = 0.0
    # Audio chunk aggregation
    self._audioBuffer: bytes = b""
    self._audioBufferStartTime: float = 0.0
    self._audioBufferLastChunkTime: float = 0.0
    self._audioBufferSampleRate: int = 16000
    self._lastBotResponseText: Optional[str] = None
    self._lastBotResponseTs: float = 0.0
    # Speaker attribution
    # Maintained by _registerSpeakerHint: last non-bot caption speaker, the
    # transcript ids still waiting for a speaker, and all speakers seen.
    self._lastCaptionSpeaker: Optional[str] = None
    self._unattributedTranscriptIds: List[str] = []
    self._knownSpeakers: set = set()
    # Debounced name trigger
    # Dict created by _setPendingNameTrigger; None while nothing is pending.
    self._pendingNameTrigger: Optional[Dict[str, Any]] = None
    self._followUpWindowEnd: float = 0.0
    # Quick-ack throttle
    # Timestamp compared with _QUICK_ACK_MIN_INTERVAL_SEC in _shouldFireQuickAck.
    self._lastQuickAckTs: float = 0.0
    # Ephemeral phrase pool
    # NOTE(review): presumably keyed by phrase kind ("quickAck", ...) — confirm
    # in serviceConversation, which owns the pool logic.
    self._phrasePool: Dict[str, List[str]] = {}
    self._phrasePoolIdx: Dict[str, int] = {}
    self._phrasePoolLock: asyncio.Lock = asyncio.Lock()
    # Voice pipeline serialisation
    # _answerGenerationCounter is snapshotted by _makeAnswerCancelHook; a
    # changed value marks earlier answers as superseded.
    self._meetingTtsLock: asyncio.Lock = asyncio.Lock()
    self._answerGenerationCounter: int = 0
    self._currentEscalationTask: Optional[asyncio.Task] = None
    self._currentQuickAckTask: Optional[asyncio.Task] = None
    self._agentEscalationInFlight: bool = False
    # Live transport handles
    self._activeSessionId: Optional[str] = None
    self._websocket: Optional[WebSocket] = None
    self._voiceInterface = None
    # Director prompts
    # Both lists feed _collectActiveDirectorBriefings.
    self._activePersistentPrompts: List[Dict[str, Any]] = []
    self._recentDirectorBriefings: List[Dict[str, Any]] = []
# =========================================================================
# Session Lifecycle
# =========================================================================
async def joinMeeting(
    self,
    sessionId: str,
    meetingLink: str,
    connectionId: Optional[str] = None,
    gatewayBaseUrl: str = "",
    botAccountEmail: Optional[str] = None,
    botAccountPassword: Optional[str] = None,
):
    """Send join command to the Browser Bot service.

    Marks the session as joining, builds the WebSocket URL the bot connects
    back to, optionally loads the avatar, and asks the browser bot to join.
    Failures are not raised: they are written to the session record
    (status ``error`` plus message) and emitted as a ``statusChange`` event.

    Args:
        sessionId: Session to join with.
        meetingLink: Meeting URL passed to the browser bot.
        connectionId: Not used by this method.
        gatewayBaseUrl: Base URL of this gateway; an ``https`` prefix selects
            the ``wss`` scheme, anything else ``ws``.
        botAccountEmail: Optional bot account login.
        botAccountPassword: Optional bot account password.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb
    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
    if sessionId not in sessionEvents:
        sessionEvents[sessionId] = asyncio.Queue()
    try:
        interface.updateSession(sessionId, {"status": TeamsbotSessionStatus.JOINING.value})
        await _emitSessionEvent(sessionId, "statusChange", {"status": "joining"})
        session = interface.getSession(sessionId)
        if not session:
            raise ValueError(f"Session {sessionId} not found")
        wsScheme = "wss" if gatewayBaseUrl.startswith("https") else "ws"
        gatewayHost = gatewayBaseUrl.replace("https://", "").replace("http://", "").rstrip("/")
        gatewayHost = _normalizeGatewayHostForBotWs(gatewayHost)
        fullGatewayWsUrl = f"{wsScheme}://{gatewayHost}/api/teamsbot/{self.instanceId}/bot/ws/{sessionId}"
        hasAuth = bool(botAccountEmail and botAccountPassword)
        # Optional config attributes are resolved once, with the same
        # defaults for logging and for the join call. Reading
        # self.config.transferMode directly in the log line used to raise
        # AttributeError (and abort the join) for configs without it.
        transferMode = getattr(self.config, "transferMode", "auto")
        debugMode = getattr(self.config, "debugMode", False)
        logger.info(f"Joining meeting for session {sessionId}: auth={hasAuth}, email={botAccountEmail or 'N/A'}, transferMode={transferMode}")
        avatarMediaData = None
        avatarMediaType = None
        avatarFileId = self._resolveAvatarFileId(session, interface)
        if avatarFileId:
            avatarMediaData, avatarMediaType = self._loadAvatarFileData(avatarFileId, interface)
        result = await self.browserBotConnector.joinMeeting(
            sessionId=sessionId,
            meetingUrl=meetingLink,
            botName=session.get("botName", self.config.botName),
            instanceId=self.instanceId,
            gatewayWsUrl=fullGatewayWsUrl,
            language=self.config.language,
            botAccountEmail=botAccountEmail,
            botAccountPassword=botAccountPassword,
            transferMode=transferMode,
            debugMode=debugMode,
            avatarMediaData=avatarMediaData,
            avatarMediaType=avatarMediaType,
        )
        if result.get("success"):
            # The status stays "joining" here; this method never sets a
            # later status on success.
            interface.updateSession(sessionId, {
                "status": TeamsbotSessionStatus.JOINING.value,
            })
            logger.info(f"Browser bot deployment started for session {sessionId}")
        else:
            errorMsg = result.get("error", "Unknown error joining meeting")
            interface.updateSession(sessionId, {
                "status": TeamsbotSessionStatus.ERROR.value,
                "errorMessage": errorMsg,
            })
            await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": errorMsg})
            logger.error(f"Failed to deploy browser bot for session {sessionId}: {errorMsg}")
    except Exception as e:
        # Best effort: persist and publish the failure instead of raising.
        logger.error(f"Error joining meeting for session {sessionId}: {e}")
        interface.updateSession(sessionId, {
            "status": TeamsbotSessionStatus.ERROR.value,
            "errorMessage": str(e),
        })
        await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)})
def _resolveAvatarFileId(self, session, interface):
"""Resolve avatarFileId: module override > config default."""
moduleId = session.get("moduleId")
if moduleId:
module = interface.getModule(moduleId)
if module and module.get("defaultAvatarFileId"):
return module["defaultAvatarFileId"]
return getattr(self.config, "avatarFileId", None)
def _loadAvatarFileData(self, fileId, _teamsbotInterface):
    """Fetch the avatar file and return ``(base64Text, mimeType)``.

    The file is read through the management DB interface; the second
    parameter is not used. ``(None, None)`` is returned when the file record
    or its data is missing, or when loading raises. The mime type falls back
    to "image/png" when the record has none.
    """
    from modules.interfaces import interfaceDbManagement
    unavailable = (None, None)
    try:
        storage = interfaceDbManagement.getInterface(
            self.currentUser,
            self.mandateId,
            featureInstanceId=self.instanceId,
        )
        record = storage.getFile(fileId)
        if not record:
            logger.warning(f"Avatar file {fileId} not found")
            return unavailable
        mimeType = getattr(record, "mimeType", None) or "image/png"
        payload = storage.getFileData(fileId)
        if not payload:
            logger.warning(f"Avatar file {fileId} has no data")
            return unavailable
        encoded = base64.b64encode(payload).decode("ascii")
        logger.info(f"Avatar file loaded: {fileId}, {mimeType}, {len(encoded)} chars base64")
        return encoded, mimeType
    except Exception as e:
        logger.error(f"Failed to load avatar file {fileId}: {e}")
        return unavailable
async def leaveMeeting(self, sessionId: str):
    """Send leave command to the Browser Bot service.

    Marks the session as leaving, asks the browser bot to leave, marks the
    session as ended and starts the meeting summary in the background.
    Failures are not raised: they are written to the session record and
    emitted as a ``statusChange`` event with status ``error``. In both cases
    the session's event queue is removed from ``sessionEvents`` at the end.

    Args:
        sessionId: Session whose meeting should be left.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb
    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
    try:
        interface.updateSession(sessionId, {"status": TeamsbotSessionStatus.LEAVING.value})
        await _emitSessionEvent(sessionId, "statusChange", {"status": "leaving"})
        await self.browserBotConnector.leaveMeeting(sessionId)
        interface.updateSession(sessionId, {
            "status": TeamsbotSessionStatus.ENDED.value,
            "endedAt": getUtcTimestamp(),
        })
        await _emitSessionEvent(sessionId, "statusChange", {"status": "ended"})
        # The event loop only keeps weak references to tasks, so an
        # unreferenced task may be garbage-collected before it finishes.
        # Hold a strong reference until the summary is done.
        summaryTask = asyncio.create_task(self._generateMeetingSummary(sessionId))
        backgroundTasks = getattr(self, "_backgroundTasks", None)
        if backgroundTasks is None:
            backgroundTasks = set()
            self._backgroundTasks = backgroundTasks
        backgroundTasks.add(summaryTask)
        summaryTask.add_done_callback(backgroundTasks.discard)
        logger.info(f"Bot left meeting for session {sessionId}")
    except Exception as e:
        logger.error(f"Error leaving meeting for session {sessionId}: {e}")
        interface.updateSession(sessionId, {
            "status": TeamsbotSessionStatus.ERROR.value,
            "errorMessage": str(e),
            "endedAt": getUtcTimestamp(),
        })
        # Same as joinMeeting: listeners that saw "leaving" get a terminal
        # status instead of waiting forever.
        await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)})
    # NOTE(review): events emitted after this point (e.g. by the summary
    # task) create a fresh queue for the session — confirm that is intended.
    sessionEvents.pop(sessionId, None)
# =========================================================================
# WebSocket — delegates to serviceWebSocket module
# =========================================================================
async def handleBotWebSocket(self, websocket: WebSocket, sessionId: str):
    """Main WebSocket handler — delegates to serviceWebSocket.

    Args:
        websocket: Connection opened by the browser bot.
        sessionId: Session the connection belongs to.
    """
    # Imported at call time. NOTE(review): presumably to avoid a circular
    # import between this module and serviceWebSocket — confirm.
    from .serviceWebSocket import handleBotWebSocket as _handleBotWebSocket
    await _handleBotWebSocket(self, websocket, sessionId)
# =========================================================================
# Conversation — delegates to serviceConversation module
# =========================================================================
# Every method in this group is a thin wrapper: it imports the function of
# the same name from serviceConversation at call time and calls it with this
# instance as the first argument.
async def _processTranscript(self, *args, **kwargs):
    """Forward all arguments to serviceConversation._processTranscript."""
    from .serviceConversation import _processTranscript
    await _processTranscript(self, *args, **kwargs)
async def _analyzeAndRespond(self, *args, **kwargs):
    """Forward all arguments to serviceConversation._analyzeAndRespond."""
    from .serviceConversation import _analyzeAndRespond
    await _analyzeAndRespond(self, *args, **kwargs)
async def _summarizeForVoice(self, sessionId: str, rawAnswer: str) -> str:
    """Return the result of serviceConversation._summarizeForVoice for ``rawAnswer``."""
    from .serviceConversation import _summarizeForVoice
    return await _summarizeForVoice(self, sessionId, rawAnswer)
async def _pickQuickAckText(self) -> Optional[str]:
    """Return the result of serviceConversation._pickQuickAckText."""
    from .serviceConversation import _pickQuickAckText
    return await _pickQuickAckText(self)
async def _pickEphemeralPhrase(self, kind: str, substitutions=None) -> Optional[str]:
    """Return the result of serviceConversation._pickEphemeralPhrase for ``kind``."""
    from .serviceConversation import _pickEphemeralPhrase
    return await _pickEphemeralPhrase(self, kind, substitutions)
async def _getEphemeralPhrases(self, kind: str) -> List[str]:
    """Return the result of serviceConversation._getEphemeralPhrases for ``kind``."""
    from .serviceConversation import _getEphemeralPhrases
    return await _getEphemeralPhrases(self, kind)
async def _generateEphemeralPhrases(self, kind: str, count: int) -> List[str]:
    """Return the result of serviceConversation._generateEphemeralPhrases."""
    from .serviceConversation import _generateEphemeralPhrases
    return await _generateEphemeralPhrases(self, kind, count)
async def _runQuickAck(self, sessionId: str) -> None:
    """Delegate to serviceConversation._runQuickAck."""
    from .serviceConversation import _runQuickAck
    await _runQuickAck(self, sessionId)
async def _checkPendingNameTrigger(self, delaySec: float = 3.0):
    """Delegate to serviceConversation._checkPendingNameTrigger with ``delaySec``."""
    from .serviceConversation import _checkPendingNameTrigger
    await _checkPendingNameTrigger(self, delaySec)
async def _warmEphemeralPhrasePool(self, sessionId: str) -> None:
    """Delegate to serviceConversation._warmEphemeralPhrasePool."""
    from .serviceConversation import _warmEphemeralPhrasePool
    await _warmEphemeralPhrasePool(self, sessionId)
async def _runEscalationAndRelease(self, *args, **kwargs) -> None:
    """Forward all arguments to serviceConversation._runEscalationAndRelease."""
    from .serviceConversation import _runEscalationAndRelease
    await _runEscalationAndRelease(self, *args, **kwargs)
# =========================================================================
# Commands — delegates to serviceCommands module
# =========================================================================
async def _executeCommands(self, sessionId, commands, voiceInterface, websocket):
    """Delegate to serviceCommands._executeCommands, passing this instance first."""
    # Imported at call time, like the other delegating wrappers of this class.
    from .serviceCommands import _executeCommands
    await _executeCommands(self, sessionId, commands, voiceInterface, websocket)
# =========================================================================
# WebSocket helpers (kept on class — used by serviceWebSocket)
# =========================================================================
# Thin wrappers: each imports the function of the same name from
# serviceWebSocket at call time and calls it with this instance first.
async def _handleBotStatus(self, sessionId, status, errorMessage, interface):
    """Delegate to serviceWebSocket._handleBotStatus."""
    from .serviceWebSocket import _handleBotStatus
    await _handleBotStatus(self, sessionId, status, errorMessage, interface)
async def _processAudioChunk(self, *args, **kwargs):
    """Forward all arguments to serviceWebSocket._processAudioChunk."""
    from .serviceWebSocket import _processAudioChunk
    await _processAudioChunk(self, *args, **kwargs)
async def _cancelInFlightSpeech(self, sessionId, websocket, reason):
    """Delegate to serviceWebSocket._cancelInFlightSpeech."""
    from .serviceWebSocket import _cancelInFlightSpeech
    await _cancelInFlightSpeech(self, sessionId, websocket, reason)
# =========================================================================
# Speaker & Trigger detection (kept on class — small, stateful)
# =========================================================================
def _registerSpeakerHint(self, speaker: str, text: str, sessionId: str = ""):
"""Track current speaker from captions for STT attribution."""
if not speaker:
return
normalizedSpeaker = speaker.strip()
if not normalizedSpeaker or self._isBotSpeaker(normalizedSpeaker):
return
self._lastCaptionSpeaker = normalizedSpeaker
self._knownSpeakers.add(normalizedSpeaker)
if self._unattributedTranscriptIds:
from . import interfaceFeatureTeamsbot as interfaceDb
interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
for tid in self._unattributedTranscriptIds:
interface.updateTranscript(tid, {"speaker": normalizedSpeaker})
for seg in self._contextBuffer:
if seg.get("speaker") == "Unknown" and seg.get("source") == "audioCapture":
seg["speaker"] = normalizedSpeaker
if self._lastTranscriptSpeaker == "Unknown":
self._lastTranscriptSpeaker = normalizedSpeaker
logger.info(
f"Session {sessionId}: Retroactive speaker attribution: "
f"{len(self._unattributedTranscriptIds)} segments -> {normalizedSpeaker}"
)
self._unattributedTranscriptIds.clear()
if self._pendingNameTrigger:
self._pendingNameTrigger["lastActivity"] = time.time()
def _resolveSpeakerForAudioCapture(self) -> Dict[str, Any]:
"""Speaker name for audio chunks — uses the last caption speaker."""
if self._lastCaptionSpeaker:
return {"speaker": self._lastCaptionSpeaker, "speakerResolvedFromHint": True}
return {"speaker": "Unknown", "speakerResolvedFromHint": False}
def _isBotSpeaker(self, speaker: str) -> bool:
"""Check if a transcript speaker is the bot itself."""
if not speaker:
return False
speakerLower = speaker.lower().strip()
botName = self.config.botName.lower().strip()
if botName and botName in speakerLower:
return True
botAccountEmail = getattr(self, '_botAccountEmail', None) or getattr(self.config, 'botAccountEmail', None)
if botAccountEmail:
emailPrefix = botAccountEmail.split("@")[0].lower().replace(".", " ")
if emailPrefix in speakerLower:
return True
return False
def _shouldTriggerAnalysis(self, transcriptText: str, allowPeriodic: bool = True) -> bool:
"""Decide whether to trigger AI analysis based on the latest transcript."""
now = time.time()
timeSinceLastCall = now - self._lastAiCallTime
if timeSinceLastCall < self.config.triggerCooldownSeconds:
return False
if allowPeriodic and timeSinceLastCall >= self.config.triggerIntervalSeconds:
logger.info(f"Trigger: Periodic interval ({self.config.triggerIntervalSeconds}s) elapsed ({timeSinceLastCall:.1f}s)")
return True
return False
def _isStopPhrase(self, text: str) -> bool:
"""Check if text is an immediate-cancel command from the meeting."""
if not text or len(text.strip()) < 2:
return False
t = text.strip().lower()
words = [w.strip(".,!?:;\"'()[]") for w in t.split() if w.strip()]
wordSet = set(words)
stopWords = {
"stop", "stopp", "halt", "ruhe", "stille", "schweig",
"arrete", "quiet", "shut", "silence",
"warte", "wait", "moment", "pause",
}
if wordSet & stopWords:
return True
if (
"sei still" in t
or "be quiet" in t
or "shut up" in t
or "hold on" in t
or "aufhoeren" in t
or "aufhören" in t
):
return True
return False
def _makeAnswerCancelHook(self) -> Callable[[], bool]:
"""Capture the current generation counter and return a cancel predicate."""
snapshot = self._answerGenerationCounter
return lambda: self._answerGenerationCounter != snapshot
def _detectBotName(self, text: str) -> bool:
"""Check if text contains the bot's name (exact or phonetically similar)."""
botNameLower = self.config.botName.lower()
textLower = text.lower()
if botNameLower in textLower:
return True
botFirstName = botNameLower.split()[0] if " " in botNameLower else botNameLower
if len(botFirstName) >= 3:
for word in textLower.split():
cleanWord = word.strip(".,!?:;\"'()[]")
if not cleanWord or len(cleanWord) < 3:
continue
if cleanWord == botFirstName:
return True
if cleanWord[0] == botFirstName[0] and abs(len(cleanWord) - len(botFirstName)) <= 2:
common = sum(1 for c in set(botFirstName) if c in cleanWord)
similarity = common / max(len(set(botFirstName)), len(set(cleanWord)))
if similarity >= 0.6:
return True
return False
def _setPendingNameTrigger(self, sessionId, interface, voiceInterface, websocket, triggerTranscript) -> bool:
"""Set or update a debounced name trigger. Returns True if newly set."""
if self._pendingNameTrigger:
self._pendingNameTrigger["lastActivity"] = time.time()
return False
self._pendingNameTrigger = {
"sessionId": sessionId,
"interface": interface,
"voiceInterface": voiceInterface,
"websocket": websocket,
"triggerTranscript": triggerTranscript,
"detectedAt": time.time(),
"lastActivity": time.time(),
}
return True
def _shouldFireQuickAck(self) -> bool:
    """Single gate that decides whether a quick acknowledgement may be spoken.

    All of the following must hold: the throttle interval since the last
    quick-ack has passed, no AI analysis or agent escalation is running, the
    response channel is "voice" or "both", and the response mode is neither
    MANUAL nor TRANSCRIBE_ONLY.
    """
    sinceLastAck = time.time() - self._lastQuickAckTs
    if sinceLastAck < _QUICK_ACK_MIN_INTERVAL_SEC:
        return False
    if self._aiAnalysisInProgress or self._agentEscalationInFlight:
        return False

    # The channel may be an enum member or a plain string.
    channel = self.config.responseChannel
    channelName = channel.value if hasattr(channel, "value") else str(channel)
    if channelName.lower().strip() not in ("voice", "both"):
        return False

    silentModes = (
        TeamsbotResponseMode.MANUAL,
        TeamsbotResponseMode.TRANSCRIBE_ONLY,
    )
    return self.config.responseMode not in silentModes
# =========================================================================
# Voice helpers (kept on class)
# =========================================================================
# Character limits for the voice path.
# NOTE(review): not referenced in the visible part of this file; presumably
# the first is the longest answer spoken as-is and the second the target
# length of a spoken summary — confirm in serviceConversation.
_VOICE_DIRECT_MAX_CHARS = 600
_VOICE_SUMMARY_MAX_CHARS = 350
@staticmethod
def _looksLikeStructuredText(raw: str) -> bool:
"""Heuristic: does the original answer have markdown structure?"""
if not raw:
return False
if raw.count("|") >= 4:
return True
if raw.count("\n#") >= 1:
return True
if raw.count("\n- ") + raw.count("\n* ") + raw.count("\n") >= 3:
return True
if re.search(r"\n\d+[\.\)]\s", raw):
count = len(re.findall(r"(?m)^\s*\d+[\.\)]\s", raw))
if count >= 3:
return True
return False
# =========================================================================
# Director Prompts (private operator instructions during a live meeting)
# =========================================================================
def _collectActiveDirectorBriefings(self) -> List[Dict[str, Any]]:
"""Return the deduplicated list of director-prompt briefings."""
seen: Dict[str, Dict[str, Any]] = {}
for p in self._activePersistentPrompts:
pid = p.get("id") or ""
seen[pid] = {
"promptId": pid,
"mode": p.get("mode") or "persistent",
"text": (p.get("text") or "").strip(),
"fileIds": list(p.get("fileIds") or []),
"note": (p.get("responseText") or "").strip(),
}
for b in self._recentDirectorBriefings:
pid = b.get("promptId") or ""
if pid in seen:
if b.get("note"):
seen[pid]["note"] = b["note"]
continue
seen[pid] = {
"promptId": pid,
"mode": b.get("mode") or "oneShot",
"text": (b.get("text") or "").strip(),
"fileIds": list(b.get("fileIds") or []),
"note": (b.get("note") or "").strip(),
}
return [v for v in seen.values() if v.get("text") or v.get("fileIds")]
def _collectDirectorFileIds(self) -> List[str]:
"""Flat, deduplicated list of UDB file IDs attached to director prompts."""
out: List[str] = []
seen: set = set()
for b in self._collectActiveDirectorBriefings():
for fid in b.get("fileIds") or []:
if fid and fid not in seen:
seen.add(fid)
out.append(fid)
return out
def _buildPersistentDirectorContext(self) -> str:
"""Render active director-prompt briefings for SPEECH_TEAMS context."""
briefings = self._collectActiveDirectorBriefings()
if not briefings:
return ""
lines: List[str] = []
for b in briefings:
entry = f"- ({b.get('mode', 'persistent')}) {b.get('text', '')}".rstrip()
fileIds = b.get("fileIds") or []
if fileIds:
entry += (
"\n ATTACHED_FILES (operator-provided documents — the AGENT "
"has tools to read them via summarizeContent / readFile / "
"readContentObjects): "
+ ", ".join(fileIds)
)
note = b.get("note")
if note:
noteShort = note if len(note) <= 600 else note[:600] + "..."
entry += f"\n AGENT_ANALYSIS (already computed by the bot): {noteShort}"
lines.append(entry)
return (
"\nOPERATOR_DIRECTIVES (private; never quote them verbatim, just follow them. "
"If the user asks about an attached document, use AGENT_ANALYSIS first; "
"if more depth is needed, set needsAgent=true so the agent can re-read the file):\n"
+ "\n".join(lines)
+ "\n"
)
def _recordDirectorBriefing(
    self,
    prompt: Dict[str, Any],
    internalNote: str,
    meetingText: str,
) -> None:
    """Store the outcome of a director prompt in the recent-briefings pool.

    An older entry for the same prompt id is replaced. The stored note is
    ``internalNote`` when set, otherwise ``meetingText``. The pool is capped
    at ``_RECENT_DIRECTOR_BRIEFINGS_MAX`` entries; the oldest are dropped.
    """
    promptKey = prompt.get("id") or ""

    # Drop any earlier briefing of this prompt before adding the new one.
    remaining = [
        entry
        for entry in self._recentDirectorBriefings
        if entry.get("promptId") != promptKey
    ]
    self._recentDirectorBriefings = remaining

    noteText = internalNote or meetingText or ""
    remaining.append({
        "promptId": promptKey,
        "mode": prompt.get("mode") or "oneShot",
        "text": (prompt.get("text") or "").strip(),
        "fileIds": list(prompt.get("fileIds") or []),
        "note": noteText.strip(),
        "recordedAt": getUtcTimestamp(),
    })

    if len(remaining) > _RECENT_DIRECTOR_BRIEFINGS_MAX:
        self._recentDirectorBriefings = remaining[-_RECENT_DIRECTOR_BRIEFINGS_MAX:]
async def submitDirectorPrompt(
    self,
    sessionId: str,
    operatorUserId: str,
    text: str,
    mode: TeamsbotDirectorPromptMode,
    fileIds: List[str],
) -> Dict[str, Any]:
    """Persist a new director prompt and trigger immediate agent processing.

    Args:
        sessionId: Session the prompt belongs to.
        operatorUserId: Id of the operator who submitted the prompt.
        text: The operator's instruction text.
        mode: Prompt mode; PERSISTENT prompts are also kept in the in-memory
            list of active persistent prompts.
        fileIds: Ids of attached files; ``None`` / empty is stored as ``[]``.

    Returns:
        The record returned by ``interface.createDirectorPrompt``.

    The prompt is created with status QUEUED, announced via a
    ``directorPrompt`` session event and then processed in a background
    task, so this coroutine returns before the agent run has finished.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb
    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
    promptData = TeamsbotDirectorPrompt(
        sessionId=sessionId,
        instanceId=self.instanceId,
        operatorUserId=operatorUserId,
        text=text,
        mode=mode,
        fileIds=fileIds or [],
        status=TeamsbotDirectorPromptStatus.QUEUED,
    ).model_dump()
    created = interface.createDirectorPrompt(promptData)
    if mode == TeamsbotDirectorPromptMode.PERSISTENT:
        self._activePersistentPrompts.append(created)
    await _emitSessionEvent(sessionId, "directorPrompt", {
        "id": created.get("id"),
        "status": created.get("status"),
        "mode": created.get("mode"),
        "text": created.get("text"),
        "fileIds": created.get("fileIds", []),
        "createdAt": created.get("createdAt"),
    })
    # The event loop only holds weak references to tasks, so a task nobody
    # references can be garbage-collected before it finishes. Keep a strong
    # reference until the task is done.
    task = asyncio.create_task(self._processDirectorPrompt(created))
    # Created lazily because the constructor is outside this block.
    pendingTasks = getattr(self, "_directorPromptTasks", None)
    if pendingTasks is None:
        pendingTasks = set()
        self._directorPromptTasks = pendingTasks
    pendingTasks.add(task)
    task.add_done_callback(pendingTasks.discard)
    return created
async def removePersistentPrompt(self, promptId: str) -> bool:
    """Retire a persistent director prompt on operator request.

    Returns False when no prompt with ``promptId`` exists. Otherwise the
    prompt is marked CONSUMED in storage, removed from the in-memory lists
    of active prompts and recent briefings, announced to the active session
    (if any) and True is returned.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb

    db = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
    activeSession = self._activeSessionId
    stored = db.getDirectorPrompt(promptId)
    if not stored:
        return False

    consumed = TeamsbotDirectorPromptStatus.CONSUMED.value
    db.updateDirectorPrompt(promptId, {
        "status": consumed,
        "consumedAt": getUtcTimestamp(),
        "statusMessage": "Removed by operator",
    })

    # Forget the prompt in both in-memory collections.
    self._activePersistentPrompts = [
        entry for entry in self._activePersistentPrompts
        if entry.get("id") != promptId
    ]
    self._recentDirectorBriefings = [
        entry for entry in self._recentDirectorBriefings
        if entry.get("promptId") != promptId
    ]

    if activeSession:
        removalEvent = {
            "id": promptId,
            "status": consumed,
            "mode": stored.get("mode"),
            "text": stored.get("text"),
            "removed": True,
        }
        await _emitSessionEvent(activeSession, "directorPrompt", removalEvent)
    return True
async def _processDirectorPrompt(self, prompt: Dict[str, Any]) -> None:
    """Run the agent for a director prompt and deliver the result.

    Marks the prompt RUNNING, builds the task brief from the operator text,
    the attached file ids and the recent transcript, runs
    ``_runAgentForMeeting`` in director-prompt mode and stores the outcome
    on the prompt record. Every status change is mirrored as a
    ``directorPrompt`` session event.

    Args:
        prompt: Director prompt record; the keys ``id``, ``sessionId``,
            ``text``, ``mode`` and ``fileIds`` are read.

    A failing agent run is logged, persisted as FAILED and emitted as an
    event instead of being re-raised; the prompt is then also dropped from
    the in-memory list of active persistent prompts.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb
    sessionId = prompt.get("sessionId")
    promptId = prompt.get("id")
    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
    interface.updateDirectorPrompt(promptId, {
        "status": TeamsbotDirectorPromptStatus.RUNNING.value,
    })
    await _emitSessionEvent(sessionId, "directorPrompt", {
        "id": promptId,
        "status": TeamsbotDirectorPromptStatus.RUNNING.value,
    })
    recentTranscript = self._renderRecentTranscriptForAgent(maxLines=20)
    directorText = (prompt.get("text") or "").strip()
    attachedFileIds = list(prompt.get("fileIds") or [])
    # Resolve the persistence flag exactly once (case-insensitive, enum or
    # plain string) so the task brief and the final status update can never
    # disagree about whether this prompt is persistent.
    modeRaw = prompt.get("mode")
    promptMode = str(
        modeRaw.value if hasattr(modeRaw, "value") else (modeRaw or "")
    ).lower()
    isPersistent = promptMode == TeamsbotDirectorPromptMode.PERSISTENT.value.lower()
    filesBlock = ""
    if attachedFileIds:
        filesBlock = (
            "\nANGEHAENGTE DOKUMENTE (UDB-File-IDs): "
            + ", ".join(attachedFileIds)
            + "\nDu MUSST diese Dokumente VOR der finalen Antwort lesen / zusammenfassen "
            "(z.B. summarizeContent, readFile, readContentObjects, describeImage). "
            "Beziehe Fakten und Zitate aus den Dokumenten in deine Notiz / dein "
            "Meeting-Reply ein, statt allgemein zu antworten.\n"
        )
    persistentNoteHint = ""
    if isPersistent and attachedFileIds:
        persistentNoteHint = (
            "\nSPEZIAL fuer PERSISTENT + Dokumente: Wenn die Anweisung KEIN explizites "
            "Meeting-Statement verlangt, antworte mit 'SILENT:' und liefere als interne "
            "Notiz eine STRUKTURIERTE, faktendichte Briefing-Zusammenfassung der Dokumente "
            "(Stichpunkte, Kennzahlen, Aussagen, die fuer Folgefragen im Meeting relevant "
            "sein koennen). Diese Notiz wird spaeteren Meeting-Antworten als Wissensbasis "
            "vorgelegt — schreibe sie also so, dass der Bot daraus zitieren kann.\n"
        )
    taskText = (
        f"Du bist der KI-Assistent in einem laufenden Teams-Meeting (Bot-Name: {self.config.botName}).\n"
        f"Der Operator hat dir folgende PRIVATE Regieanweisung gegeben (die anderen Teilnehmer im "
        f"Meeting sehen sie NICHT). Sie ist KEINE Frage an das Meeting, sondern eine interne "
        f"Anweisung an dich:\n\n"
        f"{directorText}\n"
        f"{filesBlock}"
        f"{persistentNoteHint}\n"
        f"AKTUELLER MEETING-KONTEXT (juengste Aussagen):\n{recentTranscript}\n\n"
        "ANTWORT-PROTOKOLL — Beginne deine FINALE Antwort mit GENAU EINEM dieser Marker:\n"
        "'MEETING_REPLY:' gefolgt vom Text, der im Meeting gesprochen / in den Meeting-Chat "
        "gepostet werden soll. Verwende diesen Marker NUR, wenn die Regieanweisung dich explizit "
        "auffordert, jetzt etwas im Meeting zu sagen oder zu schreiben (Beispiele: 'stell dich vor', "
        "'fasse zusammen', 'stelle Person X eine Frage', 'beantworte die letzte Frage'). Halte den "
        "Text kurz, sprachlich passend zur Stimme und ohne Marker oder Meta-Kommentare.\n"
        "'SILENT:' gefolgt von einer internen Notiz fuer das Operator-UI. "
        "Verwende diesen Marker fuer interne Direktiven und Wissens-Briefings (Beispiele: "
        "'achte ab jetzt auf X', 'merke dir Y', 'studiere Dokument Z'). "
        "Dieser Text wird NICHT ins Meeting gegeben, dient aber spaeteren Meeting-Antworten "
        "als Wissensbasis. Wenn Dokumente angehaengt sind, MUSS die Notiz konkrete, "
        "zitierfaehige Fakten aus den Dokumenten enthalten.\n\n"
        "Standard ist SILENT, wenn nicht eindeutig zur Meeting-Interaktion aufgefordert wurde. "
        "Wiederhole NIEMALS die Regieanweisung selbst im MEETING_REPLY-Text."
    )
    try:
        finalText = await self._runAgentForMeeting(
            sessionId=sessionId,
            taskText=taskText,
            fileIds=attachedFileIds,
            sourceLabel="directorPrompt",
            triggerTranscriptId=None,
            promptId=promptId,
            directorPromptMode=True,
        )
        updates: Dict[str, Any] = {
            "status": TeamsbotDirectorPromptStatus.SUCCEEDED.value,
            "responseText": finalText or "",
        }
        if not isPersistent:
            # Non-persistent prompts are finished after a single run.
            updates["status"] = TeamsbotDirectorPromptStatus.CONSUMED.value
            updates["consumedAt"] = getUtcTimestamp()
        interface.updateDirectorPrompt(promptId, updates)
        await _emitSessionEvent(sessionId, "directorPrompt", {
            "id": promptId,
            "status": updates["status"],
            "responseText": finalText,
        })
    except Exception as e:
        logger.error(
            f"Session {sessionId}: Director prompt {promptId} failed: {type(e).__name__}: {e}",
            exc_info=True,
        )
        interface.updateDirectorPrompt(promptId, {
            "status": TeamsbotDirectorPromptStatus.FAILED.value,
            "statusMessage": f"{type(e).__name__}: {str(e)[:300]}",
        })
        await _emitSessionEvent(sessionId, "directorPrompt", {
            "id": promptId,
            "status": TeamsbotDirectorPromptStatus.FAILED.value,
            "error": f"{type(e).__name__}: {str(e)[:300]}",
        })
        # A failed prompt must not keep steering later answers.
        self._activePersistentPrompts = [
            p for p in self._activePersistentPrompts if p.get("id") != promptId
        ]
def _renderRecentTranscriptForAgent(self, maxLines: int = 20) -> str:
"""Render the most recent context buffer entries for agent task brief."""
if not self._contextBuffer:
return "(noch keine Aussagen erfasst)"
recent = self._contextBuffer[-maxLines:]
lines = []
for seg in recent:
speaker = seg.get("speaker", "Unknown")
text = seg.get("text", "")
segSource = seg.get("source", "caption")
prefix = "Chat: " if segSource == "chat" else ""
if self._isBotSpeaker(speaker):
lines.append(f"[YOU ({self.config.botName})]: {text}")
else:
lines.append(f"[{prefix}{speaker}]: {text}")
return "\n".join(lines)
async def _interimAgentBusyMessage(self) -> Optional[str]:
"""Short spoken/chat line before a potentially long agent run."""
return await self._pickEphemeralPhrase("agentBusy")
async def _interimAgentRoundMessage(
    self, lastToolLabel: Optional[str] = None
) -> Optional[str]:
    """Ask the AI for one short progress sentence during a long agent run.

    Args:
        lastToolLabel: Label of the most recent tool call; a generic
            activity hint is used when it is missing.

    Returns:
        The generated sentence (surrounding quotes removed, at most 200
        characters), or None when the AI call fails or yields no content.
    """
    spokenLanguage = (self.config.language or "").strip() or "en-US"
    assistantName = (self.config.botName or "the assistant").strip()
    currentActivity = lastToolLabel or "working on the task"

    promptText = "".join([
        f"You are a meeting assistant named '{assistantName}'.\n",
        f"Target spoken language (BCP-47): {spokenLanguage}\n\n",
        f"The assistant is currently busy with: {currentActivity}\n\n",
        f"Generate ONE short sentence (max 12 words) in {spokenLanguage} ",
        f"that tells the audience what the assistant is doing right now. ",
        f"Natural, spoken style. No step numbers. No quotes around the output.\n",
        f"Output ONLY the sentence, nothing else.",
    ])

    try:
        aiService = createAiService(
            self.currentUser, self.mandateId, self.instanceId
        )
        await aiService.ensureAiObjectsInitialized()
        callOptions = AiCallOptions(
            operationType=OperationTypeEnum.DATA_ANALYSE,
            priority=PriorityEnum.SPEED,
        )
        response = await aiService.callAi(
            AiCallRequest(prompt=promptText, context="", options=callOptions)
        )
    except Exception as aiErr:
        logger.debug(f"Agent round phrase generation failed: {aiErr}")
        return None

    usable = response and response.errorCount == 0 and response.content
    if not usable:
        return None

    sentence = response.content.strip().strip('"').strip("'")
    # Slicing leaves shorter sentences untouched.
    return sentence[:200]
async def _notifyMeetingEphemeral(self, sessionId: str, text: str) -> None:
    """Send a short interim line to the meeting without storing a bot response.

    Nothing happens (apart from a warning) when no WebSocket is attached.
    Depending on the configured response channel the text is spoken, posted
    to the meeting chat, or both; a failed TTS attempt falls back to chat.
    Finally an ``agentRun`` / ``interimNotice`` session event is emitted.
    """
    socket = self._websocket
    speech = self._voiceInterface
    if not socket:
        logger.warning(f"Session {sessionId}: Interim notice skipped — no WebSocket")
        return

    configured = self.config.responseChannel
    if hasattr(configured, "value"):
        channelName = configured.value
    else:
        channelName = str(configured)
    channelName = channelName.lower().strip()
    useVoice = channelName in ("voice", "both")
    useChat = channelName in ("chat", "both")

    if useVoice and speech:
        isCancelled = self._makeAnswerCancelHook()
        async with self._meetingTtsLock:
            ttsResult = await _speakTextChunked(
                websocket=socket,
                voiceInterface=speech,
                sessionId=sessionId,
                voiceText=_voiceFriendlyMeetingText(text),
                languageCode=self.config.language,
                voiceName=self.config.voiceId,
                isCancelled=isCancelled,
            )
        if not ttsResult.get("success"):
            logger.warning(
                f"Session {sessionId}: Interim TTS failed ({ttsResult.get('error')}) — falling back to chat"
            )
            # Voice did not get through: make sure the chat path runs.
            useChat = True

    if useChat:
        try:
            chatFrame = json.dumps({
                "type": "sendChatMessage",
                "sessionId": sessionId,
                "text": text,
            })
            await socket.send_text(chatFrame)
        except Exception as chatErr:
            logger.warning(f"Session {sessionId}: Interim chat failed: {chatErr}")

    noticeEvent = {
        "status": "interimNotice",
        "message": text,
        "timestamp": getUtcTimestamp(),
    }
    await _emitSessionEvent(sessionId, "agentRun", noticeEvent)
async def _runAgentForMeeting(
    self,
    sessionId: str,
    taskText: str,
    fileIds: List[str],
    sourceLabel: str,
    triggerTranscriptId: Optional[str] = None,
    promptId: Optional[str] = None,
    directorPromptMode: bool = False,
) -> str:
    """Run agentService.runAgent for a meeting context.

    Streams the agent's events, mirrors them to the session event stream as
    ``agentRun`` events and routes the final text.

    Args:
        sessionId: Session the run belongs to; also part of the workflow id.
        taskText: Prompt handed to the agent.
        fileIds: File ids passed to the agent (``None`` when empty).
        sourceLabel: Label included in emitted events and in the persisted
            intent / reasoning of a delivered answer.
        triggerTranscriptId: Transcript id forwarded to the delivery helpers.
        promptId: Director prompt id, included in emitted events.
        directorPromptMode: When True, no interim notices are sent to the
            meeting and the final text is parsed for a meeting / silent
            decision instead of being delivered as-is.

    Returns:
        In director-prompt mode the text delivered to the meeting, or ``""``
        for a silent decision. Otherwise the agent's final text, which may
        be empty.

    Raises:
        Any exception raised while consuming the agent stream is re-raised
        after an ``agentRun`` error event was emitted; an agent ERROR event
        is turned into ``RuntimeError``.
    """
    from modules.serviceCenter.services.serviceAgent.datamodelAgent import (
        AgentConfig, AgentEventTypeEnum
    )
    ctx = ServiceCenterContext(
        user=self.currentUser,
        mandateId=self.mandateId,
        featureInstanceId=self.instanceId,
        featureCode="teamsbot",
    )
    agentService = _getServiceCenterService("agent", ctx)
    workflowId = f"teamsbot:{sessionId}"
    # Round and cost limits come from the module-level TEAMSBOT_AGENT_* constants.
    agentConfig = AgentConfig(
        maxRounds=TEAMSBOT_AGENT_MAX_ROUNDS,
        maxCostCHF=TEAMSBOT_AGENT_MAX_COST_CHF,
        toolSet="core",
        initialToolboxes=["core", "web"],
        excludeActionTools=True,
    )
    await _emitSessionEvent(sessionId, "agentRun", {
        "source": sourceLabel,
        "promptId": promptId,
        "status": "started",
        "timestamp": getUtcTimestamp(),
    })
    # Director prompts stay silent: the "busy" notice is only sent for other
    # sources. A failing notice is logged and never blocks the run.
    if not directorPromptMode:
        try:
            interimText = await self._interimAgentBusyMessage()
            if interimText:
                await self._notifyMeetingEphemeral(sessionId, interimText)
        except Exception as interimErr:
            logger.warning(f"Session {sessionId}: Interim agent notice failed: {interimErr}")
    finalText: str = ""
    rounds = 0
    lastToolLabel: Optional[str] = None
    try:
        async for event in agentService.runAgent(
            prompt=taskText,
            fileIds=fileIds or None,
            config=agentConfig,
            toolSet="core",
            workflowId=workflowId,
        ):
            if event.type == AgentEventTypeEnum.AGENT_PROGRESS:
                # Count progress events locally; the event's own round /
                # maxRounds values take precedence when present.
                rounds += 1
                pdata = event.data or {}
                roundNum = int(pdata.get("round", rounds))
                maxR = int(pdata.get("maxRounds", TEAMSBOT_AGENT_MAX_ROUNDS))
                await _emitSessionEvent(sessionId, "agentRun", {
                    "source": sourceLabel,
                    "promptId": promptId,
                    "status": "progress",
                    "round": roundNum,
                    "maxRounds": maxR,
                })
                # From the second round on, tell the meeting what the agent
                # is doing (never for director prompts).
                if roundNum >= 2 and not directorPromptMode:
                    try:
                        roundText = await self._interimAgentRoundMessage(lastToolLabel)
                        if roundText:
                            await self._notifyMeetingEphemeral(sessionId, roundText)
                    except Exception as roundNoticeErr:
                        logger.warning(
                            f"Session {sessionId}: Per-round agent notice failed: {roundNoticeErr}"
                        )
            elif event.type == AgentEventTypeEnum.TOOL_CALL:
                evtData = event.data or {}
                toolName = evtData.get("toolName")
                # Remembered so the next per-round notice can mention it.
                lastToolLabel = evtData.get("displayLabel")
                await _emitSessionEvent(sessionId, "agentRun", {
                    "source": sourceLabel,
                    "promptId": promptId,
                    "status": "toolCall",
                    "toolName": toolName,
                    "displayLabel": lastToolLabel,
                })
            elif event.type == AgentEventTypeEnum.TOOL_RESULT:
                evtData = event.data or {}
                # Only the first 200 characters of the result are forwarded.
                # NOTE(review): the slice assumes "data" is a string (or
                # another sliceable value) — confirm against the agent service.
                resultSnippet = (evtData.get("data") or "")[:200]
                await _emitSessionEvent(sessionId, "agentRun", {
                    "source": sourceLabel,
                    "promptId": promptId,
                    "status": "toolResult",
                    "toolName": evtData.get("toolName", ""),
                    "success": evtData.get("success", True),
                    "summary": resultSnippet,
                })
            elif event.type == AgentEventTypeEnum.FILE_CREATED:
                await _emitSessionEvent(sessionId, "documentCreated", event.data or {})
            elif event.type == AgentEventTypeEnum.FINAL:
                finalText = (event.content or "").strip()
            elif event.type == AgentEventTypeEnum.ERROR:
                raise RuntimeError(event.content or "Agent error")
    except Exception as runErr:
        # Report the failure to the session stream, then let the caller decide.
        await _emitSessionEvent(sessionId, "agentRun", {
            "source": sourceLabel,
            "promptId": promptId,
            "status": "error",
            "error": str(runErr)[:500],
        })
        raise
    await _emitSessionEvent(sessionId, "agentRun", {
        "source": sourceLabel,
        "promptId": promptId,
        "status": "completed",
        "rounds": rounds,
        "hasText": bool(finalText),
    })
    if finalText:
        if directorPromptMode:
            # Split the final answer into a meeting reply and/or an internal
            # note; a missing "kind" is treated as silent.
            decision = _parseDirectorPromptFinal(finalText)
            kind = decision.get("kind", "silent")
            meetingText = (decision.get("meetingText") or "").strip()
            internalNote = (decision.get("internalNote") or "").strip()
            logger.info(
                f"Session {sessionId}: Director prompt {promptId} -> kind={kind}, "
                f"meetingChars={len(meetingText)}, noteChars={len(internalNote)}"
            )
            await _emitSessionEvent(sessionId, "directorPrompt", {
                "id": promptId,
                "status": "decision",
                "decision": kind,
                "meetingText": meetingText,
                "internalNote": internalNote,
            })
            # Best effort: record the outcome in the briefing pool. Neither a
            # failed prompt lookup nor a failed pool update aborts delivery.
            try:
                promptRecord: Dict[str, Any] = {}
                if promptId:
                    try:
                        from . import interfaceFeatureTeamsbot as _ifaceDb
                        _iface = _ifaceDb.getInterface(
                            self.currentUser, self.mandateId, self.instanceId
                        )
                        promptRecord = _iface.getDirectorPrompt(promptId) or {}
                    except Exception as _lookupErr:
                        logger.debug(
                            f"Briefing pool: could not look up prompt {promptId}: {_lookupErr}"
                        )
                if promptRecord or promptId:
                    self._recordDirectorBriefing(
                        prompt=promptRecord or {"id": promptId},
                        internalNote=internalNote,
                        meetingText=meetingText,
                    )
            except Exception as briefErr:
                logger.warning(
                    f"Session {sessionId}: Director briefing pool update failed: {briefErr}"
                )
            # Keep the in-memory copy of an active persistent prompt in sync
            # with the latest result.
            if promptId:
                for p in self._activePersistentPrompts:
                    if p.get("id") == promptId:
                        p["responseText"] = internalNote or meetingText or finalText
                        break
            if kind == "meeting" and meetingText:
                await self._deliverTextToMeeting(
                    sessionId=sessionId,
                    text=meetingText,
                    detectedIntent=f"agent:{sourceLabel}",
                    reasoning=f"Agent run ({sourceLabel})",
                    triggerTranscriptId=triggerTranscriptId,
                )
            else:
                # Silent decision (or empty meeting text): operator UI only.
                await self._persistInternalDirectorReply(
                    sessionId=sessionId,
                    internalNote=internalNote or finalText,
                    promptId=promptId,
                    triggerTranscriptId=triggerTranscriptId,
                )
            return meetingText if kind == "meeting" else ""
        # Outside director-prompt mode the final text goes to the meeting as-is.
        await self._deliverTextToMeeting(
            sessionId=sessionId,
            text=finalText,
            detectedIntent=f"agent:{sourceLabel}",
            reasoning=f"Agent run ({sourceLabel})",
            triggerTranscriptId=triggerTranscriptId,
        )
    return finalText
async def _deliverTextToMeeting(
    self,
    sessionId: str,
    text: str,
    detectedIntent: str,
    reasoning: str,
    triggerTranscriptId: Optional[str] = None,
) -> None:
    """Deliver an agent answer to the meeting and record it.

    The configured response channel decides whether the text is spoken
    (after being condensed for voice), posted to the meeting chat, or both;
    a failed TTS attempt falls back to chat. Afterwards the answer is stored
    as bot response and as bot transcript, appended to the context buffer,
    announced through ``botResponse`` / ``transcript`` session events and
    counted on the session record.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb

    db = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
    socket = self._websocket
    speech = self._voiceInterface

    configured = self.config.responseChannel
    if hasattr(configured, "value"):
        channelName = configured.value
    else:
        channelName = str(configured)
    channelName = channelName.lower().strip()
    useVoice = channelName in ("voice", "both")
    useChat = channelName in ("chat", "both")

    # The recorded response type reflects the configured channel, not a
    # later chat fallback.
    if not useVoice:
        responseType = TeamsbotResponseType.CHAT
    elif useChat:
        responseType = TeamsbotResponseType.BOTH
    else:
        responseType = TeamsbotResponseType.AUDIO

    if useVoice and speech and socket:
        from .serviceConversation import _summarizeForVoice

        spoken = await _summarizeForVoice(self, sessionId, text)
        isCancelled = self._makeAnswerCancelHook()
        async with self._meetingTtsLock:
            ttsResult = await _speakTextChunked(
                websocket=socket,
                voiceInterface=speech,
                sessionId=sessionId,
                voiceText=spoken,
                languageCode=self.config.language,
                voiceName=self.config.voiceId,
                isCancelled=isCancelled,
            )
        spokenOk = ttsResult.get("success")
        await _emitSessionEvent(sessionId, "ttsDeliveryStatus", {
            "status": "dispatched" if spokenOk else "failed",
            "hasWebSocket": True,
            "chunks": ttsResult.get("chunks"),
            "played": ttsResult.get("played"),
            "error": ttsResult.get("error"),
            "timestamp": getUtcTimestamp(),
        })
        if not spokenOk:
            logger.warning(
                f"Session {sessionId}: Agent TTS delivery failed "
                f"({ttsResult.get('error')}) — falling back to meeting chat"
            )
            useChat = True

    if useChat and socket:
        try:
            chatFrame = json.dumps({
                "type": "sendChatMessage",
                "sessionId": sessionId,
                "text": text,
            })
            await socket.send_text(chatFrame)
            logger.info(f"Session {sessionId}: Agent chat dispatched ({len(text)} chars)")
        except Exception as chatErr:
            logger.warning(f"Session {sessionId}: Agent chat delivery failed: {chatErr}")

    # --- persist the answer as bot response -------------------------------
    intentEnum, intentMeta = _coercePersistedDetectedIntent(detectedIntent)
    if intentMeta:
        reasoningForDb = f"{reasoning} [{intentMeta}]"
    else:
        reasoningForDb = reasoning
    responseRecord = TeamsbotBotResponse(
        sessionId=sessionId,
        responseText=text,
        responseType=responseType,
        detectedIntent=intentEnum,
        reasoning=reasoningForDb,
        triggeredByTranscriptId=triggerTranscriptId,
        modelName="agent",
        processingTime=0.0,
        priceCHF=0.0,
        timestamp=getUtcTimestamp(),
    ).model_dump()
    storedResponse = db.createBotResponse(responseRecord)
    await _emitSessionEvent(sessionId, "botResponse", {
        "id": storedResponse.get("id"),
        "responseText": text,
        "responseType": responseType.value,
        "detectedIntent": intentEnum.value,
        "reasoning": reasoningForDb,
        "modelName": "agent",
        "processingTime": 0.0,
        "priceCHF": 0.0,
        "timestamp": responseRecord.get("timestamp"),
    })

    # --- mirror the answer into the transcript and the context buffer -----
    transcriptRecord = TeamsbotTranscript(
        sessionId=sessionId,
        speaker=self.config.botName,
        text=text,
        timestamp=getUtcTimestamp(),
        confidence=1.0,
        language=self.config.language,
        isFinal=True,
        source="botResponse",
    ).model_dump()
    storedTranscript = db.createTranscript(transcriptRecord)
    self._contextBuffer.append({
        "speaker": self.config.botName,
        "text": text,
        "timestamp": getUtcTimestamp(),
        "source": "botResponse",
    })

    # --- conversation-state bookkeeping ------------------------------------
    self._lastTranscriptSpeaker = self.config.botName
    self._lastTranscriptText = text
    self._lastTranscriptId = storedTranscript.get("id")
    self._lastBotResponseText = text.strip().lower()
    self._lastBotResponseTs = time.time()
    self._followUpWindowEnd = time.time() + 15.0

    await _emitSessionEvent(sessionId, "transcript", {
        "id": storedTranscript.get("id"),
        "speaker": self.config.botName,
        "text": text,
        "confidence": 1.0,
        "timestamp": getUtcTimestamp(),
        "isContinuation": False,
        "source": "botResponse",
        "speakerResolvedFromHint": False,
    })

    sessionRecord = db.getSession(sessionId)
    if sessionRecord:
        newCount = sessionRecord.get("botResponseCount", 0) + 1
        db.updateSession(sessionId, {"botResponseCount": newCount})
async def _persistInternalDirectorReply(
    self,
    sessionId: str,
    internalNote: str,
    promptId: Optional[str],
    triggerTranscriptId: Optional[str] = None,
) -> None:
    """Store a silent director-prompt reply for the operator UI only.

    Nothing is sent to the meeting. An empty note is ignored; otherwise the
    note is saved as a bot response and announced through a ``botResponse``
    session event flagged ``internalOnly``.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb

    noteText = (internalNote or "").strip()
    if not noteText:
        return

    db = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
    intentEnum, _unusedMeta = _coercePersistedDetectedIntent("agent:directorPrompt")
    reasoningText = (
        f"Director prompt {promptId or ''} — silent / internal only "
        f"(not sent to meeting)"
    ).strip()
    chatType = TeamsbotResponseType.CHAT

    record = TeamsbotBotResponse(
        sessionId=sessionId,
        responseText=noteText,
        responseType=chatType,
        detectedIntent=intentEnum,
        reasoning=reasoningText,
        triggeredByTranscriptId=triggerTranscriptId,
        modelName="agent",
        processingTime=0.0,
        priceCHF=0.0,
        timestamp=getUtcTimestamp(),
    ).model_dump()
    stored = db.createBotResponse(record)

    eventPayload = {
        "id": stored.get("id"),
        "responseText": noteText,
        "responseType": chatType.value,
        "detectedIntent": intentEnum.value,
        "reasoning": reasoningText,
        "modelName": "agent",
        "processingTime": 0.0,
        "priceCHF": 0.0,
        "timestamp": record.get("timestamp"),
        "internalOnly": True,
        "promptId": promptId,
    }
    await _emitSessionEvent(sessionId, "botResponse", eventPayload)

    logger.info(
        f"Session {sessionId}: Director prompt {promptId} silent reply "
        f"persisted internally ({len(noteText)} chars)"
    )
# =========================================================================
# Greeting (AI-localised)
# =========================================================================
async def _generateGreetingText(self, languageCode: str) -> str:
    """Let the AI write the greeting the bot uses when it joins a meeting.

    Args:
        languageCode: Preferred language; falls back to the configured
            language and finally to ``en-US``.

    Returns:
        The cleaned greeting (code fences and surrounding quotes removed),
        or an empty string when the AI call fails or yields nothing usable.
    """
    spokenLanguage = (languageCode or self.config.language or "").strip() or "en-US"
    assistantName = (self.config.botName or "the assistant").strip()
    # Only the first word of the display name is used in the greeting.
    if assistantName:
        firstName = assistantName.split(" ")[0]
    else:
        firstName = assistantName
    persona = (self.config.aiSystemPrompt or "").strip()

    prompt = (
        f"You are localizing the join greeting for a meeting assistant.\n\n"
        f"Assistant display name (use exactly this, no translation): {firstName}\n\n"
        f"Persona / style guide for the assistant:\n"
        f"{persona or '(no persona configured — use a neutral, polite, professional tone)'}\n\n"
        f"Target spoken language (BCP-47 code): {spokenLanguage}\n\n"
        f"Generate ONE short greeting (max ~14 words) for the assistant "
        f"to say AND post in chat the moment it joins a meeting. The "
        f"greeting MUST:\n"
        f" - be in the target language\n"
        f" - introduce the assistant by name ({firstName})\n"
        f" - signal that it is now present and ready\n"
        f" - sound natural when spoken aloud (this text is also TTS'd)\n\n"
        f"Output ONLY the greeting text, no quotes, no markdown, no "
        f"commentary, no surrounding punctuation beyond what naturally "
        f"belongs to the sentence."
    )

    try:
        aiService = createAiService(
            self.currentUser, self.mandateId, self.instanceId
        )
        await aiService.ensureAiObjectsInitialized()
        callOptions = AiCallOptions(
            operationType=OperationTypeEnum.DATA_ANALYSE,
            priority=PriorityEnum.SPEED,
        )
        response = await aiService.callAi(
            AiCallRequest(prompt=prompt, context="", options=callOptions)
        )
    except Exception as aiErr:
        logger.warning(
            f"Greeting generation crashed (lang={spokenLanguage}): {aiErr}"
        )
        return ""

    if not (response and response.errorCount == 0 and response.content):
        logger.warning(
            f"Greeting generation returned empty/error (lang={spokenLanguage})"
        )
        return ""

    # Strip a leading / trailing code fence, then surrounding quote marks.
    greeting = response.content.strip()
    for pattern, flags in ((r"^```.*?\n", re.DOTALL), (r"\n```\s*$", 0)):
        greeting = re.sub(pattern, "", greeting, flags=flags)
    greeting = greeting.strip().strip("\"'`").strip()
    if not greeting:
        return ""

    logger.info(
        f"Greeting generated (lang={spokenLanguage}, chars={len(greeting)}): {greeting[:80]}"
    )
    return greeting
async def _dispatchGreetingToMeeting(
    self,
    sessionId: str,
    greetingText: str,
    greetingLang: str,
    sendToChat: bool,
    interface: Any,
    voiceInterface: Any,
    websocket: WebSocket,
) -> None:
    """Speak the join greeting, optionally post it to chat, and record it.

    The greeting is sent through TTS (with ``ttsDeliveryStatus`` events for
    request and outcome), posted to the meeting chat when ``sendToChat`` is
    set, stored as bot transcript, appended to the context buffer and
    announced via ``botResponse`` / ``transcript`` session events. Any error
    on the way is logged as a warning and not propagated.
    """
    try:
        await _emitSessionEvent(sessionId, "ttsDeliveryStatus", {
            "status": "requested",
            "hasWebSocket": True,
            "message": "Greeting TTS requested",
            "timestamp": getUtcTimestamp(),
        })

        isCancelled = self._makeAnswerCancelHook()
        async with self._meetingTtsLock:
            speechResult = await _speakTextChunked(
                websocket=websocket,
                voiceInterface=voiceInterface,
                sessionId=sessionId,
                voiceText=_voiceFriendlyMeetingText(greetingText),
                languageCode=greetingLang,
                voiceName=self.config.voiceId,
                isCancelled=isCancelled,
            )

        # Log the outcome, then report it with a single status event.
        if speechResult.get("success"):
            logger.info(
                f"Greeting TTS sent for session {sessionId} "
                f"(chunks={speechResult.get('chunks')})"
            )
            statusPayload = {
                "status": "dispatched",
                "hasWebSocket": True,
                "chunks": speechResult.get("chunks"),
                "played": speechResult.get("played"),
                "timestamp": getUtcTimestamp(),
            }
        else:
            logger.warning(
                f"Greeting TTS failed for session {sessionId}: {speechResult.get('error')}"
            )
            statusPayload = {
                "status": "failed",
                "hasWebSocket": True,
                "message": speechResult.get("error"),
                "timestamp": getUtcTimestamp(),
            }
        await _emitSessionEvent(sessionId, "ttsDeliveryStatus", statusPayload)

        if sendToChat:
            try:
                chatFrame = json.dumps({
                    "type": "sendChatMessage",
                    "sessionId": sessionId,
                    "text": greetingText,
                })
                await websocket.send_text(chatFrame)
                logger.info(f"Greeting chat dispatch queued for session {sessionId}")
            except Exception as chatErr:
                logger.warning(
                    f"Greeting chat dispatch failed for session {sessionId}: {chatErr}"
                )

        # Record the greeting as something the bot said.
        transcriptRecord = TeamsbotTranscript(
            sessionId=sessionId,
            speaker=self.config.botName,
            text=greetingText,
            timestamp=getUtcTimestamp(),
            confidence=1.0,
            language=greetingLang,
            isFinal=True,
            source="botResponse",
        ).model_dump()
        storedTranscript = interface.createTranscript(transcriptRecord)
        self._contextBuffer.append({
            "speaker": self.config.botName,
            "text": greetingText,
            "timestamp": getUtcTimestamp(),
            "source": "botResponse",
        })
        self._lastTranscriptSpeaker = self.config.botName
        self._lastTranscriptText = greetingText
        self._lastTranscriptId = storedTranscript.get("id")

        responseEvent = {
            "id": storedTranscript.get("id"),
            "responseText": greetingText,
            "responseType": TeamsbotResponseType.AUDIO.value,
            "detectedIntent": "greeting",
            "reasoning": "Automatic join greeting",
            "timestamp": getUtcTimestamp(),
        }
        await _emitSessionEvent(sessionId, "botResponse", responseEvent)

        transcriptEvent = {
            "id": storedTranscript.get("id"),
            "speaker": self.config.botName,
            "text": greetingText,
            "confidence": 1.0,
            "timestamp": getUtcTimestamp(),
            "isContinuation": False,
            "source": "botResponse",
            "speakerResolvedFromHint": False,
        }
        await _emitSessionEvent(sessionId, "transcript", transcriptEvent)
    except Exception as dispatchErr:
        logger.warning(
            f"Greeting dispatch failed for session {sessionId}: {dispatchErr}"
        )
# =========================================================================
# Context Summarization
# =========================================================================
async def _summarizeSessionContext(self, sessionId: str, rawContext: str) -> str:
    """Condense a long session context supplied by the user.

    Returns the AI summary when the call succeeds with content. On any
    error or empty result the raw context is returned instead, cut to its
    first 2000 characters.
    """
    instruction = (
        "Fasse den folgenden Kontext auf die wesentlichen Punkte zusammen. "
        "Behalte alle wichtigen Fakten, Namen, Zahlen, Entscheidungen und Aktionspunkte. "
        "Entferne Fuelltext und Wiederholungen. "
        "Antworte NUR mit der Zusammenfassung, keine Erklaerungen oder Einleitungen."
    )
    try:
        aiService = createAiService(self.currentUser, self.mandateId, self.instanceId)
        await aiService.ensureAiObjectsInitialized()
        callOptions = AiCallOptions(
            operationType=OperationTypeEnum.DATA_ANALYSE,
            priority=PriorityEnum.SPEED,
        )
        response = await aiService.callAi(
            AiCallRequest(prompt=instruction, context=rawContext, options=callOptions)
        )
        succeeded = response and response.errorCount == 0 and response.content
        if succeeded:
            summary = response.content.strip()
            logger.info(f"Session {sessionId}: Context summarized from {len(rawContext)} to {len(summary)} chars")
            return summary
    except Exception as e:
        logger.warning(f"Session context summarization failed for {sessionId}: {e}")
    # Fallback: slicing leaves contexts of up to 2000 characters unchanged.
    return rawContext[:2000]
async def _summarizeContextBuffer(self, sessionId: str):
    """Summarize the older part of the context buffer.

    Takes the first half of the context buffer and asks the AI for a short
    summary, which is stored in ``self._contextSummary``. Nothing happens
    when a summary already exists or when that half holds fewer than ten
    segments. Errors are logged as warnings and never propagated.

    Args:
        sessionId: Session id, used for log messages only.
    """
    try:
        if self._contextSummary:
            return
        halfPoint = len(self._contextBuffer) // 2
        oldSegments = self._contextBuffer[:halfPoint]
        if len(oldSegments) < 10:
            return
        textToSummarize = "\n".join(
            f"[{seg.get('speaker', 'Unknown')}]: {seg.get('text', '')}"
            for seg in oldSegments
        )
        aiService = createAiService(self.currentUser, self.mandateId, self.instanceId)
        await aiService.ensureAiObjectsInitialized()
        request = AiCallRequest(
            prompt="Fasse das folgende Meeting-Transkript in 3-5 Saetzen zusammen. Nenne die wichtigsten Themen, Entscheidungen und offene Fragen. Antworte NUR mit der Zusammenfassung, keine Erklaerungen.",
            context=textToSummarize,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.SPEED,
            )
        )
        response = await aiService.callAi(request)
        # Same guard as the other AI helpers in this file: a successful call
        # without content must not be dereferenced or stored.
        if response and response.errorCount == 0 and response.content:
            self._contextSummary = response.content.strip()
            logger.info(f"Session {sessionId}: Context summarized ({len(oldSegments)} segments -> {len(self._contextSummary)} chars)")
        else:
            logger.warning(f"Session {sessionId}: Context summarization returned empty/error")
    except Exception as e:
        logger.warning(f"Context summarization failed for session {sessionId}: {e}")
# =========================================================================
# Meeting Summary
# =========================================================================
async def _generateMeetingSummary(self, sessionId: str):
    """Generate an AI summary of the meeting after it ends.

    Loads the session's transcripts, asks the AI for a short summary and
    stores it in the ``summary`` field of the session. Sessions with fewer
    than five transcripts are skipped. Errors are logged and never
    propagated.

    Args:
        sessionId: Session whose transcripts are summarized.
    """
    try:
        from . import interfaceFeatureTeamsbot as interfaceDb
        interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
        transcripts = interface.getTranscripts(sessionId)
        if not transcripts or len(transcripts) < 5:
            return
        fullTranscript = "\n".join(
            f"[{t.get('speaker', 'Unknown')}]: {t.get('text', '')}"
            for t in transcripts
        )
        aiService = createAiService(self.currentUser, self.mandateId, self.instanceId)
        await aiService.ensureAiObjectsInitialized()
        request = AiCallRequest(
            prompt="Erstelle eine kurze Zusammenfassung dieses Meeting-Transkripts. Nenne die wichtigsten Punkte, Entscheidungen und offene Aktionspunkte.",
            context=fullTranscript,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.BALANCED,
            )
        )
        response = await aiService.callAi(request)
        # Only store a real summary; an empty or missing result must not
        # overwrite the session's summary field.
        if response and response.errorCount == 0 and response.content:
            interface.updateSession(sessionId, {"summary": response.content})
            logger.info(f"Meeting summary generated for session {sessionId}")
        else:
            logger.warning(f"Meeting summary for session {sessionId} returned empty/error")
    except Exception as e:
        logger.error(f"Failed to generate meeting summary for session {sessionId}: {e}")