1993 lines
82 KiB
Python
1993 lines
82 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""
|
|
Teamsbot Service - Pipeline Orchestrator.
|
|
Manages the audio processing pipeline: STT -> Context Buffer -> SPEECH_TEAMS -> TTS -> Bridge.
|
|
"""
|
|
|
|
import logging
|
|
import json
|
|
import re
|
|
import asyncio
|
|
import time
|
|
import base64
|
|
from datetime import datetime, timezone
|
|
from typing import Optional, Dict, Any, List, Callable
|
|
|
|
from fastapi import WebSocket
|
|
|
|
from modules.datamodels.datamodelUam import User
|
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
|
from modules.shared.timeUtils import getUtcTimestamp
|
|
from modules.serviceCenter import getService as _getServiceCenterService
|
|
from modules.serviceCenter.context import ServiceCenterContext
|
|
|
|
from .datamodelTeamsbot import (
|
|
TeamsbotSessionStatus,
|
|
TeamsbotTranscript,
|
|
TeamsbotBotResponse,
|
|
TeamsbotResponseType,
|
|
TeamsbotConfig,
|
|
TeamsbotResponseMode,
|
|
TeamsbotResponseChannel,
|
|
TeamsbotDetectedIntent,
|
|
SpeechTeamsResponse,
|
|
TeamsbotCommand,
|
|
TeamsbotDirectorPrompt,
|
|
TeamsbotDirectorPromptStatus,
|
|
TeamsbotDirectorPromptMode,
|
|
DIRECTOR_PROMPT_TEXT_LIMIT,
|
|
DIRECTOR_PROMPT_FILE_LIMIT,
|
|
)
|
|
from .browserBotConnector import BrowserBotConnector
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Agent run limits for director prompts / speech escalation (meeting context).
|
|
TEAMSBOT_AGENT_MAX_ROUNDS = 8
|
|
TEAMSBOT_AGENT_MAX_COST_CHF = 0.12
|
|
|
|
# How many recent director-prompt briefings we keep in session memory.
|
|
_RECENT_DIRECTOR_BRIEFINGS_MAX = 6
|
|
|
|
# Quick-ack throttle interval.
|
|
_QUICK_ACK_MIN_INTERVAL_SEC = 25.0
|
|
|
|
# Number of phrase variants we generate per kind.
|
|
_EPHEMERAL_PHRASE_VARIANTS = 4
|
|
|
|
# Localisation INTENTS for ephemeral phrases.
|
|
_EPHEMERAL_PHRASE_INTENTS: Dict[str, str] = {
|
|
"quickAck": (
|
|
"Very short verbal acknowledgment (1 to 4 words) the assistant says "
|
|
"the moment its name is recognised, BEFORE it has formulated a full "
|
|
"answer. The intent is purely 'I heard you, I'm thinking' — natural, "
|
|
"conversational, never a complete sentence."
|
|
),
|
|
"agentBusy": (
|
|
"One short sentence (max ~12 words) the assistant says BEFORE starting "
|
|
"a longer research / tool-call task, so the audience knows the answer "
|
|
"will take a few seconds. Polite, professional, calm."
|
|
),
|
|
"agentRound": (
|
|
"One short sentence (max ~14 words) the assistant says BETWEEN rounds "
|
|
"of a longer agent task to update the audience on what it is doing. "
|
|
"Include the placeholder token '{activity}' which will be filled with "
|
|
"the current activity — e.g. 'I am {activity}, one moment...' or "
|
|
"'Currently {activity}, almost there...'. Do NOT include step numbers."
|
|
),
|
|
}
|
|
|
|
|
|
# =========================================================================
|
|
# Module-level utility functions (used by extracted modules too)
|
|
# =========================================================================
|
|
|
|
def _voiceLineLooksLikeBillingOrMeta(line: str) -> bool:
|
|
"""Heuristic: trailing lines that are separators or billing/usage footers."""
|
|
s = line.strip()
|
|
if not s:
|
|
return True
|
|
lower = s.lower()
|
|
if re.match(r"^[-=*_]{3,}\s*$", s):
|
|
return True
|
|
if re.match(r"^#{1,6}\s*(usage|billing|costs?|meta|technical|statistics)\b", lower):
|
|
return True
|
|
if "chf" in lower and re.search(r"\d", s):
|
|
if re.search(
|
|
r"\b(total|usage|cost|billing|token|spent|used|price|estimate|"
|
|
r"rounds?|calls?|duration|processing\s*time|model\s*calls?)\b",
|
|
lower,
|
|
):
|
|
return True
|
|
if "token" in lower and re.search(r"\d", s):
|
|
if re.search(r"\b(total|usage|prompt|completion)\b", lower):
|
|
return True
|
|
pl = lower.replace(" ", "")
|
|
if "progressafter" in pl and ("aicalls:" in pl or "toolcalls:" in pl):
|
|
return True
|
|
return False
|
|
|
|
|
|
# Character ranges treated as emoji / pictographs and removed before speaking.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F300-\U0001FAFF"
    "\U00002600-\U000027BF"
    "\U0001F1E6-\U0001F1FF"
    "\U00002B00-\U00002BFF"
    "\U0001F900-\U0001F9FF"
    "\U0000FE0F"
    "]+",
    flags=re.UNICODE,
)


def _voiceFriendlyMeetingText(raw: str) -> str:
    """Sanitise a chat/markdown response so it can be SPOKEN naturally.

    The passes run in a fixed order: agent limit notices are cut off, trailing
    billing/meta lines are dropped, markdown (code, tables, rules, headings,
    bullets, emphasis, links) is removed, emoji and typographic symbols are
    normalised, punctuation is tidied, and finally every paragraph is
    collapsed to one line that ends with sentence punctuation.

    Args:
        raw: The chat/markdown text to sanitise. May be empty or None.

    Returns:
        A single-line string for TTS; "" when *raw* is falsy.
    """
    if not raw:
        return ""

    # Agent limit notices: keep only the text before the marker, or use a
    # fixed spoken sentence when nothing precedes it.
    low = raw.lower()
    if "maximum rounds reached" in low:
        m = re.search(r"(?is)maximum\s+rounds\s+reached", raw)
        if m:
            head = raw[: m.start()].strip()
            raw = head or (
                "Die Abklaerung brauchte mehr Schritte als vorgesehen; Details stehen im Chat."
            )
    if "budget exceeded" in low:
        m = re.search(r"(?is)budget\s+exceeded", raw)
        if m:
            head = raw[: m.start()].strip()
            raw = head or "Das eingestellte Kostenlimit ist erreicht; Details stehen im Chat."

    # Drop trailing separator / billing / usage lines.
    lines = raw.strip().split("\n")
    while lines and _voiceLineLooksLikeBillingOrMeta(lines[-1]):
        lines.pop()
    t = "\n".join(lines).strip()
    if not t:
        # Everything looked like meta: fall back to the unfiltered text.
        t = raw.strip()

    # Fenced code blocks are dropped; inline code keeps its content.
    t = re.sub(r"```[\s\S]*?```", " ", t)
    t = re.sub(r"`([^`]+)`", r"\1", t)

    # Remove markdown table rows and table separator rows.
    cleanedLines: List[str] = []
    for ln in t.split("\n"):
        stripped = ln.strip()
        if stripped.count("|") >= 2:
            continue
        if re.fullmatch(r"\s*\|?[\s\-:|]+\|?\s*", stripped) and "-" in stripped:
            continue
        cleanedLines.append(ln)
    t = "\n".join(cleanedLines)

    # Horizontal rules made of three or more identical markers. The previous
    # pattern used "[\s\1]": inside a character class "\1" is the character
    # U+0001, not a backreference, so longer rules were never matched here.
    t = re.sub(r"(?m)^\s*([-*_])\s*\1\s*\1(?:\s*\1)*\s*$", "", t)
    # Heading markers, bullet markers and ordered-list numbers.
    t = re.sub(r"(?m)^\s*#{1,6}\s+", "", t)
    t = re.sub(r"(?m)^\s*[-*•·]\s+", "", t)
    t = re.sub(r"(?m)^\s*\d+[\.\)]\s+", "", t)
    # Bold / italic emphasis keeps its inner text.
    t = re.sub(r"\*\*([^*]+)\*\*", r"\1", t)
    t = re.sub(r"\*([^*\n]+)\*", r"\1", t)
    t = re.sub(r"__([^_]+)__", r"\1", t)
    t = re.sub(r"(?<![A-Za-z0-9])_([^_\n]+)_(?![A-Za-z0-9])", r"\1", t)
    # Markdown links keep only the label.
    t = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", t)
    t = _EMOJI_PATTERN.sub("", t)
    # Spaced dashes become a comma pause; remaining dashes become hyphens.
    t = re.sub(r"\s+[\u2013\u2014\u2212]\s+", ", ", t)
    t = re.sub(r"[\u2013\u2014\u2212]", "-", t)
    # Typographic double quotes are removed, single quotes become "'".
    t = re.sub(r"[\u201C\u201D\u201E\u201F\u00AB\u00BB\u2039\u203A]", "", t)
    t = re.sub(r"[\u2018\u2019\u201A\u201B]", "'", t)
    # Arrows / technical symbols and assorted Latin-1 signs become a space.
    t = re.sub(r"[\u2190-\u23FF]+", " ", t)
    t = re.sub(r"[\u00A1-\u00BF\u00D7\u00F7\u2122]+", " ", t)
    # Leftover markup characters become a space.
    t = re.sub(r"[*#~^=+|\\<>{}\[\]()_&@$%`]+", " ", t)
    t = t.replace('"', "")
    # A slash between two words is spoken as " oder "; other slashes vanish.
    t = re.sub(r"(?<=\w)\s*/\s*(?=\w)", " oder ", t)
    t = t.replace("/", " ")
    # Collapse repeated punctuation and drop whitespace before punctuation.
    t = re.sub(r"([\.,;:!\?])\1{1,}", r"\1", t)
    t = re.sub(r"\s+([\.,;:!\?])", r"\1", t)
    # A colon at the very end of the text becomes a full stop.
    t = re.sub(r":\s*$", ".", t.rstrip())
    # Kept from the original; whitespace before ":" was already removed above,
    # so this pass normally finds nothing to change.
    t = re.sub(r"\s+:\s*$", ":", t, flags=re.MULTILINE)

    # One line per paragraph, each ending in sentence punctuation.
    paragraphs = [p.strip() for p in re.split(r"\n\s*\n", t) if p.strip()]
    rebuilt: List[str] = []
    for p in paragraphs:
        p = re.sub(r"\s+", " ", p).strip()
        if not p:
            continue
        if not re.search(r"[\.!\?\u2026:]\s*$", p):
            p = p.rstrip() + "."
        rebuilt.append(p)
    t = " ".join(rebuilt)
    t = re.sub(r"\s+", " ", t).strip()
    return t
|
|
|
|
|
|
_TTS_MAX_CHUNK_CHARS = 800
|
|
|
|
|
|
def _splitTextForTts(text: str, maxChars: int = _TTS_MAX_CHUNK_CHARS) -> List[str]:
|
|
"""Split a long voice line into TTS-safe chunks at sentence/paragraph boundaries."""
|
|
cleaned = (text or "").strip()
|
|
if not cleaned:
|
|
return []
|
|
if len(cleaned) <= maxChars:
|
|
return [cleaned]
|
|
|
|
sentencePattern = re.compile(r"(?<=[\.!\?\u2026])\s+|\n+")
|
|
rawSentences = [s.strip() for s in sentencePattern.split(cleaned) if s and s.strip()]
|
|
if not rawSentences:
|
|
rawSentences = [cleaned]
|
|
|
|
chunks: List[str] = []
|
|
buffer = ""
|
|
for sentence in rawSentences:
|
|
if len(sentence) > maxChars:
|
|
if buffer:
|
|
chunks.append(buffer.strip())
|
|
buffer = ""
|
|
words = sentence.split(" ")
|
|
current = ""
|
|
for word in words:
|
|
candidate = (current + " " + word).strip() if current else word
|
|
if len(candidate) > maxChars and current:
|
|
chunks.append(current.strip())
|
|
current = word
|
|
else:
|
|
current = candidate
|
|
if current:
|
|
if not re.search(r"[\.!\?\u2026]\s*$", current):
|
|
current = current.rstrip() + "."
|
|
chunks.append(current.strip())
|
|
continue
|
|
|
|
candidate = (buffer + " " + sentence).strip() if buffer else sentence
|
|
if len(candidate) > maxChars and buffer:
|
|
chunks.append(buffer.strip())
|
|
buffer = sentence
|
|
else:
|
|
buffer = candidate
|
|
|
|
if buffer:
|
|
chunks.append(buffer.strip())
|
|
|
|
finalized: List[str] = []
|
|
for c in chunks:
|
|
if not c:
|
|
continue
|
|
if not re.search(r"[\.!\?\u2026]\s*$", c):
|
|
c = c.rstrip() + "."
|
|
finalized.append(c)
|
|
return finalized
|
|
|
|
|
|
async def _speakTextChunked(
    websocket: Optional[WebSocket],
    voiceInterface: Any,
    sessionId: str,
    voiceText: str,
    languageCode: str,
    voiceName: Optional[str],
    isCancelled: Optional[Callable[[], bool]] = None,
) -> Dict[str, Any]:
    """Run TTS in chunks and dispatch each ``playAudio`` over the websocket.

    The text is split with ``_splitTextForTts``. Each chunk is synthesised via
    ``voiceInterface.textToSpeech`` and sent as a JSON ``playAudio`` message
    with base64-encoded audio. A chunk whose synthesis fails is skipped; a
    failed websocket send, a missing websocket or a cancellation ends the loop.

    Args:
        websocket: Target socket; None means the audio cannot be delivered.
        voiceInterface: Object exposing an awaitable ``textToSpeech``.
        sessionId: Session id used in logs and in the outgoing message.
        voiceText: Text to speak.
        languageCode: Passed through to ``textToSpeech``.
        voiceName: Passed through to ``textToSpeech``.
        isCancelled: Optional predicate polled before synthesising and again
            before sending each chunk.

    Returns:
        Dict with ``success`` (at least one chunk sent), ``chunks`` (total),
        ``played`` (sent), ``error`` (last error text or None) and
        ``cancelled``.
    """
    chunks = _splitTextForTts(voiceText)
    result: Dict[str, Any] = {"success": False, "chunks": len(chunks), "played": 0, "error": None, "cancelled": False}
    if not chunks:
        result["error"] = "no text"
        return result
    if voiceInterface is None:
        result["error"] = "no voice interface"
        return result

    lastError: Optional[str] = None
    for idx, chunk in enumerate(chunks, start=1):
        if isCancelled is not None and isCancelled():
            result["cancelled"] = True
            logger.info(
                f"Session {sessionId}: TTS chunk loop cancelled before chunk "
                f"{idx}/{len(chunks)} (user stop or newer answer in flight)"
            )
            break

        # Check the transport BEFORE synthesising: without a websocket the
        # audio can never be delivered, so the TTS call would be wasted.
        if websocket is None:
            lastError = "websocket unavailable"
            break

        try:
            ttsResult = await voiceInterface.textToSpeech(
                text=chunk,
                languageCode=languageCode,
                voiceName=voiceName,
            )
        except Exception as ttsErr:
            # Best effort: a failed chunk is skipped, later chunks are still tried.
            lastError = f"chunk {idx}/{len(chunks)} raised: {ttsErr}"
            logger.warning(f"Session {sessionId}: TTS {lastError}")
            continue

        if not isinstance(ttsResult, dict) or ttsResult.get("success") is False:
            err = (ttsResult or {}).get("error", "unknown") if isinstance(ttsResult, dict) else "no result"
            lastError = f"chunk {idx}/{len(chunks)} failed: {err}"
            logger.warning(f"Session {sessionId}: TTS {lastError}")
            continue

        audioContent = ttsResult.get("audioContent")
        if not audioContent:
            lastError = f"chunk {idx}/{len(chunks)} returned no audioContent"
            logger.warning(f"Session {sessionId}: TTS {lastError}")
            continue

        # Re-check after the await: the answer may have been cancelled while
        # the chunk was being synthesised.
        if isCancelled is not None and isCancelled():
            result["cancelled"] = True
            logger.info(
                f"Session {sessionId}: TTS chunk loop cancelled before "
                f"sending chunk {idx}/{len(chunks)} (audio dropped)"
            )
            break

        try:
            await websocket.send_text(json.dumps({
                "type": "playAudio",
                "sessionId": sessionId,
                "audio": {
                    "data": base64.b64encode(
                        audioContent if isinstance(audioContent, bytes) else audioContent.encode()
                    ).decode(),
                    "format": "mp3",
                },
            }))
            result["played"] += 1
        except Exception as wsErr:
            # A failed send ends the loop; remaining chunks are not attempted.
            lastError = f"chunk {idx}/{len(chunks)} websocket send failed: {wsErr}"
            logger.warning(f"Session {sessionId}: TTS {lastError}")
            break

    result["success"] = result["played"] > 0
    if lastError:
        result["error"] = lastError
    return result
|
|
|
|
|
|
def _coercePersistedDetectedIntent(raw: Optional[str]) -> tuple:
    """Map a free-form intent label to a ``TeamsbotDetectedIntent`` for DB persistence.

    Returns:
        ``(intent, label)``. ``label`` is None for blank input or when the
        lower-cased text equals an enum value; otherwise it is the trimmed
        input cut to 120 characters, paired with ``PROACTIVE`` for labels
        starting with "agent:" and with ``NONE`` for everything else.
    """
    label = str(raw).strip() if raw else ""
    if not label:
        return TeamsbotDetectedIntent.NONE, None

    normalized = label.lower()
    exactMember = next(
        (member for member in TeamsbotDetectedIntent if member.value == normalized),
        None,
    )
    if exactMember is not None:
        return exactMember, None

    fallbackIntent = (
        TeamsbotDetectedIntent.PROACTIVE
        if normalized.startswith("agent:")
        else TeamsbotDetectedIntent.NONE
    )
    return fallbackIntent, label[:120]
|
|
|
|
|
|
_DIRECTOR_REPLY_PATTERN = re.compile(
|
|
r"^\s*(MEETING_REPLY|MEETING|REPLY|SAY|SPEAK)\s*:\s*",
|
|
re.IGNORECASE,
|
|
)
|
|
_DIRECTOR_SILENT_PATTERN = re.compile(
|
|
r"^\s*(SILENT|INTERNAL(?:_ONLY)?|NOTE|NO_MEETING_OUTPUT|ACK(?:NOWLEDGE)?)\s*:\s*",
|
|
re.IGNORECASE,
|
|
)
|
|
|
|
|
|
def _parseDirectorPromptFinal(finalText: str) -> Dict[str, Any]:
|
|
"""Parse the agent's final answer for a director prompt."""
|
|
text = (finalText or "").strip()
|
|
if not text:
|
|
return {"kind": "silent", "meetingText": "", "internalNote": ""}
|
|
|
|
meetingMatch = _DIRECTOR_REPLY_PATTERN.match(text)
|
|
if meetingMatch:
|
|
body = text[meetingMatch.end():].strip()
|
|
return {"kind": "meeting", "meetingText": body, "internalNote": ""}
|
|
|
|
silentMatch = _DIRECTOR_SILENT_PATTERN.match(text)
|
|
if silentMatch:
|
|
body = text[silentMatch.end():].strip()
|
|
return {"kind": "silent", "meetingText": "", "internalNote": body}
|
|
|
|
return {"kind": "silent", "meetingText": "", "internalNote": text}
|
|
|
|
|
|
# =========================================================================
|
|
# Active Service Registry (sessionId -> running TeamsbotService instance)
|
|
# =========================================================================
|
|
# Registry of running services, keyed by session id.
_activeServices: Dict[str, "TeamsbotService"] = {}


def getActiveService(sessionId: str) -> Optional["TeamsbotService"]:
    """Look up the running TeamsbotService for *sessionId*; None when not registered."""
    try:
        return _activeServices[sessionId]
    except KeyError:
        return None
|
|
|
|
|
|
# =========================================================================
|
|
# AI Service Factory
|
|
# =========================================================================
|
|
|
|
def createAiService(user, mandateId, featureInstanceId=None):
    """Return the "ai" service from the service center for a teamsbot context.

    Builds a ``ServiceCenterContext`` from the given user, mandate and optional
    feature instance, with the feature code fixed to "teamsbot".
    """
    contextArgs = {
        "user": user,
        "mandateId": mandateId,
        "featureInstanceId": featureInstanceId,
        "featureCode": "teamsbot",
    }
    serviceContext = ServiceCenterContext(**contextArgs)
    return _getServiceCenterService("ai", serviceContext)
|
|
|
|
|
|
# =========================================================================
|
|
# Session Event Queues (for SSE streaming to frontend)
|
|
# =========================================================================
|
|
# Per-session event queues, keyed by session id.
sessionEvents: Dict[str, asyncio.Queue] = {}


async def _emitSessionEvent(sessionId: str, eventType: str, data: Any):
    """Put a ``{"type", "data", "timestamp"}`` event on the session's queue.

    The queue is created on first use when the session has none yet.
    """
    try:
        queue = sessionEvents[sessionId]
    except KeyError:
        queue = sessionEvents[sessionId] = asyncio.Queue()
    await queue.put({"type": eventType, "data": data, "timestamp": getUtcTimestamp()})
|
|
|
|
|
|
def _normalizeGatewayHostForBotWs(host: str) -> str:
|
|
"""Use IPv4 loopback for local dev WebSocket URLs passed to the Node browser-bot."""
|
|
h = host.strip()
|
|
lower = h.lower()
|
|
if lower == "localhost":
|
|
return "127.0.0.1"
|
|
if lower.startswith("localhost:"):
|
|
return "127.0.0.1" + h[len("localhost"):]
|
|
if lower.startswith("[::1]:"):
|
|
return "127.0.0.1" + h.partition("]")[2]
|
|
if lower in ("[::1]", "::1"):
|
|
return "127.0.0.1"
|
|
return h
|
|
|
|
|
|
# =========================================================================
|
|
# TeamsbotService Class
|
|
# =========================================================================
|
|
|
|
class TeamsbotService:
|
|
"""
|
|
Pipeline Orchestrator for Teams Bot sessions.
|
|
Coordinates VoiceObjects (STT/TTS), AiService (SPEECH_TEAMS), and Bridge communication.
|
|
"""
|
|
|
|
def __init__(self, currentUser: User, mandateId: str, instanceId: str, config: TeamsbotConfig):
    """Store the session identity and configuration and reset all runtime state.

    Args:
        currentUser: User on whose behalf the service runs.
        mandateId: Mandate id passed on to the DB interfaces.
        instanceId: Feature instance id of this teamsbot.
        config: Teamsbot configuration; also supplies the browser-bot URL.
    """
    # --- Identity and collaborators -------------------------------------
    self.currentUser = currentUser
    self.mandateId = mandateId
    self.instanceId = instanceId
    self.config = config
    self.browserBotConnector = BrowserBotConnector(config._getEffectiveBrowserBotUrl())

    # --- AI analysis bookkeeping and meeting context --------------------
    self._lastAiCallTime: float = 0.0
    self._aiAnalysisInProgress: bool = False
    self._contextBuffer: List[Dict[str, Any]] = []
    self._sessionContext: Optional[str] = None
    self._contextSummary: Optional[str] = None

    # --- Last transcript seen (differential transcript tracking) --------
    self._lastTranscriptSpeaker: Optional[str] = None
    self._lastTranscriptText: Optional[str] = None
    self._lastTranscriptId: Optional[str] = None
    self._lastSttTime: float = 0.0

    # --- Audio chunk aggregation and last bot response ------------------
    self._audioBuffer: bytes = b""
    self._audioBufferStartTime: float = 0.0
    self._audioBufferLastChunkTime: float = 0.0
    self._audioBufferSampleRate: int = 16000
    self._lastBotResponseText: Optional[str] = None
    self._lastBotResponseTs: float = 0.0

    # --- Speaker attribution --------------------------------------------
    self._lastCaptionSpeaker: Optional[str] = None
    self._unattributedTranscriptIds: List[str] = []
    self._knownSpeakers: set = set()

    # --- Debounced name trigger and quick-ack throttle ------------------
    self._pendingNameTrigger: Optional[Dict[str, Any]] = None
    self._followUpWindowEnd: float = 0.0
    self._lastQuickAckTs: float = 0.0

    # --- Ephemeral phrase pool ------------------------------------------
    self._phrasePool: Dict[str, List[str]] = {}
    self._phrasePoolIdx: Dict[str, int] = {}
    self._phrasePoolLock: asyncio.Lock = asyncio.Lock()

    # --- Voice pipeline serialisation -----------------------------------
    self._meetingTtsLock: asyncio.Lock = asyncio.Lock()
    # Snapshotted by _makeAnswerCancelHook to detect superseded answers.
    self._answerGenerationCounter: int = 0
    self._currentEscalationTask: Optional[asyncio.Task] = None
    self._currentQuickAckTask: Optional[asyncio.Task] = None
    self._agentEscalationInFlight: bool = False

    # --- Live transport handles -----------------------------------------
    self._activeSessionId: Optional[str] = None
    self._websocket: Optional[WebSocket] = None
    self._voiceInterface = None

    # --- Director prompts -----------------------------------------------
    self._activePersistentPrompts: List[Dict[str, Any]] = []
    self._recentDirectorBriefings: List[Dict[str, Any]] = []
|
|
|
|
# =========================================================================
|
|
# Session Lifecycle
|
|
# =========================================================================
|
|
|
|
async def joinMeeting(
    self,
    sessionId: str,
    meetingLink: str,
    connectionId: Optional[str] = None,
    gatewayBaseUrl: str = "",
    botAccountEmail: Optional[str] = None,
    botAccountPassword: Optional[str] = None,
):
    """Send join command to the Browser Bot service.

    Marks the session as JOINING, builds the gateway WebSocket URL the bot
    should connect back to, optionally loads an avatar file, and asks the
    browser-bot connector to join. Any failure is stored on the session as
    ERROR and emitted as a ``statusChange`` event; nothing is raised.

    Args:
        sessionId: Session to join for.
        meetingLink: Meeting URL passed to the bot.
        connectionId: Not used in this method.
        gatewayBaseUrl: http(s) base URL of this gateway; an https base
            yields a ``wss`` WebSocket URL, anything else ``ws``.
        botAccountEmail: Optional bot account e-mail.
        botAccountPassword: Optional bot account password.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb

    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)

    if sessionId not in sessionEvents:
        sessionEvents[sessionId] = asyncio.Queue()

    try:
        interface.updateSession(sessionId, {"status": TeamsbotSessionStatus.JOINING.value})
        await _emitSessionEvent(sessionId, "statusChange", {"status": "joining"})

        session = interface.getSession(sessionId)
        if not session:
            raise ValueError(f"Session {sessionId} not found")

        # WebSocket URL the bot connects back to.
        wsScheme = "wss" if gatewayBaseUrl.startswith("https") else "ws"
        gatewayHost = gatewayBaseUrl.replace("https://", "").replace("http://", "").rstrip("/")
        gatewayHost = _normalizeGatewayHostForBotWs(gatewayHost)
        fullGatewayWsUrl = f"{wsScheme}://{gatewayHost}/api/teamsbot/{self.instanceId}/bot/ws/{sessionId}"

        # Read the optional config fields once, with defaults. The log line
        # used to read config.transferMode unguarded, so a config without it
        # raised here and the join ended in ERROR.
        transferMode = getattr(self.config, "transferMode", "auto")
        debugMode = getattr(self.config, "debugMode", False)

        hasAuth = bool(botAccountEmail and botAccountPassword)
        logger.info(f"Joining meeting for session {sessionId}: auth={hasAuth}, email={botAccountEmail or 'N/A'}, transferMode={transferMode}")

        avatarMediaData = None
        avatarMediaType = None
        avatarFileId = self._resolveAvatarFileId(session, interface)
        if avatarFileId:
            avatarMediaData, avatarMediaType = self._loadAvatarFileData(avatarFileId, interface)

        result = await self.browserBotConnector.joinMeeting(
            sessionId=sessionId,
            meetingUrl=meetingLink,
            botName=session.get("botName", self.config.botName),
            instanceId=self.instanceId,
            gatewayWsUrl=fullGatewayWsUrl,
            language=self.config.language,
            botAccountEmail=botAccountEmail,
            botAccountPassword=botAccountPassword,
            transferMode=transferMode,
            debugMode=debugMode,
            avatarMediaData=avatarMediaData,
            avatarMediaType=avatarMediaType,
        )

        if result.get("success"):
            # Success only means the deployment started; status stays JOINING.
            interface.updateSession(sessionId, {
                "status": TeamsbotSessionStatus.JOINING.value,
            })
            logger.info(f"Browser bot deployment started for session {sessionId}")
        else:
            errorMsg = result.get("error", "Unknown error joining meeting")
            interface.updateSession(sessionId, {
                "status": TeamsbotSessionStatus.ERROR.value,
                "errorMessage": errorMsg,
            })
            await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": errorMsg})
            logger.error(f"Failed to deploy browser bot for session {sessionId}: {errorMsg}")

    except Exception as e:
        # Boundary handler: persist and publish the failure instead of raising.
        logger.error(f"Error joining meeting for session {sessionId}: {e}")
        interface.updateSession(sessionId, {
            "status": TeamsbotSessionStatus.ERROR.value,
            "errorMessage": str(e),
        })
        await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)})
|
|
|
|
def _resolveAvatarFileId(self, session, interface):
|
|
"""Resolve avatarFileId: module override > config default."""
|
|
moduleId = session.get("moduleId")
|
|
if moduleId:
|
|
module = interface.getModule(moduleId)
|
|
if module and module.get("defaultAvatarFileId"):
|
|
return module["defaultAvatarFileId"]
|
|
return getattr(self.config, "avatarFileId", None)
|
|
|
|
def _loadAvatarFileData(self, fileId, _teamsbotInterface):
    """Load an avatar file and return ``(base64Data, mimeType)``.

    Returns ``(None, None)`` when the file record or its data is missing, or
    when loading raises. The MIME type falls back to "image/png".
    ``_teamsbotInterface`` is not used here.
    """
    from modules.interfaces import interfaceDbManagement

    notAvailable = (None, None)
    try:
        fileStore = interfaceDbManagement.getInterface(
            self.currentUser, self.mandateId, featureInstanceId=self.instanceId
        )

        record = fileStore.getFile(fileId)
        if not record:
            logger.warning(f"Avatar file {fileId} not found")
            return notAvailable
        mimeType = getattr(record, "mimeType", None) or "image/png"

        payload = fileStore.getFileData(fileId)
        if not payload:
            logger.warning(f"Avatar file {fileId} has no data")
            return notAvailable

        encoded = base64.b64encode(payload).decode("ascii")
        logger.info(f"Avatar file loaded: {fileId}, {mimeType}, {len(encoded)} chars base64")
        return encoded, mimeType
    except Exception as e:
        logger.error(f"Failed to load avatar file {fileId}: {e}")
        return notAvailable
|
|
|
|
async def leaveMeeting(self, sessionId: str):
    """Send leave command to the Browser Bot service.

    Marks the session LEAVING, asks the browser bot to leave, marks the
    session ENDED and starts the meeting summary in the background. On failure
    the session is stored as ERROR. The session's event queue is removed from
    ``sessionEvents`` in both cases.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb

    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)

    try:
        interface.updateSession(sessionId, {"status": TeamsbotSessionStatus.LEAVING.value})
        await _emitSessionEvent(sessionId, "statusChange", {"status": "leaving"})

        await self.browserBotConnector.leaveMeeting(sessionId)

        interface.updateSession(sessionId, {
            "status": TeamsbotSessionStatus.ENDED.value,
            "endedAt": getUtcTimestamp(),
        })
        await _emitSessionEvent(sessionId, "statusChange", {"status": "ended"})

        # Keep a strong reference to the background task: the event loop only
        # holds weak references, so a discarded task can be garbage-collected
        # before it finishes. The set is created lazily on first use.
        summaryTask = asyncio.create_task(self._generateMeetingSummary(sessionId))
        backgroundTasks = getattr(self, "_backgroundTasks", None)
        if backgroundTasks is None:
            backgroundTasks = self._backgroundTasks = set()
        backgroundTasks.add(summaryTask)
        summaryTask.add_done_callback(backgroundTasks.discard)

        logger.info(f"Bot left meeting for session {sessionId}")

    except Exception as e:
        logger.error(f"Error leaving meeting for session {sessionId}: {e}")
        interface.updateSession(sessionId, {
            "status": TeamsbotSessionStatus.ERROR.value,
            "errorMessage": str(e),
            "endedAt": getUtcTimestamp(),
        })

    sessionEvents.pop(sessionId, None)
|
|
|
|
# =========================================================================
|
|
# WebSocket — delegates to serviceWebSocket module
|
|
# =========================================================================
|
|
|
|
async def handleBotWebSocket(self, websocket: WebSocket, sessionId: str):
    """Main bot WebSocket handler; forwards to ``serviceWebSocket.handleBotWebSocket``."""
    # Imported at call time, as in the other delegating methods of this class.
    from .serviceWebSocket import handleBotWebSocket as delegate

    await delegate(self, websocket, sessionId)
|
|
|
|
# =========================================================================
|
|
# Conversation — delegates to serviceConversation module
|
|
# =========================================================================
|
|
|
|
async def _processTranscript(self, *args, **kwargs):
    """Forward all arguments to ``serviceConversation._processTranscript``."""
    from .serviceConversation import _processTranscript as delegate

    await delegate(self, *args, **kwargs)
|
|
|
|
async def _analyzeAndRespond(self, *args, **kwargs):
    """Forward all arguments to ``serviceConversation._analyzeAndRespond``."""
    from .serviceConversation import _analyzeAndRespond as delegate

    await delegate(self, *args, **kwargs)
|
|
|
|
async def _summarizeForVoice(self, sessionId: str, rawAnswer: str) -> str:
    """Return the result of ``serviceConversation._summarizeForVoice`` for this service."""
    from .serviceConversation import _summarizeForVoice as delegate

    spoken = await delegate(self, sessionId, rawAnswer)
    return spoken
|
|
|
|
async def _pickQuickAckText(self) -> Optional[str]:
    """Return the result of ``serviceConversation._pickQuickAckText`` for this service."""
    from .serviceConversation import _pickQuickAckText as delegate

    ackText = await delegate(self)
    return ackText
|
|
|
|
async def _pickEphemeralPhrase(self, kind: str, substitutions=None) -> Optional[str]:
    """Return the result of ``serviceConversation._pickEphemeralPhrase`` for *kind*."""
    from .serviceConversation import _pickEphemeralPhrase as delegate

    phrase = await delegate(self, kind, substitutions)
    return phrase
|
|
|
|
async def _getEphemeralPhrases(self, kind: str) -> List[str]:
    """Return the result of ``serviceConversation._getEphemeralPhrases`` for *kind*."""
    from .serviceConversation import _getEphemeralPhrases as delegate

    phrases = await delegate(self, kind)
    return phrases
|
|
|
|
async def _generateEphemeralPhrases(self, kind: str, count: int) -> List[str]:
    """Return the result of ``serviceConversation._generateEphemeralPhrases``."""
    from .serviceConversation import _generateEphemeralPhrases as delegate

    phrases = await delegate(self, kind, count)
    return phrases
|
|
|
|
async def _runQuickAck(self, sessionId: str) -> None:
    """Forward to ``serviceConversation._runQuickAck`` for *sessionId*."""
    from .serviceConversation import _runQuickAck as delegate

    await delegate(self, sessionId)
|
|
|
|
async def _checkPendingNameTrigger(self, delaySec: float = 3.0):
    """Forward to ``serviceConversation._checkPendingNameTrigger`` with *delaySec*."""
    from .serviceConversation import _checkPendingNameTrigger as delegate

    await delegate(self, delaySec)
|
|
|
|
async def _warmEphemeralPhrasePool(self, sessionId: str) -> None:
    """Forward to ``serviceConversation._warmEphemeralPhrasePool`` for *sessionId*."""
    from .serviceConversation import _warmEphemeralPhrasePool as delegate

    await delegate(self, sessionId)
|
|
|
|
async def _runEscalationAndRelease(self, *args, **kwargs) -> None:
    """Forward all arguments to ``serviceConversation._runEscalationAndRelease``."""
    from .serviceConversation import _runEscalationAndRelease as delegate

    await delegate(self, *args, **kwargs)
|
|
|
|
# =========================================================================
|
|
# Commands — delegates to serviceCommands module
|
|
# =========================================================================
|
|
|
|
async def _executeCommands(self, sessionId, commands, voiceInterface, websocket):
    """Forward to ``serviceCommands._executeCommands`` with the same arguments."""
    from .serviceCommands import _executeCommands as delegate

    await delegate(self, sessionId, commands, voiceInterface, websocket)
|
|
|
|
# =========================================================================
|
|
# WebSocket helpers (kept on class — used by serviceWebSocket)
|
|
# =========================================================================
|
|
|
|
async def _handleBotStatus(self, sessionId, status, errorMessage, interface):
    """Forward to ``serviceWebSocket._handleBotStatus`` with the same arguments."""
    from .serviceWebSocket import _handleBotStatus as delegate

    await delegate(self, sessionId, status, errorMessage, interface)
|
|
|
|
async def _processAudioChunk(self, *args, **kwargs):
    """Forward all arguments to ``serviceWebSocket._processAudioChunk``."""
    from .serviceWebSocket import _processAudioChunk as delegate

    await delegate(self, *args, **kwargs)
|
|
|
|
async def _cancelInFlightSpeech(self, sessionId, websocket, reason):
    """Forward to ``serviceWebSocket._cancelInFlightSpeech`` with the same arguments."""
    from .serviceWebSocket import _cancelInFlightSpeech as delegate

    await delegate(self, sessionId, websocket, reason)
|
|
|
|
# =========================================================================
|
|
# Speaker & Trigger detection (kept on class — small, stateful)
|
|
# =========================================================================
|
|
|
|
def _registerSpeakerHint(self, speaker: str, text: str, sessionId: str = ""):
|
|
"""Track current speaker from captions for STT attribution."""
|
|
if not speaker:
|
|
return
|
|
normalizedSpeaker = speaker.strip()
|
|
if not normalizedSpeaker or self._isBotSpeaker(normalizedSpeaker):
|
|
return
|
|
|
|
self._lastCaptionSpeaker = normalizedSpeaker
|
|
self._knownSpeakers.add(normalizedSpeaker)
|
|
|
|
if self._unattributedTranscriptIds:
|
|
from . import interfaceFeatureTeamsbot as interfaceDb
|
|
interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
|
|
for tid in self._unattributedTranscriptIds:
|
|
interface.updateTranscript(tid, {"speaker": normalizedSpeaker})
|
|
for seg in self._contextBuffer:
|
|
if seg.get("speaker") == "Unknown" and seg.get("source") == "audioCapture":
|
|
seg["speaker"] = normalizedSpeaker
|
|
if self._lastTranscriptSpeaker == "Unknown":
|
|
self._lastTranscriptSpeaker = normalizedSpeaker
|
|
logger.info(
|
|
f"Session {sessionId}: Retroactive speaker attribution: "
|
|
f"{len(self._unattributedTranscriptIds)} segments -> {normalizedSpeaker}"
|
|
)
|
|
self._unattributedTranscriptIds.clear()
|
|
|
|
if self._pendingNameTrigger:
|
|
self._pendingNameTrigger["lastActivity"] = time.time()
|
|
|
|
def _resolveSpeakerForAudioCapture(self) -> Dict[str, Any]:
|
|
"""Speaker name for audio chunks — uses the last caption speaker."""
|
|
if self._lastCaptionSpeaker:
|
|
return {"speaker": self._lastCaptionSpeaker, "speakerResolvedFromHint": True}
|
|
return {"speaker": "Unknown", "speakerResolvedFromHint": False}
|
|
|
|
def _isBotSpeaker(self, speaker: str) -> bool:
|
|
"""Check if a transcript speaker is the bot itself."""
|
|
if not speaker:
|
|
return False
|
|
speakerLower = speaker.lower().strip()
|
|
botName = self.config.botName.lower().strip()
|
|
if botName and botName in speakerLower:
|
|
return True
|
|
botAccountEmail = getattr(self, '_botAccountEmail', None) or getattr(self.config, 'botAccountEmail', None)
|
|
if botAccountEmail:
|
|
emailPrefix = botAccountEmail.split("@")[0].lower().replace(".", " ")
|
|
if emailPrefix in speakerLower:
|
|
return True
|
|
return False
|
|
|
|
def _shouldTriggerAnalysis(self, transcriptText: str, allowPeriodic: bool = True) -> bool:
|
|
"""Decide whether to trigger AI analysis based on the latest transcript."""
|
|
now = time.time()
|
|
timeSinceLastCall = now - self._lastAiCallTime
|
|
if timeSinceLastCall < self.config.triggerCooldownSeconds:
|
|
return False
|
|
if allowPeriodic and timeSinceLastCall >= self.config.triggerIntervalSeconds:
|
|
logger.info(f"Trigger: Periodic interval ({self.config.triggerIntervalSeconds}s) elapsed ({timeSinceLastCall:.1f}s)")
|
|
return True
|
|
return False
|
|
|
|
def _isStopPhrase(self, text: str) -> bool:
|
|
"""Check if text is an immediate-cancel command from the meeting."""
|
|
if not text or len(text.strip()) < 2:
|
|
return False
|
|
t = text.strip().lower()
|
|
words = [w.strip(".,!?:;\"'()[]") for w in t.split() if w.strip()]
|
|
wordSet = set(words)
|
|
stopWords = {
|
|
"stop", "stopp", "halt", "ruhe", "stille", "schweig",
|
|
"arrete", "quiet", "shut", "silence",
|
|
"warte", "wait", "moment", "pause",
|
|
}
|
|
if wordSet & stopWords:
|
|
return True
|
|
if (
|
|
"sei still" in t
|
|
or "be quiet" in t
|
|
or "shut up" in t
|
|
or "hold on" in t
|
|
or "aufhoeren" in t
|
|
or "aufhören" in t
|
|
):
|
|
return True
|
|
return False
|
|
|
|
def _makeAnswerCancelHook(self) -> Callable[[], bool]:
|
|
"""Capture the current generation counter and return a cancel predicate."""
|
|
snapshot = self._answerGenerationCounter
|
|
return lambda: self._answerGenerationCounter != snapshot
|
|
|
|
def _detectBotName(self, text: str) -> bool:
|
|
"""Check if text contains the bot's name (exact or phonetically similar)."""
|
|
botNameLower = self.config.botName.lower()
|
|
textLower = text.lower()
|
|
if botNameLower in textLower:
|
|
return True
|
|
botFirstName = botNameLower.split()[0] if " " in botNameLower else botNameLower
|
|
if len(botFirstName) >= 3:
|
|
for word in textLower.split():
|
|
cleanWord = word.strip(".,!?:;\"'()[]")
|
|
if not cleanWord or len(cleanWord) < 3:
|
|
continue
|
|
if cleanWord == botFirstName:
|
|
return True
|
|
if cleanWord[0] == botFirstName[0] and abs(len(cleanWord) - len(botFirstName)) <= 2:
|
|
common = sum(1 for c in set(botFirstName) if c in cleanWord)
|
|
similarity = common / max(len(set(botFirstName)), len(set(cleanWord)))
|
|
if similarity >= 0.6:
|
|
return True
|
|
return False
|
|
|
|
def _setPendingNameTrigger(self, sessionId, interface, voiceInterface, websocket, triggerTranscript) -> bool:
|
|
"""Set or update a debounced name trigger. Returns True if newly set."""
|
|
if self._pendingNameTrigger:
|
|
self._pendingNameTrigger["lastActivity"] = time.time()
|
|
return False
|
|
self._pendingNameTrigger = {
|
|
"sessionId": sessionId,
|
|
"interface": interface,
|
|
"voiceInterface": voiceInterface,
|
|
"websocket": websocket,
|
|
"triggerTranscript": triggerTranscript,
|
|
"detectedAt": time.time(),
|
|
"lastActivity": time.time(),
|
|
}
|
|
return True
|
|
|
|
def _shouldFireQuickAck(self) -> bool:
    """Single gate deciding whether a quick acknowledgement may be sent now.

    A quick-ack is allowed only when all of the following hold:
      * the last quick-ack is at least ``_QUICK_ACK_MIN_INTERVAL_SEC`` old,
      * no AI analysis and no agent escalation is currently in flight,
      * the configured response channel is "voice" or "both",
      * the response mode is neither MANUAL nor TRANSCRIBE_ONLY.
    """
    sinceLastAck = time.time() - self._lastQuickAckTs
    if sinceLastAck < _QUICK_ACK_MIN_INTERVAL_SEC:
        return False

    if self._aiAnalysisInProgress or self._agentEscalationInFlight:
        return False

    # The channel may be an enum member or a plain string.
    channel = self.config.responseChannel
    channelName = channel.value if hasattr(channel, "value") else str(channel)
    if channelName.lower().strip() not in ("voice", "both"):
        return False

    silentModes = (
        TeamsbotResponseMode.MANUAL,
        TeamsbotResponseMode.TRANSCRIBE_ONLY,
    )
    return self.config.responseMode not in silentModes
|
|
|
|
# =========================================================================
|
|
# Voice helpers (kept on class)
|
|
# =========================================================================
|
|
|
|
# Character limits for voice output. Neither constant is referenced in this
# part of the file; presumably they bound directly spoken text and spoken
# summaries respectively. TODO confirm against the voice helpers that use them.
_VOICE_DIRECT_MAX_CHARS = 600
|
|
_VOICE_SUMMARY_MAX_CHARS = 350
|
|
|
|
@staticmethod
|
|
def _looksLikeStructuredText(raw: str) -> bool:
|
|
"""Heuristic: does the original answer have markdown structure?"""
|
|
if not raw:
|
|
return False
|
|
if raw.count("|") >= 4:
|
|
return True
|
|
if raw.count("\n#") >= 1:
|
|
return True
|
|
if raw.count("\n- ") + raw.count("\n* ") + raw.count("\n• ") >= 3:
|
|
return True
|
|
if re.search(r"\n\d+[\.\)]\s", raw):
|
|
count = len(re.findall(r"(?m)^\s*\d+[\.\)]\s", raw))
|
|
if count >= 3:
|
|
return True
|
|
return False
|
|
|
|
# =========================================================================
|
|
# Director Prompts (private operator instructions during a live meeting)
|
|
# =========================================================================
|
|
|
|
def _collectActiveDirectorBriefings(self) -> List[Dict[str, Any]]:
|
|
"""Return the deduplicated list of director-prompt briefings."""
|
|
seen: Dict[str, Dict[str, Any]] = {}
|
|
for p in self._activePersistentPrompts:
|
|
pid = p.get("id") or ""
|
|
seen[pid] = {
|
|
"promptId": pid,
|
|
"mode": p.get("mode") or "persistent",
|
|
"text": (p.get("text") or "").strip(),
|
|
"fileIds": list(p.get("fileIds") or []),
|
|
"note": (p.get("responseText") or "").strip(),
|
|
}
|
|
for b in self._recentDirectorBriefings:
|
|
pid = b.get("promptId") or ""
|
|
if pid in seen:
|
|
if b.get("note"):
|
|
seen[pid]["note"] = b["note"]
|
|
continue
|
|
seen[pid] = {
|
|
"promptId": pid,
|
|
"mode": b.get("mode") or "oneShot",
|
|
"text": (b.get("text") or "").strip(),
|
|
"fileIds": list(b.get("fileIds") or []),
|
|
"note": (b.get("note") or "").strip(),
|
|
}
|
|
return [v for v in seen.values() if v.get("text") or v.get("fileIds")]
|
|
|
|
def _collectDirectorFileIds(self) -> List[str]:
|
|
"""Flat, deduplicated list of UDB file IDs attached to director prompts."""
|
|
out: List[str] = []
|
|
seen: set = set()
|
|
for b in self._collectActiveDirectorBriefings():
|
|
for fid in b.get("fileIds") or []:
|
|
if fid and fid not in seen:
|
|
seen.add(fid)
|
|
out.append(fid)
|
|
return out
|
|
|
|
def _buildPersistentDirectorContext(self) -> str:
|
|
"""Render active director-prompt briefings for SPEECH_TEAMS context."""
|
|
briefings = self._collectActiveDirectorBriefings()
|
|
if not briefings:
|
|
return ""
|
|
lines: List[str] = []
|
|
for b in briefings:
|
|
entry = f"- ({b.get('mode', 'persistent')}) {b.get('text', '')}".rstrip()
|
|
fileIds = b.get("fileIds") or []
|
|
if fileIds:
|
|
entry += (
|
|
"\n ATTACHED_FILES (operator-provided documents — the AGENT "
|
|
"has tools to read them via summarizeContent / readFile / "
|
|
"readContentObjects): "
|
|
+ ", ".join(fileIds)
|
|
)
|
|
note = b.get("note")
|
|
if note:
|
|
noteShort = note if len(note) <= 600 else note[:600] + "..."
|
|
entry += f"\n AGENT_ANALYSIS (already computed by the bot): {noteShort}"
|
|
lines.append(entry)
|
|
return (
|
|
"\nOPERATOR_DIRECTIVES (private; never quote them verbatim, just follow them. "
|
|
"If the user asks about an attached document, use AGENT_ANALYSIS first; "
|
|
"if more depth is needed, set needsAgent=true so the agent can re-read the file):\n"
|
|
+ "\n".join(lines)
|
|
+ "\n"
|
|
)
|
|
|
|
def _recordDirectorBriefing(
    self,
    prompt: Dict[str, Any],
    internalNote: str,
    meetingText: str,
) -> None:
    """Store the outcome of a director prompt in the session briefing pool.

    Any earlier briefing for the same prompt id is replaced. The note is the
    internal note when present, otherwise the meeting text. The pool keeps only
    the newest ``_RECENT_DIRECTOR_BRIEFINGS_MAX`` entries.
    """
    promptKey = prompt.get("id") or ""

    # Drop a previous briefing of the same prompt before adding the new one.
    self._recentDirectorBriefings = [
        entry
        for entry in self._recentDirectorBriefings
        if entry.get("promptId") != promptKey
    ]
    self._recentDirectorBriefings.append(
        dict(
            promptId=promptKey,
            mode=prompt.get("mode") or "oneShot",
            text=(prompt.get("text") or "").strip(),
            fileIds=list(prompt.get("fileIds") or []),
            note=(internalNote or meetingText or "").strip(),
            recordedAt=getUtcTimestamp(),
        )
    )

    overflow = len(self._recentDirectorBriefings) - _RECENT_DIRECTOR_BRIEFINGS_MAX
    if overflow > 0:
        # The list was rebuilt above, so trimming it in place is safe.
        del self._recentDirectorBriefings[:overflow]
|
|
|
|
async def submitDirectorPrompt(
    self,
    sessionId: str,
    operatorUserId: str,
    text: str,
    mode: TeamsbotDirectorPromptMode,
    fileIds: List[str],
) -> Dict[str, Any]:
    """Persist a new director prompt and trigger immediate agent processing.

    The prompt is stored with status QUEUED, registered as an active persistent
    prompt when ``mode`` is PERSISTENT, announced through a ``directorPrompt``
    session event, and then processed in a background task.

    Args:
        sessionId: Session the prompt belongs to.
        operatorUserId: User who issued the prompt.
        text: The operator's instruction.
        mode: Director prompt mode.
        fileIds: IDs of files attached to the prompt (may be empty or None).

    Returns:
        The created prompt record as returned by the persistence interface.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb

    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)

    promptData = TeamsbotDirectorPrompt(
        sessionId=sessionId,
        instanceId=self.instanceId,
        operatorUserId=operatorUserId,
        text=text,
        mode=mode,
        fileIds=fileIds or [],
        status=TeamsbotDirectorPromptStatus.QUEUED,
    ).model_dump()
    created = interface.createDirectorPrompt(promptData)

    if mode == TeamsbotDirectorPromptMode.PERSISTENT:
        self._activePersistentPrompts.append(created)

    await _emitSessionEvent(sessionId, "directorPrompt", {
        "id": created.get("id"),
        "status": created.get("status"),
        "mode": created.get("mode"),
        "text": created.get("text"),
        "fileIds": created.get("fileIds", []),
        "createdAt": created.get("createdAt"),
    })

    # The event loop only holds weak references to tasks, so a fire-and-forget
    # task can be garbage-collected before it finishes. Keep a strong reference
    # until the task is done.
    task = asyncio.create_task(self._processDirectorPrompt(created))
    pendingTasks = getattr(self, "_directorPromptTasks", None)
    if pendingTasks is None:
        pendingTasks = set()
        self._directorPromptTasks = pendingTasks
    pendingTasks.add(task)
    task.add_done_callback(pendingTasks.discard)
    return created
|
|
|
|
async def removePersistentPrompt(self, promptId: str) -> bool:
    """Retire a persistent director prompt on behalf of the operator.

    The stored prompt is marked CONSUMED, removed from the in-memory list of
    active persistent prompts and from the recent briefings, and (when a
    session is active) a ``directorPrompt`` event flagged ``removed`` is sent.

    Returns:
        False when no prompt with this id exists, True otherwise.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb

    db = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
    activeSession = self._activeSessionId
    storedPrompt = db.getDirectorPrompt(promptId)
    if not storedPrompt:
        return False

    db.updateDirectorPrompt(promptId, {
        "status": TeamsbotDirectorPromptStatus.CONSUMED.value,
        "consumedAt": getUtcTimestamp(),
        "statusMessage": "Removed by operator",
    })

    # Forget the prompt in both in-memory pools.
    self._activePersistentPrompts = [
        item for item in self._activePersistentPrompts if item.get("id") != promptId
    ]
    self._recentDirectorBriefings = [
        item for item in self._recentDirectorBriefings if item.get("promptId") != promptId
    ]

    if activeSession:
        removalEvent = {
            "id": promptId,
            "status": TeamsbotDirectorPromptStatus.CONSUMED.value,
            "mode": storedPrompt.get("mode"),
            "text": storedPrompt.get("text"),
            "removed": True,
        }
        await _emitSessionEvent(activeSession, "directorPrompt", removalEvent)
    return True
|
|
|
|
async def _processDirectorPrompt(self, prompt: Dict[str, Any]) -> None:
    """Run the agent for a director prompt and deliver the result.

    Marks the prompt RUNNING, builds the agent task brief from the operator
    text, the attached file IDs and the recent transcript, then runs
    ``_runAgentForMeeting`` in director-prompt mode. On success the prompt is
    stored as SUCCEEDED (persistent prompts) or CONSUMED (all other modes). On
    failure it is stored as FAILED and removed from the active persistent
    prompts; the exception is logged and not re-raised. Every status change is
    also emitted as a ``directorPrompt`` session event.

    Args:
        prompt: Director prompt record; the keys ``sessionId``, ``id``,
            ``text``, ``fileIds`` and ``mode`` are read.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb

    sessionId = prompt.get("sessionId")
    promptId = prompt.get("id")
    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)

    interface.updateDirectorPrompt(promptId, {
        "status": TeamsbotDirectorPromptStatus.RUNNING.value,
    })
    await _emitSessionEvent(sessionId, "directorPrompt", {
        "id": promptId,
        "status": TeamsbotDirectorPromptStatus.RUNNING.value,
    })

    recentTranscript = self._renderRecentTranscriptForAgent(maxLines=20)
    directorText = (prompt.get("text") or "").strip()
    attachedFileIds = list(prompt.get("fileIds") or [])

    # Single source of truth for "is this a persistent prompt?". The mode may
    # arrive as an enum member or as a plain string in any letter case, so it
    # is normalised once and reused for both the task brief and the final
    # status decision.
    rawMode = prompt.get("mode")
    promptMode = str(getattr(rawMode, "value", rawMode) or "").lower()
    isPersistentPrompt = promptMode == TeamsbotDirectorPromptMode.PERSISTENT.value.lower()

    filesBlock = ""
    if attachedFileIds:
        filesBlock = (
            "\nANGEHAENGTE DOKUMENTE (UDB-File-IDs): "
            + ", ".join(attachedFileIds)
            + "\nDu MUSST diese Dokumente VOR der finalen Antwort lesen / zusammenfassen "
            "(z.B. summarizeContent, readFile, readContentObjects, describeImage). "
            "Beziehe Fakten und Zitate aus den Dokumenten in deine Notiz / dein "
            "Meeting-Reply ein, statt allgemein zu antworten.\n"
        )

    persistentNoteHint = ""
    if isPersistentPrompt and attachedFileIds:
        persistentNoteHint = (
            "\nSPEZIAL fuer PERSISTENT + Dokumente: Wenn die Anweisung KEIN explizites "
            "Meeting-Statement verlangt, antworte mit 'SILENT:' und liefere als interne "
            "Notiz eine STRUKTURIERTE, faktendichte Briefing-Zusammenfassung der Dokumente "
            "(Stichpunkte, Kennzahlen, Aussagen, die fuer Folgefragen im Meeting relevant "
            "sein koennen). Diese Notiz wird spaeteren Meeting-Antworten als Wissensbasis "
            "vorgelegt — schreibe sie also so, dass der Bot daraus zitieren kann.\n"
        )

    # Task brief for the agent. It instructs the agent to start its final
    # answer with either 'MEETING_REPLY:' or 'SILENT:'.
    taskText = (
        f"Du bist der KI-Assistent in einem laufenden Teams-Meeting (Bot-Name: {self.config.botName}).\n"
        f"Der Operator hat dir folgende PRIVATE Regieanweisung gegeben (die anderen Teilnehmer im "
        f"Meeting sehen sie NICHT). Sie ist KEINE Frage an das Meeting, sondern eine interne "
        f"Anweisung an dich:\n\n"
        f"{directorText}\n"
        f"{filesBlock}"
        f"{persistentNoteHint}\n"
        f"AKTUELLER MEETING-KONTEXT (juengste Aussagen):\n{recentTranscript}\n\n"
        "ANTWORT-PROTOKOLL — Beginne deine FINALE Antwort mit GENAU EINEM dieser Marker:\n"
        " • 'MEETING_REPLY:' gefolgt vom Text, der im Meeting gesprochen / in den Meeting-Chat "
        "gepostet werden soll. Verwende diesen Marker NUR, wenn die Regieanweisung dich explizit "
        "auffordert, jetzt etwas im Meeting zu sagen oder zu schreiben (Beispiele: 'stell dich vor', "
        "'fasse zusammen', 'stelle Person X eine Frage', 'beantworte die letzte Frage'). Halte den "
        "Text kurz, sprachlich passend zur Stimme und ohne Marker oder Meta-Kommentare.\n"
        " • 'SILENT:' gefolgt von einer internen Notiz fuer das Operator-UI. "
        "Verwende diesen Marker fuer interne Direktiven und Wissens-Briefings (Beispiele: "
        "'achte ab jetzt auf X', 'merke dir Y', 'studiere Dokument Z'). "
        "Dieser Text wird NICHT ins Meeting gegeben, dient aber spaeteren Meeting-Antworten "
        "als Wissensbasis. Wenn Dokumente angehaengt sind, MUSS die Notiz konkrete, "
        "zitierfaehige Fakten aus den Dokumenten enthalten.\n\n"
        "Standard ist SILENT, wenn nicht eindeutig zur Meeting-Interaktion aufgefordert wurde. "
        "Wiederhole NIEMALS die Regieanweisung selbst im MEETING_REPLY-Text."
    )

    try:
        finalText = await self._runAgentForMeeting(
            sessionId=sessionId,
            taskText=taskText,
            fileIds=attachedFileIds,
            sourceLabel="directorPrompt",
            triggerTranscriptId=None,
            promptId=promptId,
            directorPromptMode=True,
        )

        updates: Dict[str, Any] = {
            "status": TeamsbotDirectorPromptStatus.SUCCEEDED.value,
            "responseText": finalText or "",
        }
        if not isPersistentPrompt:
            # Non-persistent prompts are used up by this single run.
            updates["status"] = TeamsbotDirectorPromptStatus.CONSUMED.value
            updates["consumedAt"] = getUtcTimestamp()
        interface.updateDirectorPrompt(promptId, updates)
        await _emitSessionEvent(sessionId, "directorPrompt", {
            "id": promptId,
            "status": updates["status"],
            "responseText": finalText,
        })

    except Exception as e:
        logger.error(
            f"Session {sessionId}: Director prompt {promptId} failed: {type(e).__name__}: {e}",
            exc_info=True,
        )
        interface.updateDirectorPrompt(promptId, {
            "status": TeamsbotDirectorPromptStatus.FAILED.value,
            "statusMessage": f"{type(e).__name__}: {str(e)[:300]}",
        })
        await _emitSessionEvent(sessionId, "directorPrompt", {
            "id": promptId,
            "status": TeamsbotDirectorPromptStatus.FAILED.value,
            "error": f"{type(e).__name__}: {str(e)[:300]}",
        })
        # A failed prompt must not keep steering later answers.
        self._activePersistentPrompts = [
            p for p in self._activePersistentPrompts if p.get("id") != promptId
        ]
|
|
|
|
def _renderRecentTranscriptForAgent(self, maxLines: int = 20) -> str:
|
|
"""Render the most recent context buffer entries for agent task brief."""
|
|
if not self._contextBuffer:
|
|
return "(noch keine Aussagen erfasst)"
|
|
recent = self._contextBuffer[-maxLines:]
|
|
lines = []
|
|
for seg in recent:
|
|
speaker = seg.get("speaker", "Unknown")
|
|
text = seg.get("text", "")
|
|
segSource = seg.get("source", "caption")
|
|
prefix = "Chat: " if segSource == "chat" else ""
|
|
if self._isBotSpeaker(speaker):
|
|
lines.append(f"[YOU ({self.config.botName})]: {text}")
|
|
else:
|
|
lines.append(f"[{prefix}{speaker}]: {text}")
|
|
return "\n".join(lines)
|
|
|
|
async def _interimAgentBusyMessage(self) -> Optional[str]:
|
|
"""Short spoken/chat line before a potentially long agent run."""
|
|
return await self._pickEphemeralPhrase("agentBusy")
|
|
|
|
async def _interimAgentRoundMessage(
    self, lastToolLabel: Optional[str] = None
) -> Optional[str]:
    """Ask the AI for a one-sentence progress notice during a long agent run.

    Args:
        lastToolLabel: Display label of the most recent tool call, used as the
            activity hint; a generic hint is used when it is missing.

    Returns:
        The generated sentence (surrounding quotes removed, at most 200
        characters), or None when the AI call fails or yields no content.
    """
    language = (self.config.language or "").strip() or "en-US"
    assistantName = (self.config.botName or "the assistant").strip()
    activity = lastToolLabel or "working on the task"

    prompt = "".join([
        f"You are a meeting assistant named '{assistantName}'.\n",
        f"Target spoken language (BCP-47): {language}\n\n",
        f"The assistant is currently busy with: {activity}\n\n",
        f"Generate ONE short sentence (max 12 words) in {language} ",
        "that tells the audience what the assistant is doing right now. ",
        "Natural, spoken style. No step numbers. No quotes around the output.\n",
        "Output ONLY the sentence, nothing else.",
    ])

    try:
        ai = createAiService(
            self.currentUser, self.mandateId, self.instanceId
        )
        await ai.ensureAiObjectsInitialized()
        callOptions = AiCallOptions(
            operationType=OperationTypeEnum.DATA_ANALYSE,
            priority=PriorityEnum.SPEED,
        )
        response = await ai.callAi(
            AiCallRequest(prompt=prompt, context="", options=callOptions)
        )
    except Exception as aiErr:
        # Progress notices are best effort; a failure only skips the notice.
        logger.debug(f"Agent round phrase generation failed: {aiErr}")
        return None

    if not response or response.errorCount != 0 or not response.content:
        return None

    sentence = response.content.strip().strip('"').strip("'")
    return sentence[:200]
|
|
|
|
async def _notifyMeetingEphemeral(self, sessionId: str, text: str) -> None:
    """Send a short interim line to the meeting without storing a bot response.

    Depending on the configured response channel the text is spoken, posted to
    the meeting chat, or both. If speaking fails, the text is posted to chat
    instead. Afterwards an ``agentRun`` session event with status
    ``interimNotice`` is emitted. Nothing happens when no WebSocket is
    attached.
    """
    ws = self._websocket
    voice = self._voiceInterface
    if not ws:
        logger.warning(f"Session {sessionId}: Interim notice skipped — no WebSocket")
        return

    # The channel may be an enum member or a plain string.
    channel = self.config.responseChannel
    channelName = channel.value if hasattr(channel, "value") else str(channel)
    channelName = channelName.lower().strip()
    useVoice = channelName in ("voice", "both")
    useChat = channelName in ("chat", "both")

    if useVoice and voice:
        isCancelled = self._makeAnswerCancelHook()
        async with self._meetingTtsLock:
            ttsResult = await _speakTextChunked(
                websocket=ws,
                voiceInterface=voice,
                sessionId=sessionId,
                voiceText=_voiceFriendlyMeetingText(text),
                languageCode=self.config.language,
                voiceName=self.config.voiceId,
                isCancelled=isCancelled,
            )
        if not ttsResult.get("success"):
            logger.warning(
                f"Session {sessionId}: Interim TTS failed ({ttsResult.get('error')}) — falling back to chat"
            )
            useChat = True

    if useChat:
        try:
            chatMessage = {
                "type": "sendChatMessage",
                "sessionId": sessionId,
                "text": text,
            }
            await ws.send_text(json.dumps(chatMessage))
        except Exception as chatErr:
            logger.warning(f"Session {sessionId}: Interim chat failed: {chatErr}")

    noticeEvent = {
        "status": "interimNotice",
        "message": text,
        "timestamp": getUtcTimestamp(),
    }
    await _emitSessionEvent(sessionId, "agentRun", noticeEvent)
|
|
|
|
async def _runAgentForMeeting(
    self,
    sessionId: str,
    taskText: str,
    fileIds: List[str],
    sourceLabel: str,
    triggerTranscriptId: Optional[str] = None,
    promptId: Optional[str] = None,
    directorPromptMode: bool = False,
) -> str:
    """Run agentService.runAgent for a meeting context.

    Streams the agent events, mirrors them to the operator UI as ``agentRun``
    session events and handles the final text:

      * normal mode: the final text is delivered to the meeting;
      * director-prompt mode: the final text is parsed with
        ``_parseDirectorPromptFinal``; the result is recorded in the briefing
        pool and either delivered to the meeting (kind "meeting") or persisted
        as an internal-only reply.

    Outside director-prompt mode the meeting additionally receives a short
    "busy" notice before the run and a progress notice from round 2 onwards.

    Args:
        sessionId: Session the run belongs to.
        taskText: Prompt handed to the agent.
        fileIds: File IDs made available to the agent (may be empty).
        sourceLabel: Label identifying what triggered the run; used in events
            and in the persisted intent/reasoning.
        triggerTranscriptId: Transcript that triggered the run, if any.
        promptId: Director prompt id, if the run belongs to one.
        directorPromptMode: Whether to apply the director-prompt handling.

    Returns:
        Normal mode: the agent's final text (possibly empty). Director-prompt
        mode: the text sent to the meeting, or "" for a silent decision.

    Raises:
        RuntimeError: When the agent emits an ERROR event. Any exception raised
            while streaming is reported as an ``agentRun`` error event and
            re-raised.
    """
    from modules.serviceCenter.services.serviceAgent.datamodelAgent import (
        AgentConfig, AgentEventTypeEnum
    )

    ctx = ServiceCenterContext(
        user=self.currentUser,
        mandateId=self.mandateId,
        featureInstanceId=self.instanceId,
        featureCode="teamsbot",
    )
    agentService = _getServiceCenterService("agent", ctx)

    workflowId = f"teamsbot:{sessionId}"

    agentConfig = AgentConfig(
        maxRounds=TEAMSBOT_AGENT_MAX_ROUNDS,
        maxCostCHF=TEAMSBOT_AGENT_MAX_COST_CHF,
        toolSet="core",
        initialToolboxes=["core", "web"],
        excludeActionTools=True,
    )

    await _emitSessionEvent(sessionId, "agentRun", {
        "source": sourceLabel,
        "promptId": promptId,
        "status": "started",
        "timestamp": getUtcTimestamp(),
    })

    # Director prompts are private, so the meeting gets no "busy" notice.
    if not directorPromptMode:
        try:
            interimText = await self._interimAgentBusyMessage()
            if interimText:
                await self._notifyMeetingEphemeral(sessionId, interimText)
        except Exception as interimErr:
            logger.warning(f"Session {sessionId}: Interim agent notice failed: {interimErr}")

    finalText: str = ""
    rounds = 0
    lastToolLabel: Optional[str] = None
    try:
        async for event in agentService.runAgent(
            prompt=taskText,
            fileIds=fileIds or None,
            config=agentConfig,
            toolSet="core",
            workflowId=workflowId,
        ):
            if event.type == AgentEventTypeEnum.AGENT_PROGRESS:
                rounds += 1
                pdata = event.data or {}
                roundNum = int(pdata.get("round", rounds))
                maxR = int(pdata.get("maxRounds", TEAMSBOT_AGENT_MAX_ROUNDS))
                await _emitSessionEvent(sessionId, "agentRun", {
                    "source": sourceLabel,
                    "promptId": promptId,
                    "status": "progress",
                    "round": roundNum,
                    "maxRounds": maxR,
                })
                if roundNum >= 2 and not directorPromptMode:
                    # Best-effort progress notice; failures must not abort the run.
                    try:
                        roundText = await self._interimAgentRoundMessage(lastToolLabel)
                        if roundText:
                            await self._notifyMeetingEphemeral(sessionId, roundText)
                    except Exception as roundNoticeErr:
                        logger.warning(
                            f"Session {sessionId}: Per-round agent notice failed: {roundNoticeErr}"
                        )
            elif event.type == AgentEventTypeEnum.TOOL_CALL:
                evtData = event.data or {}
                toolName = evtData.get("toolName")
                lastToolLabel = evtData.get("displayLabel")
                await _emitSessionEvent(sessionId, "agentRun", {
                    "source": sourceLabel,
                    "promptId": promptId,
                    "status": "toolCall",
                    "toolName": toolName,
                    "displayLabel": lastToolLabel,
                })
            elif event.type == AgentEventTypeEnum.TOOL_RESULT:
                evtData = event.data or {}
                # The payload is only used for a UI summary. Coerce it to text
                # first: slicing a dict would raise TypeError and abort the
                # whole agent run.
                rawResult = evtData.get("data") or ""
                if not isinstance(rawResult, str):
                    rawResult = str(rawResult)
                resultSnippet = rawResult[:200]
                await _emitSessionEvent(sessionId, "agentRun", {
                    "source": sourceLabel,
                    "promptId": promptId,
                    "status": "toolResult",
                    "toolName": evtData.get("toolName", ""),
                    "success": evtData.get("success", True),
                    "summary": resultSnippet,
                })
            elif event.type == AgentEventTypeEnum.FILE_CREATED:
                await _emitSessionEvent(sessionId, "documentCreated", event.data or {})
            elif event.type == AgentEventTypeEnum.FINAL:
                finalText = (event.content or "").strip()
            elif event.type == AgentEventTypeEnum.ERROR:
                raise RuntimeError(event.content or "Agent error")
    except Exception as runErr:
        await _emitSessionEvent(sessionId, "agentRun", {
            "source": sourceLabel,
            "promptId": promptId,
            "status": "error",
            "error": str(runErr)[:500],
        })
        raise

    await _emitSessionEvent(sessionId, "agentRun", {
        "source": sourceLabel,
        "promptId": promptId,
        "status": "completed",
        "rounds": rounds,
        "hasText": bool(finalText),
    })

    if finalText:
        if directorPromptMode:
            decision = _parseDirectorPromptFinal(finalText)
            kind = decision.get("kind", "silent")
            meetingText = (decision.get("meetingText") or "").strip()
            internalNote = (decision.get("internalNote") or "").strip()

            logger.info(
                f"Session {sessionId}: Director prompt {promptId} -> kind={kind}, "
                f"meetingChars={len(meetingText)}, noteChars={len(internalNote)}"
            )

            await _emitSessionEvent(sessionId, "directorPrompt", {
                "id": promptId,
                "status": "decision",
                "decision": kind,
                "meetingText": meetingText,
                "internalNote": internalNote,
            })

            # Remember the outcome in the briefing pool. The stored prompt
            # record is preferred; if the lookup fails, a stub carrying only
            # the id is recorded instead.
            try:
                promptRecord: Dict[str, Any] = {}
                if promptId:
                    try:
                        from . import interfaceFeatureTeamsbot as _ifaceDb
                        _iface = _ifaceDb.getInterface(
                            self.currentUser, self.mandateId, self.instanceId
                        )
                        promptRecord = _iface.getDirectorPrompt(promptId) or {}
                    except Exception as _lookupErr:
                        logger.debug(
                            f"Briefing pool: could not look up prompt {promptId}: {_lookupErr}"
                        )
                if promptRecord or promptId:
                    self._recordDirectorBriefing(
                        prompt=promptRecord or {"id": promptId},
                        internalNote=internalNote,
                        meetingText=meetingText,
                    )
            except Exception as briefErr:
                logger.warning(
                    f"Session {sessionId}: Director briefing pool update failed: {briefErr}"
                )

            # Keep the in-memory persistent prompt in sync with the result.
            if promptId:
                for p in self._activePersistentPrompts:
                    if p.get("id") == promptId:
                        p["responseText"] = internalNote or meetingText or finalText
                        break

            if kind == "meeting" and meetingText:
                await self._deliverTextToMeeting(
                    sessionId=sessionId,
                    text=meetingText,
                    detectedIntent=f"agent:{sourceLabel}",
                    reasoning=f"Agent run ({sourceLabel})",
                    triggerTranscriptId=triggerTranscriptId,
                )
            else:
                await self._persistInternalDirectorReply(
                    sessionId=sessionId,
                    internalNote=internalNote or finalText,
                    promptId=promptId,
                    triggerTranscriptId=triggerTranscriptId,
                )
            return meetingText if kind == "meeting" else ""

        await self._deliverTextToMeeting(
            sessionId=sessionId,
            text=finalText,
            detectedIntent=f"agent:{sourceLabel}",
            reasoning=f"Agent run ({sourceLabel})",
            triggerTranscriptId=triggerTranscriptId,
        )

    return finalText
|
|
|
|
async def _deliverTextToMeeting(
    self,
    sessionId: str,
    text: str,
    detectedIntent: str,
    reasoning: str,
    triggerTranscriptId: Optional[str] = None,
) -> None:
    """Send agent text into the meeting via TTS + chat per config.

    Steps:
      1. speak the voice-summarised text when the channel includes voice and a
         voice interface plus WebSocket are available; if speaking fails, fall
         back to chat;
      2. post the full text to the meeting chat when the channel includes chat
         (or the fallback applies);
      3. persist a bot response and a bot transcript entry, update the context
         buffer and the "last bot response" bookkeeping, emit the matching
         session events, and increment the session's bot response counter.

    Args:
        sessionId: Session to deliver to.
        text: Text to deliver.
        detectedIntent: Raw intent label, coerced for persistence.
        reasoning: Human-readable reason stored with the response.
        triggerTranscriptId: Transcript that triggered the response, if any.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb
    interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)

    websocket = self._websocket
    voiceInterface = self._voiceInterface

    # The channel may be an enum member or a plain string.
    channelRaw = self.config.responseChannel
    channelStr = (
        channelRaw.value if hasattr(channelRaw, "value") else str(channelRaw)
    ).lower().strip()
    sendVoice = channelStr in ("voice", "both")
    sendChat = channelStr in ("chat", "both")

    # The persisted response type reflects the configured channel, not a
    # later chat fallback.
    if sendVoice and sendChat:
        responseType = TeamsbotResponseType.BOTH
    elif sendVoice:
        responseType = TeamsbotResponseType.AUDIO
    else:
        responseType = TeamsbotResponseType.CHAT

    ttsOutcome: Optional[Dict[str, Any]] = None
    if sendVoice and voiceInterface and websocket:
        from .serviceConversation import _summarizeForVoice
        spokenText = await _summarizeForVoice(self, sessionId, text)
        cancelHook = self._makeAnswerCancelHook()
        async with self._meetingTtsLock:
            ttsOutcome = await _speakTextChunked(
                websocket=websocket,
                voiceInterface=voiceInterface,
                sessionId=sessionId,
                voiceText=spokenText,
                languageCode=self.config.language,
                voiceName=self.config.voiceId,
                isCancelled=cancelHook,
            )
        await _emitSessionEvent(sessionId, "ttsDeliveryStatus", {
            "status": "dispatched" if ttsOutcome.get("success") else "failed",
            "hasWebSocket": True,
            "chunks": ttsOutcome.get("chunks"),
            "played": ttsOutcome.get("played"),
            "error": ttsOutcome.get("error"),
            "timestamp": getUtcTimestamp(),
        })
        if not ttsOutcome.get("success"):
            logger.warning(
                f"Session {sessionId}: Agent TTS delivery failed "
                f"({ttsOutcome.get('error')}) — falling back to meeting chat"
            )
            sendChat = True

    if sendChat and websocket:
        try:
            await websocket.send_text(json.dumps({
                "type": "sendChatMessage",
                "sessionId": sessionId,
                "text": text,
            }))
            logger.info(f"Session {sessionId}: Agent chat dispatched ({len(text)} chars)")
        except Exception as chatErr:
            logger.warning(f"Session {sessionId}: Agent chat delivery failed: {chatErr}")

    intentEnum, intentMeta = _coercePersistedDetectedIntent(detectedIntent)
    reasoningForDb = (
        f"{reasoning} [{intentMeta}]" if intentMeta else reasoning
    )

    botResponseData = TeamsbotBotResponse(
        sessionId=sessionId,
        responseText=text,
        responseType=responseType,
        detectedIntent=intentEnum,
        reasoning=reasoningForDb,
        triggeredByTranscriptId=triggerTranscriptId,
        modelName="agent",
        processingTime=0.0,
        priceCHF=0.0,
        timestamp=getUtcTimestamp(),
    ).model_dump()
    createdResponse = interface.createBotResponse(botResponseData)

    await _emitSessionEvent(sessionId, "botResponse", {
        "id": createdResponse.get("id"),
        "responseText": text,
        "responseType": responseType.value,
        "detectedIntent": intentEnum.value,
        "reasoning": reasoningForDb,
        "modelName": "agent",
        "processingTime": 0.0,
        "priceCHF": 0.0,
        "timestamp": botResponseData.get("timestamp"),
    })

    # The bot's own statement also becomes part of the transcript.
    botTranscriptData = TeamsbotTranscript(
        sessionId=sessionId,
        speaker=self.config.botName,
        text=text,
        timestamp=getUtcTimestamp(),
        confidence=1.0,
        language=self.config.language,
        isFinal=True,
        source="botResponse",
    ).model_dump()
    botTranscript = interface.createTranscript(botTranscriptData)

    self._contextBuffer.append({
        "speaker": self.config.botName,
        "text": text,
        "timestamp": getUtcTimestamp(),
        "source": "botResponse",
    })
    self._lastTranscriptSpeaker = self.config.botName
    self._lastTranscriptText = text
    self._lastTranscriptId = botTranscript.get("id")
    self._lastBotResponseText = text.strip().lower()
    self._lastBotResponseTs = time.time()
    self._followUpWindowEnd = time.time() + 15.0

    await _emitSessionEvent(sessionId, "transcript", {
        "id": botTranscript.get("id"),
        "speaker": self.config.botName,
        "text": text,
        "confidence": 1.0,
        "timestamp": getUtcTimestamp(),
        "isContinuation": False,
        "source": "botResponse",
        "speakerResolvedFromHint": False,
    })

    session = interface.getSession(sessionId)
    if session:
        # "or 0" also covers a counter that is present but None, which would
        # otherwise raise TypeError on the addition.
        count = (session.get("botResponseCount") or 0) + 1
        interface.updateSession(sessionId, {"botResponseCount": count})
|
|
|
|
async def _persistInternalDirectorReply(
    self,
    sessionId: str,
    internalNote: str,
    promptId: Optional[str],
    triggerTranscriptId: Optional[str] = None,
) -> None:
    """Store a silent director-prompt reply for the operator UI only.

    The note is saved as a bot response and announced with a ``botResponse``
    session event flagged ``internalOnly``; nothing is sent to the meeting
    here. An empty or blank note is ignored.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb

    noteText = (internalNote or "").strip()
    if not noteText:
        return

    db = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)

    intentEnum, _unusedMeta = _coercePersistedDetectedIntent("agent:directorPrompt")
    dbReasoning = (
        f"Director prompt {promptId or ''} — silent / internal only "
        f"(not sent to meeting)"
    ).strip()

    responseRecord = TeamsbotBotResponse(
        sessionId=sessionId,
        responseText=noteText,
        responseType=TeamsbotResponseType.CHAT,
        detectedIntent=intentEnum,
        reasoning=dbReasoning,
        triggeredByTranscriptId=triggerTranscriptId,
        modelName="agent",
        processingTime=0.0,
        priceCHF=0.0,
        timestamp=getUtcTimestamp(),
    ).model_dump()
    stored = db.createBotResponse(responseRecord)

    uiEvent = {
        "id": stored.get("id"),
        "responseText": noteText,
        "responseType": TeamsbotResponseType.CHAT.value,
        "detectedIntent": intentEnum.value,
        "reasoning": dbReasoning,
        "modelName": "agent",
        "processingTime": 0.0,
        "priceCHF": 0.0,
        "timestamp": responseRecord.get("timestamp"),
        "internalOnly": True,
        "promptId": promptId,
    }
    await _emitSessionEvent(sessionId, "botResponse", uiEvent)

    logger.info(
        f"Session {sessionId}: Director prompt {promptId} silent reply "
        f"persisted internally ({len(noteText)} chars)"
    )
|
|
|
|
# =========================================================================
|
|
# Greeting (AI-localised)
|
|
# =========================================================================
|
|
|
|
async def _generateGreetingText(self, languageCode: str) -> str:
    """Generate the bot's join greeting via AI.

    Builds a localisation prompt from the configured bot name, persona
    and target language, sends it to the AI service and returns the
    cleaned-up greeting text.

    Args:
        languageCode: Language code for the greeting. When empty,
            ``self.config.language`` is used, and "en-US" as last resort.

    Returns:
        The greeting text, or "" when the AI call raised, reported an
        error, or produced no usable text.
    """
    targetLang = (languageCode or self.config.language or "").strip() or "en-US"

    # Only a genuinely configured name is shortened to its first word.
    # The generic fallback must stay intact: splitting "the assistant"
    # would make the prompt ask the model to introduce itself as "the",
    # and a whitespace-only name would yield an empty display name.
    configuredName = (self.config.botName or "").strip()
    firstName = configuredName.split(" ")[0] if configuredName else "the assistant"
    persona = (self.config.aiSystemPrompt or "").strip()

    prompt = (
        f"You are localizing the join greeting for a meeting assistant.\n\n"
        f"Assistant display name (use exactly this, no translation): {firstName}\n\n"
        f"Persona / style guide for the assistant:\n"
        f"{persona or '(no persona configured — use a neutral, polite, professional tone)'}\n\n"
        f"Target spoken language (BCP-47 code): {targetLang}\n\n"
        f"Generate ONE short greeting (max ~14 words) for the assistant "
        f"to say AND post in chat the moment it joins a meeting. The "
        f"greeting MUST:\n"
        f"  - be in the target language\n"
        f"  - introduce the assistant by name ({firstName})\n"
        f"  - signal that it is now present and ready\n"
        f"  - sound natural when spoken aloud (this text is also TTS'd)\n\n"
        f"Output ONLY the greeting text, no quotes, no markdown, no "
        f"commentary, no surrounding punctuation beyond what naturally "
        f"belongs to the sentence."
    )

    try:
        aiService = createAiService(
            self.currentUser, self.mandateId, self.instanceId
        )
        await aiService.ensureAiObjectsInitialized()
        request = AiCallRequest(
            prompt=prompt,
            context="",
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.SPEED,
            ),
        )
        response = await aiService.callAi(request)
    except Exception as aiErr:
        # Best effort: a failed greeting must not break the caller.
        logger.warning(
            f"Greeting generation crashed (lang={targetLang}): {aiErr}"
        )
        return ""

    if not response or response.errorCount != 0 or not response.content:
        logger.warning(
            f"Greeting generation returned empty/error (lang={targetLang})"
        )
        return ""

    text = response.content.strip()
    # Drop a leading code-fence line (``` plus optional language tag) ...
    text = re.sub(r"^```.*?\n", "", text, flags=re.DOTALL)
    # ... and a trailing closing fence, in case the model wrapped its answer.
    text = re.sub(r"\n```\s*$", "", text)
    # Remove quotes/backticks the model may have put around the sentence.
    text = text.strip().strip("\"'`").strip()
    if not text:
        return ""
    logger.info(
        f"Greeting generated (lang={targetLang}, chars={len(text)}): {text[:80]}"
    )
    return text
|
|
|
|
async def _dispatchGreetingToMeeting(
    self,
    sessionId: str,
    greetingText: str,
    greetingLang: str,
    sendToChat: bool,
    interface: Any,
    voiceInterface: Any,
    websocket: WebSocket,
) -> None:
    """Deliver the join greeting: speak it, optionally chat it, record it.

    Steps, in order:
      1. emit a "requested" ``ttsDeliveryStatus`` event,
      2. speak the greeting via ``_speakTextChunked`` while holding
         ``self._meetingTtsLock`` and emit a "dispatched" or "failed" status,
      3. when ``sendToChat`` is set, send a ``sendChatMessage`` frame over
         the websocket (failures are only logged),
      4. store the greeting as a transcript, append it to the context
         buffer and update the last-transcript bookkeeping,
      5. emit ``botResponse`` and ``transcript`` session events.

    Any exception raised along the way is logged as a warning and not
    propagated.

    Args:
        sessionId: Session the greeting belongs to.
        greetingText: Greeting to speak / post.
        greetingLang: Language code passed to TTS and stored on the transcript.
        sendToChat: Whether the greeting is also sent as a chat message.
        interface: Object providing ``createTranscript``.
        voiceInterface: Voice backend handed through to ``_speakTextChunked``.
        websocket: Connection used for TTS delivery and the chat frame.
    """
    try:
        requestedStatus = {
            "status": "requested",
            "hasWebSocket": True,
            "message": "Greeting TTS requested",
            "timestamp": getUtcTimestamp(),
        }
        await _emitSessionEvent(sessionId, "ttsDeliveryStatus", requestedStatus)

        isCancelledHook = self._makeAnswerCancelHook()
        async with self._meetingTtsLock:
            speakResult = await _speakTextChunked(
                websocket=websocket,
                voiceInterface=voiceInterface,
                sessionId=sessionId,
                voiceText=_voiceFriendlyMeetingText(greetingText),
                languageCode=greetingLang,
                voiceName=self.config.voiceId,
                isCancelled=isCancelledHook,
            )

        # Log first, then build the status payload, then emit once.
        if not speakResult.get("success"):
            logger.warning(
                f"Greeting TTS failed for session {sessionId}: {speakResult.get('error')}"
            )
            deliveryStatus = {
                "status": "failed",
                "hasWebSocket": True,
                "message": speakResult.get("error"),
                "timestamp": getUtcTimestamp(),
            }
        else:
            logger.info(
                f"Greeting TTS sent for session {sessionId} "
                f"(chunks={speakResult.get('chunks')})"
            )
            deliveryStatus = {
                "status": "dispatched",
                "hasWebSocket": True,
                "chunks": speakResult.get("chunks"),
                "played": speakResult.get("played"),
                "timestamp": getUtcTimestamp(),
            }
        await _emitSessionEvent(sessionId, "ttsDeliveryStatus", deliveryStatus)

        if sendToChat:
            try:
                chatFrame = json.dumps({
                    "type": "sendChatMessage",
                    "sessionId": sessionId,
                    "text": greetingText,
                })
                await websocket.send_text(chatFrame)
                logger.info(f"Greeting chat dispatch queued for session {sessionId}")
            except Exception as chatErr:
                # A chat failure must not prevent recording the greeting.
                logger.warning(
                    f"Greeting chat dispatch failed for session {sessionId}: {chatErr}"
                )

        transcriptModel = TeamsbotTranscript(
            sessionId=sessionId,
            speaker=self.config.botName,
            text=greetingText,
            timestamp=getUtcTimestamp(),
            confidence=1.0,
            language=greetingLang,
            isFinal=True,
            source="botResponse",
        )
        storedTranscript = interface.createTranscript(transcriptModel.model_dump())

        bufferEntry = {
            "speaker": self.config.botName,
            "text": greetingText,
            "timestamp": getUtcTimestamp(),
            "source": "botResponse",
        }
        self._contextBuffer.append(bufferEntry)
        self._lastTranscriptSpeaker = self.config.botName
        self._lastTranscriptText = greetingText
        transcriptId = storedTranscript.get("id")
        self._lastTranscriptId = transcriptId

        botResponseEvent = {
            "id": transcriptId,
            "responseText": greetingText,
            "responseType": TeamsbotResponseType.AUDIO.value,
            "detectedIntent": "greeting",
            "reasoning": "Automatic join greeting",
            "timestamp": getUtcTimestamp(),
        }
        await _emitSessionEvent(sessionId, "botResponse", botResponseEvent)

        transcriptEvent = {
            "id": transcriptId,
            "speaker": self.config.botName,
            "text": greetingText,
            "confidence": 1.0,
            "timestamp": getUtcTimestamp(),
            "isContinuation": False,
            "source": "botResponse",
            "speakerResolvedFromHint": False,
        }
        await _emitSessionEvent(sessionId, "transcript", transcriptEvent)
    except Exception as dispatchErr:
        logger.warning(
            f"Greeting dispatch failed for session {sessionId}: {dispatchErr}"
        )
|
|
|
|
# =========================================================================
|
|
# Context Summarization
|
|
# =========================================================================
|
|
|
|
async def _summarizeSessionContext(self, sessionId: str, rawContext: str) -> str:
    """Summarize a long user-provided session context.

    Sends ``rawContext`` to the AI service with a (German) summarization
    prompt and returns the stripped result.

    Args:
        sessionId: Session id, used for log messages only.
        rawContext: The full context text to condense.

    Returns:
        The AI summary when the call succeeded and produced non-blank
        text; otherwise ``rawContext`` truncated to at most 2000
        characters. Exceptions from the AI call are logged, not raised.
    """
    try:
        aiService = createAiService(self.currentUser, self.mandateId, self.instanceId)
        await aiService.ensureAiObjectsInitialized()

        request = AiCallRequest(
            prompt=(
                "Fasse den folgenden Kontext auf die wesentlichen Punkte zusammen. "
                "Behalte alle wichtigen Fakten, Namen, Zahlen, Entscheidungen und Aktionspunkte. "
                "Entferne Fuelltext und Wiederholungen. "
                "Antworte NUR mit der Zusammenfassung, keine Erklaerungen oder Einleitungen."
            ),
            context=rawContext,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.SPEED,
            )
        )

        response = await aiService.callAi(request)
        summary = ""
        if response and response.errorCount == 0 and response.content:
            summary = response.content.strip()
        # A whitespace-only answer is not a summary; returning it would
        # silently discard the user's context, so fall back instead.
        if summary:
            logger.info(f"Session {sessionId}: Context summarized from {len(rawContext)} to {len(summary)} chars")
            return summary
        logger.warning(
            f"Session {sessionId}: Context summarization returned empty/error, "
            f"falling back to truncated context"
        )
    except Exception as e:
        logger.warning(f"Session context summarization failed for {sessionId}: {e}")

    # Slicing already leaves shorter strings untouched.
    return rawContext[:2000]
|
|
|
|
async def _summarizeContextBuffer(self, sessionId: str):
    """Summarize the older part of the context buffer.

    Takes the first half of ``self._contextBuffer``, renders it as
    ``[speaker]: text`` lines and asks the AI service for a short
    (German-prompted) summary, which is stored in ``self._contextSummary``.

    Nothing happens when a summary already exists or when the older half
    holds fewer than 10 segments. The buffer itself is not modified here.
    Exceptions are logged as warnings and not propagated.

    Args:
        sessionId: Session id, used for log messages only.
    """
    try:
        if self._contextSummary:
            return

        halfPoint = len(self._contextBuffer) // 2
        oldSegments = self._contextBuffer[:halfPoint]

        if len(oldSegments) < 10:
            return

        textToSummarize = "\n".join(
            f"[{seg.get('speaker', 'Unknown')}]: {seg.get('text', '')}"
            for seg in oldSegments
        )

        aiService = createAiService(self.currentUser, self.mandateId, self.instanceId)
        await aiService.ensureAiObjectsInitialized()

        request = AiCallRequest(
            prompt="Fasse das folgende Meeting-Transkript in 3-5 Saetzen zusammen. Nenne die wichtigsten Themen, Entscheidungen und offene Fragen. Antworte NUR mit der Zusammenfassung, keine Erklaerungen.",
            context=textToSummarize,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.SPEED,
            )
        )

        response = await aiService.callAi(request)
        # Guard on content as well: a successful response without content
        # would otherwise raise AttributeError on .strip() and surface as
        # a misleading "summarization failed" exception log.
        if response and response.errorCount == 0 and response.content:
            self._contextSummary = response.content.strip()
            logger.info(f"Session {sessionId}: Context summarized ({len(oldSegments)} segments -> {len(self._contextSummary)} chars)")
        else:
            logger.warning(f"Session {sessionId}: Context summarization returned empty/error")

    except Exception as e:
        logger.warning(f"Context summarization failed for session {sessionId}: {e}")
|
|
|
|
# =========================================================================
|
|
# Meeting Summary
|
|
# =========================================================================
|
|
|
|
async def _generateMeetingSummary(self, sessionId: str):
    """Generate an AI summary of the meeting after it ends.

    Loads the session's transcripts, renders them as ``[speaker]: text``
    lines, asks the AI service for a summary (German prompt) and stores
    the result in the session's ``summary`` field.

    Sessions with fewer than 5 transcripts are skipped. The summary is
    only persisted when the AI call succeeded and returned non-blank
    content, so an empty answer never overwrites the stored value.
    Exceptions are logged as errors and not propagated.

    Args:
        sessionId: Session whose transcripts are summarized.
    """
    try:
        from . import interfaceFeatureTeamsbot as interfaceDb

        interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
        transcripts = interface.getTranscripts(sessionId)

        if not transcripts or len(transcripts) < 5:
            return

        fullTranscript = "\n".join(
            f"[{t.get('speaker', 'Unknown')}]: {t.get('text', '')}"
            for t in transcripts
        )

        aiService = createAiService(self.currentUser, self.mandateId, self.instanceId)
        await aiService.ensureAiObjectsInitialized()

        request = AiCallRequest(
            prompt="Erstelle eine kurze Zusammenfassung dieses Meeting-Transkripts. Nenne die wichtigsten Punkte, Entscheidungen und offene Aktionspunkte.",
            context=fullTranscript,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.BALANCED,
            )
        )

        response = await aiService.callAi(request)
        summaryText = response.content if response and response.errorCount == 0 else None
        # Persist only real content; writing None or blank text would
        # clobber the field and still be reported as "generated".
        if summaryText and summaryText.strip():
            interface.updateSession(sessionId, {"summary": summaryText})
            logger.info(f"Meeting summary generated for session {sessionId}")
        else:
            logger.warning(f"Meeting summary generation returned empty/error for session {sessionId}")

    except Exception as e:
        logger.error(f"Failed to generate meeting summary for session {sessionId}: {e}")
|