Fix: add missing Automation2Workflow/Automation2WorkflowRun imports to interfaceFeatureGraphicalEditor.py (caused scheduler crash on boot) Refactor: gdprDeletion via onUserDelete lifecycle hooks Refactor: i18nBootSync accounting labels via app.py parameter injection Refactor: serviceHub moved to serviceCenter/serviceHub.py Split: teamsbot/service.py, realEstate/main, routeTrustee, routeBilling Cleanup: remove obsolete methodTrustee, serviceExceptions shim Co-authored-by: Cursor <cursoragent@cursor.com>
545 lines
22 KiB
Python
545 lines
22 KiB
Python
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Teamsbot Service — WebSocket handler & audio chunk processing.

Extracted from service.py. All functions accept `service` (a TeamsbotService
instance) as the first parameter so the class can delegate to them.
"""
|
|
|
|
import logging
|
|
import json
|
|
import asyncio
|
|
import time
|
|
import base64
|
|
from typing import Optional, Dict, Any
|
|
|
|
from fastapi import WebSocket
|
|
|
|
from modules.shared.timeUtils import getUtcTimestamp
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def handleBotWebSocket(service, websocket: WebSocket, sessionId: str):
    """Main WebSocket handler for Browser Bot communication.

    Prepares per-connection state on ``service`` (session context, bot account
    email, persistent director prompts), registers the service in
    ``_activeServices`` and then dispatches incoming JSON text frames by their
    ``type`` field until the connection ends. On exit the registration and the
    connection-bound attributes on ``service`` are cleared and a
    ``botConnectionState`` event with ``connected=False`` is emitted.

    Handled message types: ``transcript``, ``chatMessage``, ``status``,
    ``audioChunk``, ``voiceGreeting``, ``requestGreeting``, ``ping``,
    ``ttsPlaybackAck``, ``mfaChallenge``, ``chatSendFailed``, ``mfaResolved``.
    Any other type is logged and ignored.

    Args:
        service: TeamsbotService instance whose state this handler manages.
        websocket: WebSocket connection to the browser bot.
        sessionId: Identifier of the Teamsbot session served by this connection.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb
    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
    from .service import _activeServices, _emitSessionEvent
    from .serviceConversation import _processTranscript, _warmEphemeralPhrasePool

    interface = interfaceDb.getInterface(service.currentUser, service.mandateId, service.instanceId)
    voiceInterface = getVoiceInterface(service.currentUser, service.mandateId)

    session = interface.getSession(sessionId)
    if session:
        rawContext = session.get("sessionContext")
        if rawContext and len(rawContext) > 500:
            logger.info(f"Session {sessionId}: Summarizing long session context ({len(rawContext)} chars)...")
            service._sessionContext = await service._summarizeSessionContext(sessionId, rawContext)
        elif rawContext:
            service._sessionContext = rawContext
        if service._sessionContext:
            logger.info(f"Session {sessionId}: Session context ready ({len(service._sessionContext)} chars)")

    # Best effort: the bot account email is optional, but a lookup failure
    # should leave a trace instead of disappearing silently.
    try:
        systemBot = interface.getActiveSystemBot(service.mandateId)
        service._botAccountEmail = systemBot.get("email") if systemBot else None
        if service._botAccountEmail:
            logger.info(f"Session {sessionId}: Bot account email resolved: {service._botAccountEmail}")
    except Exception as botErr:
        logger.debug(f"Session {sessionId}: Could not resolve bot account email: {botErr}")
        service._botAccountEmail = None

    service._activeSessionId = sessionId
    service._websocket = websocket
    service._voiceInterface = voiceInterface
    _activeServices[sessionId] = service

    try:
        await _emitSessionEvent(sessionId, "botConnectionState", {
            "connected": True,
            "timestamp": getUtcTimestamp(),
        })
    except Exception as emitErr:
        logger.debug(f"Session {sessionId}: Could not emit botConnectionState(connected): {emitErr}")

    try:
        service._activePersistentPrompts = interface.getActivePersistentPrompts(sessionId) or []
        if service._activePersistentPrompts:
            logger.info(
                f"Session {sessionId}: Loaded {len(service._activePersistentPrompts)} active persistent director prompt(s)"
            )
    except Exception as restoreErr:
        logger.warning(f"Session {sessionId}: Could not restore persistent director prompts: {restoreErr}")
        service._activePersistentPrompts = []

    # Hold a reference for the lifetime of the handler: the event loop only
    # keeps weak references to tasks, so an unreferenced task may be collected
    # before it finishes.
    warmPoolTask = asyncio.create_task(_warmEphemeralPhrasePool(service, sessionId))

    logger.info(f"[WS] Handler started for session {sessionId}")

    msgCount = 0
    ownMfaTask = None  # MFA wait task created by THIS connection, if any
    try:
        while True:
            data = await websocket.receive()

            # A disconnect arrives as a regular message; calling receive()
            # again afterwards would raise, so leave the loop explicitly.
            if data.get("type") == "websocket.disconnect":
                logger.info(f"[WS] session={sessionId} disconnected by peer (code={data.get('code')})")
                break

            msgCount += 1

            # Some ASGI servers deliver both keys with one of them set to None,
            # so test the value rather than key presence.
            rawText = data.get("text")
            if rawText is None:
                logger.debug(f"[WS] session={sessionId} msg #{msgCount}: non-text data (keys: {list(data.keys())})")
                continue

            # A single malformed frame must not tear down the whole session.
            try:
                message = json.loads(rawText)
            except ValueError as parseErr:
                logger.warning(f"[WS] session={sessionId} msg #{msgCount}: invalid JSON ignored: {parseErr}")
                continue
            if not isinstance(message, dict):
                logger.warning(
                    f"[WS] session={sessionId} msg #{msgCount}: non-object JSON ignored ({type(message).__name__})"
                )
                continue

            msgType = message.get("type")
            if msgType not in ("audioChunk", "ping"):
                logger.info(f"[WS] session={sessionId} msg #{msgCount}: type={msgType}")

            if msgType == "transcript":
                transcript = message.get("transcript") or {}
                source = transcript.get("source", "caption")
                speaker = transcript.get("speaker", "Unknown")
                textPreview = (transcript.get("text", "") or "")[:60]
                logger.info(f"[WS] Transcript (source={source}, speaker={speaker}): {textPreview}...")
                await _processTranscript(
                    service,
                    sessionId=sessionId,
                    speaker=speaker,
                    text=transcript.get("text", ""),
                    isFinal=transcript.get("isFinal", True),
                    interface=interface,
                    voiceInterface=voiceInterface,
                    websocket=websocket,
                    source=source,
                )

            elif msgType == "chatMessage":
                chat = message.get("chat") or {}
                isHistory = chat.get("isHistory", False)
                source = "chatHistory" if isHistory else "chat"
                chatPreview = (chat.get("text", "") or "")[:60]
                logger.info(
                    f"[WS] Chat{'[HISTORY]' if isHistory else ''}: "
                    f"speaker={chat.get('speaker')}, text={chatPreview}..."
                )
                await _processTranscript(
                    service,
                    sessionId=sessionId,
                    speaker=chat.get("speaker", "Unknown"),
                    text=chat.get("text", ""),
                    isFinal=True,
                    interface=interface,
                    voiceInterface=voiceInterface,
                    websocket=websocket,
                    source=source,
                )

            elif msgType == "status":
                status = message.get("status")
                errorMessage = message.get("message")
                logger.info(f"[WS] Status: status={status}, message={errorMessage}")
                await _handleBotStatus(service, sessionId, status, errorMessage, interface)

            elif msgType == "audioChunk":
                audioData = message.get("audio") or {}
                audioBase64 = audioData.get("data", "")
                sampleRate = audioData.get("sampleRate", 16000)
                captureDiagnostics = audioData.get("captureDiagnostics") or {}
                if audioBase64:
                    await _processAudioChunk(
                        service,
                        sessionId=sessionId,
                        audioBase64=audioBase64,
                        sampleRate=sampleRate,
                        captureDiagnostics=captureDiagnostics,
                        interface=interface,
                        voiceInterface=voiceInterface,
                        websocket=websocket,
                    )

            elif msgType == "voiceGreeting":
                greetingText = message.get("text", "") or ""
                greetingLang = message.get("language", service.config.language)
                logger.info(
                    f"[WS] Voice greeting (legacy): text={greetingText[:60]}..., language={greetingLang}"
                )
                if greetingText and voiceInterface:
                    await service._dispatchGreetingToMeeting(
                        sessionId=sessionId,
                        greetingText=greetingText,
                        greetingLang=greetingLang,
                        sendToChat=False,
                        interface=interface,
                        voiceInterface=voiceInterface,
                        websocket=websocket,
                    )

            elif msgType == "requestGreeting":
                requestedLang = (
                    message.get("language") or service.config.language or ""
                ).strip() or "en-US"
                # Only used for the log line below.
                botNameHint = (
                    message.get("botName") or service.config.botName or ""
                ).strip() or service.config.botName
                logger.info(
                    f"[WS] Greeting request from bot: language={requestedLang}, name={botNameHint}"
                )
                if voiceInterface:
                    try:
                        greetingText = await service._generateGreetingText(requestedLang)
                    except Exception as genErr:
                        logger.warning(
                            f"Greeting generation failed for session {sessionId}: {genErr}"
                        )
                        greetingText = ""
                    if greetingText:
                        await service._dispatchGreetingToMeeting(
                            sessionId=sessionId,
                            greetingText=greetingText,
                            greetingLang=requestedLang,
                            sendToChat=True,
                            interface=interface,
                            voiceInterface=voiceInterface,
                            websocket=websocket,
                        )
                    else:
                        logger.warning(
                            f"Session {sessionId}: Skipping greeting — AI generation produced no text"
                        )

            elif msgType == "ping":
                await websocket.send_text(json.dumps({"type": "pong"}))

            elif msgType == "ttsPlaybackAck":
                playback = message.get("playback", {}) or {}
                status = playback.get("status", "unknown")
                ackMessage = playback.get("message") or "Bot playback status update"
                logger.info(
                    f"[WS] TTS playback ack: status={status}, format={playback.get('format')}, "
                    f"bytesBase64={playback.get('bytesBase64')}"
                )
                await _emitSessionEvent(sessionId, "ttsDeliveryStatus", {
                    "status": f"playback_{status}",
                    "hasWebSocket": True,
                    "message": ackMessage,
                    "timestamp": playback.get("timestamp") or getUtcTimestamp(),
                    "format": playback.get("format"),
                    "bytesBase64": playback.get("bytesBase64"),
                })

            elif msgType == "mfaChallenge":
                mfaData = message.get("mfa") or {}
                mfaType = mfaData.get("type", "unknown")
                displayNumber = mfaData.get("displayNumber")
                prompt = mfaData.get("prompt", "") or ""
                logger.info(f"[WS] MFA challenge: type={mfaType}, number={displayNumber}, prompt={prompt[:60]}")

                await _emitSessionEvent(sessionId, "mfaChallenge", {
                    "mfaType": mfaType,
                    "displayNumber": displayNumber,
                    "prompt": prompt,
                    "timestamp": getUtcTimestamp(),
                })

                from .routeFeatureTeamsbot import mfaCodeQueues, mfaWaitTasks

                # A repeated challenge must replace the previous waiter. Cancel
                # it and let its cleanup finish BEFORE registering the new
                # queue/task, otherwise the stale waiter would later unregister
                # the new entries and send a spurious timeout to the bot.
                previousTask = mfaWaitTasks.get(sessionId)
                if previousTask is not None and not previousTask.done():
                    previousTask.cancel()
                    await asyncio.gather(previousTask, return_exceptions=True)

                mfaQueue = asyncio.Queue()
                mfaCodeQueues[sessionId] = mfaQueue
                ownMfaTask = asyncio.create_task(
                    _waitAndForwardMfa(sessionId, mfaQueue, websocket)
                )
                mfaWaitTasks[sessionId] = ownMfaTask

            elif msgType == "chatSendFailed":
                errorData = message.get("error") or {}
                reason = errorData.get("reason", "unknown")
                failedText = errorData.get("text", "") or ""
                logger.warning(
                    f"[WS] Chat send failed for session {sessionId}: "
                    f"reason={reason}, text={failedText[:60]}"
                )
                await _emitSessionEvent(sessionId, "chatSendFailed", {
                    "reason": reason,
                    "message": errorData.get("message", "Chat message could not be sent"),
                    "text": failedText,
                    "timestamp": getUtcTimestamp(),
                })

            elif msgType == "mfaResolved":
                success = message.get("success", False)
                logger.info(f"[WS] MFA resolved: success={success}")
                from .routeFeatureTeamsbot import mfaCodeQueues, mfaWaitTasks
                task = mfaWaitTasks.pop(sessionId, None)
                if task and not task.done():
                    task.cancel()
                mfaCodeQueues.pop(sessionId, None)
                await _emitSessionEvent(sessionId, "mfaResolved", {
                    "success": success,
                    "timestamp": getUtcTimestamp(),
                })

    except Exception as e:
        # Disconnects are an expected way for the session to end; include the
        # class name in the check because some disconnect exceptions carry an
        # empty message.
        errText = f"{type(e).__name__}: {e}"
        if "disconnect" not in errText.lower():
            logger.error(f"[WS] Error for session {sessionId}: {errText}")
    finally:
        # The MFA waiter forwards to this very socket; once the connection is
        # gone it can only fail, so stop it here.
        if ownMfaTask is not None and not ownMfaTask.done():
            ownMfaTask.cancel()

        # Only unregister if a newer handler has not already taken over.
        if _activeServices.get(sessionId) is service:
            _activeServices.pop(sessionId, None)
        service._websocket = None
        service._voiceInterface = None
        service._activeSessionId = None
        try:
            await _emitSessionEvent(sessionId, "botConnectionState", {
                "connected": False,
                "timestamp": getUtcTimestamp(),
            })
        except Exception as emitErr:
            logger.debug(f"Session {sessionId}: Could not emit botConnectionState(disconnected): {emitErr}")

    logger.info(f"[WS] Handler ended for session {sessionId} after {msgCount} messages")
|
|
|
|
|
|
async def _waitAndForwardMfa(sid: str, queue: asyncio.Queue, ws: WebSocket):
    """Wait for an MFA code from the operator and forward it to the bot.

    Waits up to 120 seconds for an item on ``queue`` and sends it to the bot
    as an ``mfaResponse`` message. On timeout the bot receives
    ``{"action": "timeout"}`` and a ``mfaChallenge`` session event with
    ``mfaType="timeout"`` is emitted. Cancellation is logged and absorbed.

    On exit the ``mfaCodeQueues`` / ``mfaWaitTasks`` entries for ``sid`` are
    removed, but only if they still belong to this waiter.

    Args:
        sid: Session identifier used as key in the MFA registries.
        queue: Queue on which the operator's MFA response is delivered.
        ws: WebSocket connection to the bot that receives the response.
    """
    from .service import _emitSessionEvent
    from .routeFeatureTeamsbot import mfaCodeQueues, mfaWaitTasks

    async def _sendToBot(mfaPayload) -> bool:
        """Send an mfaResponse frame; report (not raise) delivery failures."""
        try:
            await ws.send_text(json.dumps({
                "type": "mfaResponse",
                "sessionId": sid,
                "mfa": mfaPayload,
            }))
            return True
        except Exception as sendErr:
            # Runs inside a background task: an escaping exception would only
            # surface as "Task exception was never retrieved".
            logger.warning(f"[WS] Could not forward MFA response for session {sid}: {sendErr}")
            return False

    try:
        mfaResponse = await asyncio.wait_for(queue.get(), timeout=120.0)
        logger.info(f"[WS] MFA response received for session {sid}: action={mfaResponse.get('action')}")
        await _sendToBot(mfaResponse)
    except asyncio.TimeoutError:
        logger.warning(f"[WS] MFA response timeout for session {sid}")
        await _sendToBot({"action": "timeout"})
        # Notify the operator even if the bot could not be reached.
        try:
            await _emitSessionEvent(sid, "mfaChallenge", {
                "mfaType": "timeout",
                "prompt": "MFA-Zeitlimit ueberschritten. Bitte erneut versuchen.",
                "timestamp": getUtcTimestamp(),
            })
        except Exception as emitErr:
            logger.debug(f"[WS] Could not emit MFA timeout event for session {sid}: {emitErr}")
    except asyncio.CancelledError:
        logger.info(f"[WS] MFA wait cancelled for session {sid} (resolved via page)")
    finally:
        # Remove only our own registrations: a newer challenge for the same
        # session may already have replaced them.
        if mfaCodeQueues.get(sid) is queue:
            mfaCodeQueues.pop(sid, None)
        if mfaWaitTasks.get(sid) is asyncio.current_task():
            mfaWaitTasks.pop(sid, None)
|
|
|
|
|
|
# Strong references to fire-and-forget tasks started by _handleBotStatus.
# The event loop keeps only weak references to tasks, so a task nobody else
# references may be garbage-collected before it completes.
_statusBackgroundTasks: set = set()


async def _handleBotStatus(
    service,
    sessionId: str,
    status: str,
    errorMessage: Optional[str],
    interface,
):
    """Handle status updates from the browser bot.

    Maps the bot-reported ``status`` to a ``TeamsbotSessionStatus`` value,
    persists it through ``interface.updateSession`` and emits a
    ``statusChange`` session event. When the session ends or errors, the
    pending audio buffer on ``service`` is discarded; when it ends, meeting
    summary generation is started in the background.

    Args:
        service: TeamsbotService instance owning the audio buffer state.
        sessionId: Identifier of the session being updated.
        status: Status string reported by the bot.
        errorMessage: Optional message accompanying the status.
        interface: Database interface providing ``updateSession``.
    """
    from .service import _emitSessionEvent
    from .datamodelTeamsbot import TeamsbotSessionStatus

    logger.info(f"Bot status update for session {sessionId}: {status}")

    statusMap = {
        "connecting": TeamsbotSessionStatus.JOINING.value,
        "launching": TeamsbotSessionStatus.JOINING.value,
        "navigating": TeamsbotSessionStatus.JOINING.value,
        "in_lobby": TeamsbotSessionStatus.JOINING.value,
        "joined": TeamsbotSessionStatus.ACTIVE.value,
        "in_meeting": TeamsbotSessionStatus.ACTIVE.value,
        "left": TeamsbotSessionStatus.ENDED.value,
        "error": TeamsbotSessionStatus.ERROR.value,
    }

    if status not in statusMap:
        # Unknown statuses are still treated as ACTIVE, but make that visible.
        logger.warning(
            f"Session {sessionId}: Unknown bot status '{status}', treating as "
            f"{TeamsbotSessionStatus.ACTIVE.value}"
        )
    dbStatus = statusMap.get(status, TeamsbotSessionStatus.ACTIVE.value)

    isEnded = dbStatus == TeamsbotSessionStatus.ENDED.value
    isTerminal = isEnded or dbStatus == TeamsbotSessionStatus.ERROR.value

    updates = {"status": dbStatus}
    if errorMessage:
        updates["errorMessage"] = errorMessage
    if dbStatus == TeamsbotSessionStatus.ACTIVE.value:
        # NOTE(review): this rewrites startedAt on every ACTIVE-mapped status
        # ("joined", "in_meeting" and any unknown status) — confirm whether the
        # first value should be preserved instead.
        updates["startedAt"] = getUtcTimestamp()
    elif isTerminal:
        updates["endedAt"] = getUtcTimestamp()

    interface.updateSession(sessionId, updates)
    await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage})

    if isTerminal:
        # The remaining audio is dropped, not transcribed (log text unchanged).
        if service._audioBuffer:
            logger.info(f"[AudioChunk] Flushing remaining buffer on session end ({len(service._audioBuffer)} bytes)")
        service._audioBuffer = b""
        service._audioBufferStartTime = 0.0
        service._audioBufferLastChunkTime = 0.0

    if isEnded:
        summaryTask = asyncio.create_task(service._generateMeetingSummary(sessionId))
        _statusBackgroundTasks.add(summaryTask)
        summaryTask.add_done_callback(_statusBackgroundTasks.discard)
|
|
|
|
|
|
async def _processAudioChunk(
    service,
    sessionId: str,
    audioBase64: str,
    sampleRate: int,
    captureDiagnostics: Optional[Dict[str, Any]],
    interface,
    voiceInterface,
    websocket: WebSocket,
):
    """Process an audio chunk from WebRTC capture.

    Decodes the base64 payload, appends non-silent audio to the buffer kept on
    ``service`` and, once the buffer holds at least one second of audio (or is
    older than three seconds and longer than 0.3 s), sends it to
    ``voiceInterface.speechToText``. A non-empty recognition result is passed
    on to ``_processTranscript`` with ``source="audioCapture"``.

    Any exception is logged and swallowed so a bad chunk does not end the
    WebSocket session.

    Args:
        service: TeamsbotService instance holding the audio buffer state.
        sessionId: Identifier of the session the audio belongs to.
        audioBase64: Base64-encoded audio payload.
        sampleRate: Sample rate reported for the chunk; falls back to 16000
            when missing or not positive.
        captureDiagnostics: Optional capture metadata; ``rms`` is used for
            silence detection.
        interface: Database interface forwarded to ``_processTranscript``.
        voiceInterface: Voice interface providing ``speechToText``.
        websocket: WebSocket forwarded to ``_processTranscript``.
    """
    from .serviceConversation import _processTranscript

    _MIN_CHUNK_SEC = 1.0
    _STALE_TIMEOUT_SEC = 3.0

    try:
        audioBytes = base64.b64decode(audioBase64)
        if len(audioBytes) < 500:
            return

        isSilent = False
        if captureDiagnostics:
            rms = captureDiagnostics.get("rms")
            logger.debug(
                f"[AudioChunk] diagnostics: track={captureDiagnostics.get('trackId')}, "
                f"readyState={captureDiagnostics.get('readyState')}, "
                f"rms={rms}, nativeRate={captureDiagnostics.get('nativeSampleRate')}, "
                f"bytes={len(audioBytes)}"
            )
            if rms is not None:
                try:
                    isSilent = float(rms) < 0.0003
                except (TypeError, ValueError, OverflowError):
                    # Unparseable rms: treat the chunk as audible.
                    pass

        if not voiceInterface:
            logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}")
            return

        now = time.time()
        effectiveRate = sampleRate if sampleRate and sampleRate > 0 else 16000

        if not isSilent:
            if not service._audioBuffer:
                service._audioBufferStartTime = now
            service._audioBuffer += audioBytes
            service._audioBufferLastChunkTime = now
            service._audioBufferSampleRate = effectiveRate

        if not service._audioBuffer:
            return

        # Measure the buffer with the rate it was recorded at (the same rate
        # the flush uses), not the rate of the possibly silent current chunk.
        bufferRate = getattr(service, "_audioBufferSampleRate", None) or effectiveRate
        # Divisor rate * 2 presumes 2 bytes per sample (the STT call below
        # declares "linear16", one channel).
        bufferDuration = len(service._audioBuffer) / (bufferRate * 2)
        bufferAge = now - service._audioBufferStartTime

        shouldFlush = bufferDuration >= _MIN_CHUNK_SEC or (
            bufferAge >= _STALE_TIMEOUT_SEC and bufferDuration > 0.3
        )
        if not shouldFlush:
            return

        flushBytes = service._audioBuffer
        flushRate = bufferRate
        service._audioBuffer = b""
        service._audioBufferStartTime = 0.0
        service._audioBufferLastChunkTime = 0.0

        flushDuration = len(flushBytes) / (flushRate * 2)
        logger.info(f"[AudioChunk] Flushing buffer: {len(flushBytes)} bytes, {flushDuration:.1f}s, {flushRate}Hz")

        phraseHints = list(service._knownSpeakers)
        if service.config.botName:
            phraseHints.append(service.config.botName)

        sttResult = await voiceInterface.speechToText(
            audioContent=flushBytes,
            language=service.config.language or "de-DE",
            sampleRate=flushRate,
            channels=1,
            skipFallbacks=True,
            phraseHints=phraseHints if phraseHints else None,
            audioFormat="linear16",
        )

        if not (sttResult and sttResult.get("success") and sttResult.get("text")):
            return
        text = sttResult["text"].strip()
        if not text:
            return

        # Read the speaker info with the same defaults for logging and for the
        # transcript call, so a missing key cannot lose the transcript.
        resolvedSpeaker = service._resolveSpeakerForAudioCapture() or {}
        speakerName = resolvedSpeaker.get("speaker", "Meeting Audio")
        fromCaption = resolvedSpeaker.get("speakerResolvedFromHint", False)
        logger.info(
            f"[AudioChunk] STT result: speaker={speakerName} "
            f"(fromCaption={fromCaption}), text={text[:80]}..."
        )
        await _processTranscript(
            service,
            sessionId=sessionId,
            speaker=speakerName,
            text=text,
            isFinal=True,
            interface=interface,
            voiceInterface=voiceInterface,
            websocket=websocket,
            source="audioCapture",
            speakerResolvedFromHint=fromCaption,
        )
    except Exception as e:
        logger.error(f"[AudioChunk] STT error for session {sessionId}: {type(e).__name__}: {e}")
|
|
|
|
|
|
async def _cancelInFlightSpeech(
    service,
    sessionId: str,
    websocket: Optional[WebSocket],
    reason: str,
) -> None:
    """Hard stop everything the bot is currently doing in the meeting.

    Steps, in order: bump the answer generation counter on ``service``, drop a
    pending name trigger, cancel the escalation and quick-ack background tasks
    if they are still running, send a ``stopAudio`` message to the browser bot
    (when a websocket is given) and emit a ``speechCancelled`` session event.
    Failures while sending or emitting are not propagated.
    """
    from .service import _emitSessionEvent

    service._answerGenerationCounter += 1
    generation = service._answerGenerationCounter
    logger.info(f"Session {sessionId}: Cancelling in-flight speech (reason={reason}, gen={generation})")

    if service._pendingNameTrigger:
        logger.info(f"Session {sessionId}: Dropping pending debounced name trigger (was queued before stop)")
        service._pendingNameTrigger = None

    # Look each task up lazily, right before it is inspected.
    trackedTasks = (
        (attrName, getattr(service, attrName, None))
        for attrName in ("_currentEscalationTask", "_currentQuickAckTask")
    )
    for attrName, pending in trackedTasks:
        if pending is None or pending.done():
            continue
        logger.info(f"Session {sessionId}: Cancelling background task {attrName}")
        pending.cancel()

    if websocket is not None:
        try:
            stopFrame = json.dumps({
                "type": "stopAudio",
                "sessionId": sessionId,
                "reason": reason,
            })
            await websocket.send_text(stopFrame)
        except Exception as stopErr:
            logger.warning(f"Session {sessionId}: Failed to send stopAudio to browser bot: {stopErr}")

    try:
        cancelledPayload = {
            "reason": reason,
            "generation": generation,
            "timestamp": getUtcTimestamp(),
        }
        await _emitSessionEvent(sessionId, "speechCancelled", cancelledPayload)
    except Exception:
        # Event emission is best effort here.
        pass
|