platform-core/modules/features/teamsbot/serviceWebSocket.py
ValueOn AG cf0233f193
Some checks failed
Deploy Plattform-Core (Int) / test (push) Failing after 13s
Deploy Plattform-Core (Int) / deploy (push) Has been skipped
refactor: architecture cleanup + fix scheduler Automation2Workflow error
Fix: add missing Automation2Workflow/Automation2WorkflowRun imports to interfaceFeatureGraphicalEditor.py (caused scheduler crash on boot)
Refactor: gdprDeletion via onUserDelete lifecycle hooks
Refactor: i18nBootSync accounting labels via app.py parameter injection
Refactor: serviceHub moved to serviceCenter/serviceHub.py
Split: teamsbot/service.py, realEstate/main, routeTrustee, routeBilling
Cleanup: remove obsolete methodTrustee, serviceExceptions shim
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-07 07:59:31 +02:00

545 lines
22 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Teamsbot Service — WebSocket handler & audio chunk processing.
Extracted from service.py. All functions accept `service` (a TeamsbotService
instance) as the first parameter so the class can delegate to them.
"""
import logging
import json
import asyncio
import time
import base64
from typing import Optional, Dict, Any
from fastapi import WebSocket
from modules.shared.timeUtils import getUtcTimestamp
logger = logging.getLogger(__name__)
# Strong references to fire-and-forget tasks started by the WebSocket handler.
# The event loop only keeps weak references to tasks, so a task nobody else
# references may be garbage-collected before it finishes.
_wsHandlerBackgroundTasks: set = set()


async def handleBotWebSocket(service, websocket: WebSocket, sessionId: str):
    """Main WebSocket handler for Browser Bot communication.

    Prepares per-session state on ``service`` (session context, bot account
    email, persistent director prompts), registers the service in
    ``_activeServices`` and then reads frames from ``websocket`` until the
    peer disconnects or an error occurs. Each JSON frame is dispatched on its
    ``type`` field (``transcript``, ``chatMessage``, ``status``,
    ``audioChunk``, ``voiceGreeting``, ``requestGreeting``, ``ping``,
    ``ttsPlaybackAck``, ``mfaChallenge``, ``chatSendFailed``, ``mfaResolved``).
    Unknown types are logged and otherwise ignored.

    Args:
        service: TeamsbotService instance whose state this handler manages.
        websocket: Accepted WebSocket connection to the browser bot.
        sessionId: Identifier of the Teamsbot session served by this socket.

    On exit the service is unregistered (only if it is still the registered
    instance for ``sessionId``), its socket/voice/session references are
    cleared and a ``botConnectionState`` event with ``connected=False`` is
    emitted.
    """
    from . import interfaceFeatureTeamsbot as interfaceDb
    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
    from .service import _activeServices, _emitSessionEvent
    from .serviceConversation import _processTranscript, _warmEphemeralPhrasePool

    interface = interfaceDb.getInterface(service.currentUser, service.mandateId, service.instanceId)
    voiceInterface = getVoiceInterface(service.currentUser, service.mandateId)

    # Long stored contexts (> 500 chars) are summarized before use.
    session = interface.getSession(sessionId)
    if session:
        rawContext = session.get("sessionContext")
        if rawContext and len(rawContext) > 500:
            logger.info(f"Session {sessionId}: Summarizing long session context ({len(rawContext)} chars)...")
            service._sessionContext = await service._summarizeSessionContext(sessionId, rawContext)
        elif rawContext:
            service._sessionContext = rawContext
        if service._sessionContext:
            logger.info(f"Session {sessionId}: Session context ready ({len(service._sessionContext)} chars)")

    try:
        systemBot = interface.getActiveSystemBot(service.mandateId)
        service._botAccountEmail = systemBot.get("email") if systemBot else None
        if service._botAccountEmail:
            logger.info(f"Session {sessionId}: Bot account email resolved: {service._botAccountEmail}")
    except Exception as botErr:
        # Best-effort lookup: the handler continues without a bot e-mail.
        logger.warning(f"Session {sessionId}: Could not resolve bot account email: {botErr}")
        service._botAccountEmail = None

    service._activeSessionId = sessionId
    service._websocket = websocket
    service._voiceInterface = voiceInterface
    _activeServices[sessionId] = service

    try:
        await _emitSessionEvent(sessionId, "botConnectionState", {
            "connected": True,
            "timestamp": getUtcTimestamp(),
        })
    except Exception as emitErr:
        # Event emission is best-effort and must not block the handler.
        logger.debug(f"Session {sessionId}: botConnectionState(connected) event not emitted: {emitErr}")

    try:
        service._activePersistentPrompts = interface.getActivePersistentPrompts(sessionId) or []
        if service._activePersistentPrompts:
            logger.info(
                f"Session {sessionId}: Loaded {len(service._activePersistentPrompts)} active persistent director prompt(s)"
            )
    except Exception as restoreErr:
        logger.warning(f"Session {sessionId}: Could not restore persistent director prompts: {restoreErr}")
        service._activePersistentPrompts = []

    # Keep a strong reference until the warm-up task is done.
    warmupTask = asyncio.create_task(_warmEphemeralPhrasePool(service, sessionId))
    _wsHandlerBackgroundTasks.add(warmupTask)
    warmupTask.add_done_callback(_wsHandlerBackgroundTasks.discard)

    logger.info(f"[WS] Handler started for session {sessionId}")

    # Bound before the try block so the final log line can always read it.
    msgCount = 0
    try:
        while True:
            data = await websocket.receive()
            msgCount += 1

            # receive() hands back the raw ASGI message; a disconnect frame
            # ends the loop instead of relying on the error raised by the
            # next receive() call.
            if data.get("type") == "websocket.disconnect":
                logger.info(f"[WS] session={sessionId} disconnected by peer (code={data.get('code')})")
                break

            # A frame may carry "text": None next to binary data.
            rawText = data.get("text")
            if rawText is None:
                logger.debug(f"[WS] session={sessionId} msg #{msgCount}: non-text data (keys: {list(data.keys())})")
                continue

            # One malformed frame must not terminate the whole session.
            try:
                message = json.loads(rawText)
            except json.JSONDecodeError as parseErr:
                logger.warning(f"[WS] session={sessionId} msg #{msgCount}: dropping malformed JSON: {parseErr}")
                continue
            if not isinstance(message, dict):
                logger.warning(
                    f"[WS] session={sessionId} msg #{msgCount}: dropping non-object JSON ({type(message).__name__})"
                )
                continue

            msgType = message.get("type")
            # audioChunk and ping are excluded from the per-message info log.
            if msgType not in ("audioChunk", "ping"):
                logger.info(f"[WS] session={sessionId} msg #{msgCount}: type={msgType}")

            if msgType == "transcript":
                transcript = message.get("transcript", {}) or {}
                source = transcript.get("source", "caption")
                speaker = transcript.get("speaker", "Unknown")
                textPreview = (transcript.get("text", "") or "")[:60]
                logger.info(f"[WS] Transcript (source={source}, speaker={speaker}): {textPreview}...")
                await _processTranscript(
                    service,
                    sessionId=sessionId,
                    speaker=transcript.get("speaker", "Unknown"),
                    text=transcript.get("text", ""),
                    isFinal=transcript.get("isFinal", True),
                    interface=interface,
                    voiceInterface=voiceInterface,
                    websocket=websocket,
                    source=source,
                )

            elif msgType == "chatMessage":
                chat = message.get("chat", {}) or {}
                isHistory = chat.get("isHistory", False)
                source = "chatHistory" if isHistory else "chat"
                chatPreview = (chat.get("text", "") or "")[:60]
                logger.info(
                    f"[WS] Chat{'[HISTORY]' if isHistory else ''}: "
                    f"speaker={chat.get('speaker')}, text={chatPreview}..."
                )
                await _processTranscript(
                    service,
                    sessionId=sessionId,
                    speaker=chat.get("speaker", "Unknown"),
                    text=chat.get("text", ""),
                    isFinal=True,
                    interface=interface,
                    voiceInterface=voiceInterface,
                    websocket=websocket,
                    source=source,
                )

            elif msgType == "status":
                status = message.get("status")
                errorMessage = message.get("message")
                logger.info(f"[WS] Status: status={status}, message={errorMessage}")
                await _handleBotStatus(service, sessionId, status, errorMessage, interface)

            elif msgType == "audioChunk":
                audioData = message.get("audio", {}) or {}
                audioBase64 = audioData.get("data", "")
                sampleRate = audioData.get("sampleRate", 16000)
                captureDiagnostics = audioData.get("captureDiagnostics") or {}
                if audioBase64:
                    await _processAudioChunk(
                        service,
                        sessionId=sessionId,
                        audioBase64=audioBase64,
                        sampleRate=sampleRate,
                        captureDiagnostics=captureDiagnostics,
                        interface=interface,
                        voiceInterface=voiceInterface,
                        websocket=websocket,
                    )

            elif msgType == "voiceGreeting":
                greetingText = message.get("text", "")
                greetingLang = message.get("language", service.config.language)
                logger.info(
                    f"[WS] Voice greeting (legacy): text={(greetingText or '')[:60]}..., language={greetingLang}"
                )
                if greetingText and voiceInterface:
                    await service._dispatchGreetingToMeeting(
                        sessionId=sessionId,
                        greetingText=greetingText,
                        greetingLang=greetingLang,
                        sendToChat=False,
                        interface=interface,
                        voiceInterface=voiceInterface,
                        websocket=websocket,
                    )

            elif msgType == "requestGreeting":
                requestedLang = (
                    message.get("language") or service.config.language or ""
                ).strip() or "en-US"
                botNameHint = (
                    message.get("botName") or service.config.botName or ""
                ).strip() or service.config.botName
                logger.info(
                    f"[WS] Greeting request from bot: language={requestedLang}, name={botNameHint}"
                )
                if voiceInterface:
                    try:
                        greetingText = await service._generateGreetingText(
                            requestedLang
                        )
                    except Exception as genErr:
                        logger.warning(
                            f"Greeting generation failed for session {sessionId}: {genErr}"
                        )
                        greetingText = ""
                    if greetingText:
                        await service._dispatchGreetingToMeeting(
                            sessionId=sessionId,
                            greetingText=greetingText,
                            greetingLang=requestedLang,
                            sendToChat=True,
                            interface=interface,
                            voiceInterface=voiceInterface,
                            websocket=websocket,
                        )
                    else:
                        logger.warning(
                            f"Session {sessionId}: Skipping greeting — AI generation produced no text"
                        )

            elif msgType == "ping":
                await websocket.send_text(json.dumps({"type": "pong"}))

            elif msgType == "ttsPlaybackAck":
                playback = message.get("playback", {}) or {}
                status = playback.get("status", "unknown")
                ackMessage = playback.get("message") or "Bot playback status update"
                logger.info(
                    f"[WS] TTS playback ack: status={status}, format={playback.get('format')}, "
                    f"bytesBase64={playback.get('bytesBase64')}"
                )
                await _emitSessionEvent(sessionId, "ttsDeliveryStatus", {
                    "status": f"playback_{status}",
                    "hasWebSocket": True,
                    "message": ackMessage,
                    "timestamp": playback.get("timestamp") or getUtcTimestamp(),
                    "format": playback.get("format"),
                    "bytesBase64": playback.get("bytesBase64"),
                })

            elif msgType == "mfaChallenge":
                mfaData = message.get("mfa", {}) or {}
                mfaType = mfaData.get("type", "unknown")
                displayNumber = mfaData.get("displayNumber")
                prompt = mfaData.get("prompt", "")
                logger.info(
                    f"[WS] MFA challenge: type={mfaType}, number={displayNumber}, prompt={(prompt or '')[:60]}"
                )
                await _emitSessionEvent(sessionId, "mfaChallenge", {
                    "mfaType": mfaType,
                    "displayNumber": displayNumber,
                    "prompt": prompt,
                    "timestamp": getUtcTimestamp(),
                })
                from .routeFeatureTeamsbot import mfaCodeQueues, mfaWaitTasks
                # Register a queue for the operator's answer and a task that
                # forwards it to the bot.
                mfaQueue = asyncio.Queue()
                mfaCodeQueues[sessionId] = mfaQueue
                mfaWaitTasks[sessionId] = asyncio.create_task(
                    _waitAndForwardMfa(sessionId, mfaQueue, websocket)
                )

            elif msgType == "chatSendFailed":
                errorData = message.get("error", {}) or {}
                reason = errorData.get("reason", "unknown")
                failedText = errorData.get("text", "")
                logger.warning(
                    f"[WS] Chat send failed for session {sessionId}: "
                    f"reason={reason}, text={(failedText or '')[:60]}"
                )
                await _emitSessionEvent(sessionId, "chatSendFailed", {
                    "reason": reason,
                    "message": errorData.get("message", "Chat message could not be sent"),
                    "text": failedText,
                    "timestamp": getUtcTimestamp(),
                })

            elif msgType == "mfaResolved":
                success = message.get("success", False)
                logger.info(f"[WS] MFA resolved: success={success}")
                from .routeFeatureTeamsbot import mfaCodeQueues, mfaWaitTasks
                # The challenge was resolved without an operator code: stop
                # the pending wait task and drop its queue.
                task = mfaWaitTasks.pop(sessionId, None)
                if task and not task.done():
                    task.cancel()
                mfaCodeQueues.pop(sessionId, None)
                await _emitSessionEvent(sessionId, "mfaResolved", {
                    "success": success,
                    "timestamp": getUtcTimestamp(),
                })

    except Exception as e:
        # Errors whose text mentions a disconnect are treated as a normal
        # end of the connection and are not logged as errors.
        if "disconnect" not in str(e).lower():
            logger.error(f"[WS] Error for session {sessionId}: {type(e).__name__}: {e}")
    finally:
        # Only unregister if this service is still the registered one.
        if _activeServices.get(sessionId) is service:
            _activeServices.pop(sessionId, None)
        service._websocket = None
        service._voiceInterface = None
        service._activeSessionId = None
        try:
            await _emitSessionEvent(sessionId, "botConnectionState", {
                "connected": False,
                "timestamp": getUtcTimestamp(),
            })
        except Exception as emitErr:
            logger.debug(f"Session {sessionId}: botConnectionState(disconnected) event not emitted: {emitErr}")
        logger.info(f"[WS] Handler ended for session {sessionId} after {msgCount} messages")
async def _waitAndForwardMfa(sid: str, queue: asyncio.Queue, ws: WebSocket):
    """Wait for an MFA code from the operator and forward it to the bot.

    Waits up to 120 seconds for an item on ``queue`` and sends it to the bot
    as an ``mfaResponse`` message. On timeout the bot receives an
    ``mfaResponse`` with ``action="timeout"`` and an ``mfaChallenge`` event of
    type ``timeout`` is emitted for the session. Cancellation is logged and
    absorbed so the task finishes normally.

    Args:
        sid: Session id the MFA challenge belongs to.
        queue: Queue on which the operator's MFA response is delivered.
        ws: WebSocket connection to the browser bot.
    """
    from .service import _emitSessionEvent
    from .routeFeatureTeamsbot import mfaCodeQueues, mfaWaitTasks

    try:
        mfaResponse = await asyncio.wait_for(queue.get(), timeout=120.0)
        logger.info(f"[WS] MFA response received for session {sid}: action={mfaResponse.get('action')}")
        await ws.send_text(json.dumps({
            "type": "mfaResponse",
            "sessionId": sid,
            "mfa": mfaResponse,
        }))
    except asyncio.TimeoutError:
        logger.warning(f"[WS] MFA response timeout for session {sid}")
        try:
            await ws.send_text(json.dumps({
                "type": "mfaResponse",
                "sessionId": sid,
                "mfa": {"action": "timeout"},
            }))
        except Exception as sendErr:
            # The socket may be gone by now; the operator is still informed
            # through the session event below.
            logger.warning(f"[WS] Could not send MFA timeout to bot for session {sid}: {sendErr}")
        await _emitSessionEvent(sid, "mfaChallenge", {
            "mfaType": "timeout",
            "prompt": "MFA-Zeitlimit ueberschritten. Bitte erneut versuchen.",
        })
    except asyncio.CancelledError:
        logger.info(f"[WS] MFA wait cancelled for session {sid} (resolved via page)")
    except Exception as forwardErr:
        # Nobody awaits this task, so log the failure here instead of leaving
        # an unretrieved task exception behind.
        logger.warning(
            f"[WS] MFA forwarding failed for session {sid}: {type(forwardErr).__name__}: {forwardErr}"
        )
    finally:
        # Remove only the entries that still belong to this waiter. A newer
        # challenge for the same session may have replaced them already and
        # must not lose its queue or task.
        if mfaCodeQueues.get(sid) is queue:
            mfaCodeQueues.pop(sid, None)
        if mfaWaitTasks.get(sid) is asyncio.current_task():
            mfaWaitTasks.pop(sid, None)
# Strong references to background tasks started from bot status handling.
# The event loop only keeps weak references to tasks, so a task nobody else
# references may be garbage-collected before it finishes.
_botStatusBackgroundTasks: set = set()


async def _handleBotStatus(
    service,
    sessionId: str,
    status: str,
    errorMessage: Optional[str],
    interface,
):
    """Handle status updates from the browser bot.

    Maps the bot-reported ``status`` to a ``TeamsbotSessionStatus`` value
    (statuses not in the map are stored as ACTIVE), persists it together with
    ``startedAt`` / ``endedAt`` timestamps and an optional error message, and
    emits a ``statusChange`` event carrying the original bot status. When the
    session ended or failed, the buffered capture audio is discarded; when it
    ended, a meeting summary task is started in the background.

    Args:
        service: TeamsbotService instance owning the audio buffer.
        sessionId: Session whose status changed.
        status: Status string as reported by the browser bot.
        errorMessage: Optional message accompanying the status.
        interface: Database interface providing ``updateSession``.
    """
    from .service import _emitSessionEvent
    from .datamodelTeamsbot import TeamsbotSessionStatus

    logger.info(f"Bot status update for session {sessionId}: {status}")

    statusMap = {
        "connecting": TeamsbotSessionStatus.JOINING.value,
        "launching": TeamsbotSessionStatus.JOINING.value,
        "navigating": TeamsbotSessionStatus.JOINING.value,
        "in_lobby": TeamsbotSessionStatus.JOINING.value,
        "joined": TeamsbotSessionStatus.ACTIVE.value,
        "in_meeting": TeamsbotSessionStatus.ACTIVE.value,
        "left": TeamsbotSessionStatus.ENDED.value,
        "error": TeamsbotSessionStatus.ERROR.value,
    }
    dbStatus = statusMap.get(status, TeamsbotSessionStatus.ACTIVE.value)
    terminalStatuses = (TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value)

    updates = {"status": dbStatus}
    if errorMessage:
        updates["errorMessage"] = errorMessage
    if dbStatus == TeamsbotSessionStatus.ACTIVE.value:
        updates["startedAt"] = getUtcTimestamp()
    elif dbStatus in terminalStatuses:
        updates["endedAt"] = getUtcTimestamp()
    interface.updateSession(sessionId, updates)

    # The event carries the bot's own status string, not the mapped DB value.
    await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage})

    if dbStatus in terminalStatuses:
        # Remaining buffered audio is dropped here, not transcribed.
        if service._audioBuffer:
            logger.info(f"[AudioChunk] Flushing remaining buffer on session end ({len(service._audioBuffer)} bytes)")
        service._audioBuffer = b""
        service._audioBufferStartTime = 0.0
        service._audioBufferLastChunkTime = 0.0

    if dbStatus == TeamsbotSessionStatus.ENDED.value:
        # Keep a strong reference until the summary task is done.
        summaryTask = asyncio.create_task(service._generateMeetingSummary(sessionId))
        _botStatusBackgroundTasks.add(summaryTask)
        summaryTask.add_done_callback(_botStatusBackgroundTasks.discard)
async def _processAudioChunk(
    service,
    sessionId: str,
    audioBase64: str,
    sampleRate: int,
    captureDiagnostics: Optional[Dict[str, Any]],
    interface,
    voiceInterface,
    websocket: WebSocket,
):
    """Process an audio chunk from WebRTC capture.

    Decodes the base64 chunk, appends non-silent audio to the service's
    buffer and, once enough audio is buffered (or the buffer became stale),
    sends the buffer to speech-to-text and feeds a non-empty result into
    ``_processTranscript`` with ``source="audioCapture"``.

    Args:
        service: TeamsbotService instance holding the audio buffer state.
        sessionId: Session the audio belongs to.
        audioBase64: Base64-encoded audio payload.
        sampleRate: Sample rate reported for this chunk; values that are
            missing or not positive fall back to 16000.
        captureDiagnostics: Optional capture metadata; its ``rms`` value is
            used for silence detection.
        interface: Database interface passed through to transcript handling.
        voiceInterface: Voice interface providing ``speechToText``.
        websocket: Bot WebSocket passed through to transcript handling.

    All exceptions are caught and logged; nothing is raised to the caller.
    """
    from .serviceConversation import _processTranscript

    _MIN_CHUNK_SEC = 1.0       # flush once at least this much audio is buffered
    _STALE_TIMEOUT_SEC = 3.0   # flush older buffers even if they are shorter

    try:
        audioBytes = base64.b64decode(audioBase64)
        # Very small payloads are ignored.
        if len(audioBytes) < 500:
            return

        if captureDiagnostics:
            trackId = captureDiagnostics.get("trackId")
            readyState = captureDiagnostics.get("readyState")
            rms = captureDiagnostics.get("rms")
            nativeSampleRate = captureDiagnostics.get("nativeSampleRate")
            logger.debug(
                f"[AudioChunk] diagnostics: track={trackId}, readyState={readyState}, "
                f"rms={rms}, nativeRate={nativeSampleRate}, bytes={len(audioBytes)}"
            )

        # A chunk counts as silent when its reported RMS is below 0.0003.
        isSilent = False
        rawRms = captureDiagnostics.get("rms") if captureDiagnostics else None
        if rawRms is not None:
            try:
                isSilent = float(rawRms) < 0.0003
            except (TypeError, ValueError, OverflowError):
                # Unparsable RMS: treat the chunk as non-silent.
                pass

        if not voiceInterface:
            logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}")
            return

        now = time.time()
        effectiveRate = sampleRate if sampleRate and sampleRate > 0 else 16000

        # Silent chunks are not buffered but still reach the flush check.
        if not isSilent:
            if not service._audioBuffer:
                service._audioBufferStartTime = now
            service._audioBuffer += audioBytes
            service._audioBufferLastChunkTime = now
            service._audioBufferSampleRate = effectiveRate

        if not service._audioBuffer:
            return

        # Measure the buffer with the rate stored alongside it (the rate used
        # for the flush), not the rate of the current, possibly silent, chunk.
        bufferRate = getattr(service, "_audioBufferSampleRate", 0) or effectiveRate
        # Duration assumes 2 bytes per sample.
        bufferDuration = len(service._audioBuffer) / (bufferRate * 2)
        bufferAge = now - service._audioBufferStartTime
        shouldFlush = (
            bufferDuration >= _MIN_CHUNK_SEC
            or (bufferAge >= _STALE_TIMEOUT_SEC and bufferDuration > 0.3)
        )
        if not shouldFlush:
            return

        # Take the buffer and reset the state before awaiting speech-to-text.
        flushBytes = service._audioBuffer
        flushRate = service._audioBufferSampleRate
        service._audioBuffer = b""
        service._audioBufferStartTime = 0.0
        service._audioBufferLastChunkTime = 0.0

        flushDuration = len(flushBytes) / (flushRate * 2)
        logger.info(f"[AudioChunk] Flushing buffer: {len(flushBytes)} bytes, {flushDuration:.1f}s, {flushRate}Hz")

        # Known speaker names and the bot name are passed as phrase hints.
        phraseHints = list(service._knownSpeakers)
        if service.config.botName:
            phraseHints.append(service.config.botName)

        sttResult = await voiceInterface.speechToText(
            audioContent=flushBytes,
            language=service.config.language or "de-DE",
            sampleRate=flushRate,
            channels=1,
            skipFallbacks=True,
            phraseHints=phraseHints if phraseHints else None,
            audioFormat="linear16",
        )

        if sttResult and sttResult.get("success") and sttResult.get("text"):
            text = sttResult["text"].strip()
            if text:
                resolvedSpeaker = service._resolveSpeakerForAudioCapture()
                # Use the same defaults for logging and for the call, so a
                # missing key cannot drop an already transcribed utterance.
                speakerName = resolvedSpeaker.get("speaker", "Meeting Audio")
                fromCaption = resolvedSpeaker.get("speakerResolvedFromHint", False)
                logger.info(
                    f"[AudioChunk] STT result: speaker={speakerName} "
                    f"(fromCaption={fromCaption}), text={text[:80]}..."
                )
                await _processTranscript(
                    service,
                    sessionId=sessionId,
                    speaker=speakerName,
                    text=text,
                    isFinal=True,
                    interface=interface,
                    voiceInterface=voiceInterface,
                    websocket=websocket,
                    source="audioCapture",
                    speakerResolvedFromHint=fromCaption,
                )
    except Exception as e:
        logger.error(f"[AudioChunk] STT error for session {sessionId}: {type(e).__name__}: {e}")
async def _cancelInFlightSpeech(
    service,
    sessionId: str,
    websocket: Optional[WebSocket],
    reason: str,
) -> None:
    """Hard stop everything the bot is currently doing in the meeting.

    Increments the service's answer generation counter, drops a pending
    debounced name trigger, cancels the escalation and quick-ack background
    tasks that are still running, sends a ``stopAudio`` message to the browser
    bot (when a socket is given) and emits a ``speechCancelled`` event.

    Args:
        service: TeamsbotService instance whose activity is cancelled.
        sessionId: Session the cancellation applies to.
        websocket: Bot WebSocket, or ``None`` to skip the ``stopAudio`` send.
        reason: Reason string included in logs, the bot message and the event.

    Failures while notifying the bot or emitting the event are logged and do
    not propagate.
    """
    from .service import _emitSessionEvent

    service._answerGenerationCounter += 1
    gen = service._answerGenerationCounter
    logger.info(
        f"Session {sessionId}: Cancelling in-flight speech "
        f"(reason={reason}, gen={gen})"
    )

    if service._pendingNameTrigger:
        logger.info(
            f"Session {sessionId}: Dropping pending debounced name "
            f"trigger (was queued before stop)"
        )
        service._pendingNameTrigger = None

    for taskAttr in ("_currentEscalationTask", "_currentQuickAckTask"):
        task = getattr(service, taskAttr, None)
        if task is not None and not task.done():
            logger.info(
                f"Session {sessionId}: Cancelling background task "
                f"{taskAttr}"
            )
            task.cancel()

    if websocket is not None:
        try:
            await websocket.send_text(json.dumps({
                "type": "stopAudio",
                "sessionId": sessionId,
                "reason": reason,
            }))
        except Exception as stopErr:
            logger.warning(
                f"Session {sessionId}: Failed to send stopAudio to "
                f"browser bot: {stopErr}"
            )

    try:
        await _emitSessionEvent(sessionId, "speechCancelled", {
            "reason": reason,
            "generation": gen,
            "timestamp": getUtcTimestamp(),
        })
    except Exception as emitErr:
        # Still best-effort, but no longer silent.
        logger.debug(
            f"Session {sessionId}: speechCancelled event not emitted: {emitErr}"
        )