From e2c1c58442017ba0b92cb54189a33de775141ebd Mon Sep 17 00:00:00 2001
From: patrick-motsch
Date: Sat, 7 Mar 2026 00:56:04 +0100
Subject: [PATCH] fix: teamsbot SSE-Events fuer Greeting, Commands, Chat-Fehler
und command-only Responses
Made-with: Cursor
---
modules/features/teamsbot/service.py | 158 ++++++++++++++++++++++++++-
1 file changed, 156 insertions(+), 2 deletions(-)
diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py
index 787ee1c8..361577d9 100644
--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@@ -342,6 +342,12 @@ class TeamsbotService:
logger.info(f"[WS] Voice greeting: text={greetingText[:60]}..., language={greetingLang}")
if greetingText and voiceInterface:
try:
+ await _emitSessionEvent(sessionId, "ttsDeliveryStatus", {
+ "status": "requested",
+ "hasWebSocket": True,
+ "message": "Voice greeting TTS requested",
+ "timestamp": getIsoTimestamp(),
+ })
ttsResult = await voiceInterface.textToSpeech(
text=greetingText,
languageCode=greetingLang,
@@ -359,6 +365,53 @@ class TeamsbotService:
}
}))
logger.info(f"Voice greeting TTS sent for session {sessionId}")
+ await _emitSessionEvent(sessionId, "ttsDeliveryStatus", {
+ "status": "dispatched",
+ "hasWebSocket": True,
+ "message": "Voice greeting TTS dispatched to bot",
+ "timestamp": getIsoTimestamp(),
+ })
+
+ greetingTranscriptData = TeamsbotTranscript(
+ sessionId=sessionId,
+ speaker=self.config.botName,
+ text=greetingText,
+ timestamp=getIsoTimestamp(),
+ confidence=1.0,
+ language=greetingLang,
+ isFinal=True,
+ source="botResponse",
+ ).model_dump()
+ greetingTranscript = interface.createTranscript(greetingTranscriptData)
+
+ self._contextBuffer.append({
+ "speaker": self.config.botName,
+ "text": greetingText,
+ "timestamp": getUtcTimestamp(),
+ "source": "botResponse",
+ })
+ self._lastTranscriptSpeaker = self.config.botName
+ self._lastTranscriptText = greetingText
+ self._lastTranscriptId = greetingTranscript.get("id")
+
+ await _emitSessionEvent(sessionId, "botResponse", {
+ "id": greetingTranscript.get("id"),
+ "responseText": greetingText,
+ "responseType": TeamsbotResponseType.AUDIO.value,
+ "detectedIntent": "greeting",
+ "reasoning": "Automatic join greeting",
+ "timestamp": getIsoTimestamp(),
+ })
+ await _emitSessionEvent(sessionId, "transcript", {
+ "id": greetingTranscript.get("id"),
+ "speaker": self.config.botName,
+ "text": greetingText,
+ "confidence": 1.0,
+ "timestamp": getIsoTimestamp(),
+ "isContinuation": False,
+ "source": "botResponse",
+ "speakerResolvedFromHint": False,
+ })
except Exception as ttsErr:
logger.warning(f"Voice greeting TTS failed for session {sessionId}: {ttsErr}")
@@ -430,6 +483,21 @@ class TeamsbotService:
_waitAndForwardMfa(sessionId, mfaQueue, websocket)
)
+ elif msgType == "chatSendFailed":
+ errorData = message.get("error", {})
+ reason = errorData.get("reason", "unknown")
+ failedText = errorData.get("text", "")
+ logger.warning(
+ f"[WS] Chat send failed for session {sessionId}: "
+ f"reason={reason}, text={failedText[:60]}"
+ )
+ await _emitSessionEvent(sessionId, "chatSendFailed", {
+ "reason": reason,
+ "message": errorData.get("message", "Chat message could not be sent"),
+ "text": failedText,
+ "timestamp": getIsoTimestamp(),
+ })
+
elif msgType == "mfaResolved":
success = message.get("success", False)
logger.info(f"[WS] MFA resolved: success={success}")
@@ -1282,6 +1350,51 @@ class TeamsbotService:
if speechResult.commands:
await self._executeCommands(sessionId, speechResult.commands, voiceInterface, websocket)
+ # When AI used only commands (no responseText), emit botResponse SSE
+ # so the UI shows the response. Extract text from sendChat commands.
+ if speechResult.shouldRespond and not speechResult.responseText:
+ cmdTexts = [
+ c.params.get("text", "") for c in speechResult.commands
+ if c.action == "sendChat" and c.params and c.params.get("text")
+ ]
+ combinedText = " ".join(cmdTexts) if cmdTexts else None
+ if combinedText:
+ botResponseData = TeamsbotBotResponse(
+ sessionId=sessionId,
+ responseText=combinedText,
+ responseType=TeamsbotResponseType.CHAT,
+ detectedIntent=speechResult.detectedIntent,
+ reasoning=speechResult.reasoning,
+ triggeredByTranscriptId=triggerTranscript.get("id"),
+ modelName=response.modelName,
+ processingTime=response.processingTime,
+ priceCHF=response.priceCHF,
+ timestamp=getIsoTimestamp(),
+ ).model_dump()
+ createdResponse = interface.createBotResponse(botResponseData)
+ await _emitSessionEvent(sessionId, "botResponse", {
+ "id": createdResponse.get("id"),
+ "responseText": combinedText,
+ "responseType": TeamsbotResponseType.CHAT.value,
+ "detectedIntent": speechResult.detectedIntent,
+ "reasoning": speechResult.reasoning,
+ "modelName": response.modelName,
+ "processingTime": response.processingTime,
+ "priceCHF": response.priceCHF,
+ "timestamp": botResponseData.get("timestamp"),
+ })
+
+ session = interface.getSession(sessionId)
+ if session:
+ count = session.get("botResponseCount", 0) + 1
+ interface.updateSession(sessionId, {"botResponseCount": count})
+
+ self._followUpWindowEnd = time.time() + 15.0
+ logger.info(
+ f"Bot responded via commands in session {sessionId}: "
+ f"intent={speechResult.detectedIntent}, follow-up window open for 15s"
+ )
+
except Exception as e:
logger.error(f"SPEECH_TEAMS analysis failed for session {sessionId}: {type(e).__name__}: {e}", exc_info=True)
await _emitSessionEvent(sessionId, "error", {"message": f"AI analysis failed: {type(e).__name__}: {str(e)}"})
@@ -1352,14 +1465,55 @@ class TeamsbotService:
}))
async def _cmdSendChat(self, sessionId: str, params: dict, websocket: WebSocket):
- """Send a message to the meeting chat."""
+ """Send params['text'] to the meeting chat via the WebSocket (if given), store it as a bot transcript with source 'chat', update the context buffer and last-transcript/last-response state, and emit a 'transcript' session event. Does nothing when the text is empty."""
chatText = params.get("text", "")
- if chatText and websocket:
+ if not chatText:  # nothing to send or record
+ return
+ if websocket:  # NOTE(review): confirm whether the transcript/SSE recording below should also run when websocket is falsy (message not sent)
await websocket.send_text(json.dumps({
"type": "sendChatMessage",
"sessionId": sessionId,
"text": chatText,
}))
+ logger.info(f"Chat command sent for session {sessionId}")
+
+ from . import interfaceFeatureTeamsbot as interfaceDb  # function-scope import; presumably avoids a circular import - confirm
+ interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
+
+ transcriptData = TeamsbotTranscript(
+ sessionId=sessionId,
+ speaker=self.config.botName,
+ text=chatText,
+ timestamp=getIsoTimestamp(),
+ confidence=1.0,
+ language=self.config.language,
+ isFinal=True,
+ source="chat",
+ ).model_dump()
+ createdTranscript = interface.createTranscript(transcriptData)
+
+ self._contextBuffer.append({
+ "speaker": self.config.botName,
+ "text": chatText,
+ "timestamp": getUtcTimestamp(),  # context buffer uses getUtcTimestamp(), unlike the transcript record and event (getIsoTimestamp())
+ "source": "chat",
+ })
+ self._lastTranscriptSpeaker = self.config.botName
+ self._lastTranscriptText = chatText
+ self._lastTranscriptId = createdTranscript.get("id")
+ self._lastBotResponseText = chatText.strip().lower()  # stripped, lower-cased copy of the bot text; presumably used for echo detection - confirm
+ self._lastBotResponseTs = time.time()
+
+ await _emitSessionEvent(sessionId, "transcript", {
+ "id": createdTranscript.get("id"),
+ "speaker": self.config.botName,
+ "text": chatText,
+ "confidence": 1.0,
+ "timestamp": getIsoTimestamp(),  # separate getIsoTimestamp() call, not the value stored in transcriptData
+ "isContinuation": False,
+ "source": "chat",
+ "speakerResolvedFromHint": False,
+ })
async def _cmdReadChat(
self,