From e2c1c58442017ba0b92cb54189a33de775141ebd Mon Sep 17 00:00:00 2001
From: patrick-motsch <p.motsch@valueon.ch>
Date: Sat, 7 Mar 2026 00:56:04 +0100
Subject: [PATCH] fix: teamsbot SSE-Events fuer Greeting, Commands, Chat-Fehler
 und command-only Responses

Made-with: Cursor
---
 modules/features/teamsbot/service.py | 158 ++++++++++++++++++++++++++-
 1 file changed, 156 insertions(+), 2 deletions(-)

diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py
index 787ee1c8..361577d9 100644
--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@@ -342,6 +342,12 @@ class TeamsbotService:
                     logger.info(f"[WS] Voice greeting: text={greetingText[:60]}..., language={greetingLang}")
                     if greetingText and voiceInterface:
                         try:
+                            await _emitSessionEvent(sessionId, "ttsDeliveryStatus", {
+                                "status": "requested",
+                                "hasWebSocket": True,
+                                "message": "Voice greeting TTS requested",
+                                "timestamp": getIsoTimestamp(),
+                            })
                             ttsResult = await voiceInterface.textToSpeech(
                                 text=greetingText,
                                 languageCode=greetingLang,
@@ -359,6 +365,53 @@ class TeamsbotService:
                                         }
                                     }))
                                     logger.info(f"Voice greeting TTS sent for session {sessionId}")
+                                    await _emitSessionEvent(sessionId, "ttsDeliveryStatus", {
+                                        "status": "dispatched",
+                                        "hasWebSocket": True,
+                                        "message": "Voice greeting TTS dispatched to bot",
+                                        "timestamp": getIsoTimestamp(),
+                                    })
+
+                            greetingTranscriptData = TeamsbotTranscript(
+                                sessionId=sessionId,
+                                speaker=self.config.botName,
+                                text=greetingText,
+                                timestamp=getIsoTimestamp(),
+                                confidence=1.0,
+                                language=greetingLang,
+                                isFinal=True,
+                                source="botResponse",
+                            ).model_dump()
+                            greetingTranscript = interface.createTranscript(greetingTranscriptData)
+
+                            self._contextBuffer.append({
+                                "speaker": self.config.botName,
+                                "text": greetingText,
+                                "timestamp": getUtcTimestamp(),
+                                "source": "botResponse",
+                            })
+                            self._lastTranscriptSpeaker = self.config.botName
+                            self._lastTranscriptText = greetingText
+                            self._lastTranscriptId = greetingTranscript.get("id")
+
+                            await _emitSessionEvent(sessionId, "botResponse", {
+                                "id": greetingTranscript.get("id"),
+                                "responseText": greetingText,
+                                "responseType": TeamsbotResponseType.AUDIO.value,
+                                "detectedIntent": "greeting",
+                                "reasoning": "Automatic join greeting",
+                                "timestamp": getIsoTimestamp(),
+                            })
+                            await _emitSessionEvent(sessionId, "transcript", {
+                                "id": greetingTranscript.get("id"),
+                                "speaker": self.config.botName,
+                                "text": greetingText,
+                                "confidence": 1.0,
+                                "timestamp": getIsoTimestamp(),
+                                "isContinuation": False,
+                                "source": "botResponse",
+                                "speakerResolvedFromHint": False,
+                            })
                         except Exception as ttsErr:
                             logger.warning(f"Voice greeting TTS failed for session {sessionId}: {ttsErr}")
 
@@ -430,6 +483,21 @@ class TeamsbotService:
                         _waitAndForwardMfa(sessionId, mfaQueue, websocket)
                     )
 
+                elif msgType == "chatSendFailed":
+                    errorData = message.get("error", {})
+                    reason = errorData.get("reason", "unknown")
+                    failedText = errorData.get("text", "")
+                    logger.warning(
+                        f"[WS] Chat send failed for session {sessionId}: "
+                        f"reason={reason}, text={failedText[:60]}"
+                    )
+                    await _emitSessionEvent(sessionId, "chatSendFailed", {
+                        "reason": reason,
+                        "message": errorData.get("message", "Chat message could not be sent"),
+                        "text": failedText,
+                        "timestamp": getIsoTimestamp(),
+                    })
+
                 elif msgType == "mfaResolved":
                     success = message.get("success", False)
                     logger.info(f"[WS] MFA resolved: success={success}")
@@ -1282,6 +1350,51 @@ class TeamsbotService:
             if speechResult.commands:
                 await self._executeCommands(sessionId, speechResult.commands, voiceInterface, websocket)
 
+                # When AI used only commands (no responseText), emit botResponse SSE
+                # so the UI shows the response. Extract text from sendChat commands.
+                if speechResult.shouldRespond and not speechResult.responseText:
+                    cmdTexts = [
+                        c.params.get("text", "") for c in speechResult.commands
+                        if c.action == "sendChat" and c.params and c.params.get("text")
+                    ]
+                    combinedText = " ".join(cmdTexts) if cmdTexts else None
+                    if combinedText:
+                        botResponseData = TeamsbotBotResponse(
+                            sessionId=sessionId,
+                            responseText=combinedText,
+                            responseType=TeamsbotResponseType.CHAT,
+                            detectedIntent=speechResult.detectedIntent,
+                            reasoning=speechResult.reasoning,
+                            triggeredByTranscriptId=triggerTranscript.get("id"),
+                            modelName=response.modelName,
+                            processingTime=response.processingTime,
+                            priceCHF=response.priceCHF,
+                            timestamp=getIsoTimestamp(),
+                        ).model_dump()
+                        createdResponse = interface.createBotResponse(botResponseData)
+                        await _emitSessionEvent(sessionId, "botResponse", {
+                            "id": createdResponse.get("id"),
+                            "responseText": combinedText,
+                            "responseType": TeamsbotResponseType.CHAT.value,
+                            "detectedIntent": speechResult.detectedIntent,
+                            "reasoning": speechResult.reasoning,
+                            "modelName": response.modelName,
+                            "processingTime": response.processingTime,
+                            "priceCHF": response.priceCHF,
+                            "timestamp": botResponseData.get("timestamp"),
+                        })
+
+                        session = interface.getSession(sessionId)
+                        if session:
+                            count = session.get("botResponseCount", 0) + 1
+                            interface.updateSession(sessionId, {"botResponseCount": count})
+
+                        self._followUpWindowEnd = time.time() + 15.0
+                        logger.info(
+                            f"Bot responded via commands in session {sessionId}: "
+                            f"intent={speechResult.detectedIntent}, follow-up window open for 15s"
+                        )
+
         except Exception as e:
             logger.error(f"SPEECH_TEAMS analysis failed for session {sessionId}: {type(e).__name__}: {e}", exc_info=True)
             await _emitSessionEvent(sessionId, "error", {"message": f"AI analysis failed: {type(e).__name__}: {str(e)}"})
@@ -1352,14 +1465,55 @@ class TeamsbotService:
             }))
 
     async def _cmdSendChat(self, sessionId: str, params: dict, websocket: WebSocket):
-        """Send a message to the meeting chat."""
+        """Send the 'text' param to the meeting chat, persist it as a bot transcript and emit a 'transcript' session event."""
         chatText = params.get("text", "")
-        if chatText and websocket:
+        if not chatText:  # empty/missing text: nothing is sent and nothing is recorded
+            return
+        if websocket:  # forwarded only when a websocket is given; the recording below runs either way
             await websocket.send_text(json.dumps({
                 "type": "sendChatMessage",
                 "sessionId": sessionId,
                 "text": chatText,
             }))
+            logger.info(f"Chat command sent for session {sessionId}")
+
+        from . import interfaceFeatureTeamsbot as interfaceDb  # function-local import; NOTE(review): presumably avoids a circular import - confirm
+        interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
+
+        transcriptData = TeamsbotTranscript(
+            sessionId=sessionId,
+            speaker=self.config.botName,
+            text=chatText,
+            timestamp=getIsoTimestamp(),
+            confidence=1.0,
+            language=self.config.language,
+            isFinal=True,
+            source="chat",  # the same source tag is used for the context buffer entry and the emitted event
+        ).model_dump()
+        createdTranscript = interface.createTranscript(transcriptData)  # its "id" is reused below
+
+        self._contextBuffer.append({
+            "speaker": self.config.botName,
+            "text": chatText,
+            "timestamp": getUtcTimestamp(),  # note: getUtcTimestamp() here, getIsoTimestamp() for the transcript and event
+            "source": "chat",
+        })
+        self._lastTranscriptSpeaker = self.config.botName
+        self._lastTranscriptText = chatText
+        self._lastTranscriptId = createdTranscript.get("id")
+        self._lastBotResponseText = chatText.strip().lower()  # stored trimmed and lower-cased
+        self._lastBotResponseTs = time.time()
+
+        await _emitSessionEvent(sessionId, "transcript", {
+            "id": createdTranscript.get("id"),
+            "speaker": self.config.botName,
+            "text": chatText,
+            "confidence": 1.0,
+            "timestamp": getIsoTimestamp(),  # fresh call, not the timestamp stored in transcriptData
+            "isContinuation": False,
+            "source": "chat",
+            "speakerResolvedFromHint": False,
+        })
 
     async def _cmdReadChat(
         self,