From e2c1c58442017ba0b92cb54189a33de775141ebd Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Sat, 7 Mar 2026 00:56:04 +0100 Subject: [PATCH] fix: teamsbot SSE-Events fuer Greeting, Commands, Chat-Fehler und command-only Responses Made-with: Cursor --- modules/features/teamsbot/service.py | 158 ++++++++++++++++++++++++++- 1 file changed, 156 insertions(+), 2 deletions(-) diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index 787ee1c8..361577d9 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -342,6 +342,12 @@ class TeamsbotService: logger.info(f"[WS] Voice greeting: text={greetingText[:60]}..., language={greetingLang}") if greetingText and voiceInterface: try: + await _emitSessionEvent(sessionId, "ttsDeliveryStatus", { + "status": "requested", + "hasWebSocket": True, + "message": "Voice greeting TTS requested", + "timestamp": getIsoTimestamp(), + }) ttsResult = await voiceInterface.textToSpeech( text=greetingText, languageCode=greetingLang, @@ -359,6 +365,53 @@ class TeamsbotService: } })) logger.info(f"Voice greeting TTS sent for session {sessionId}") + await _emitSessionEvent(sessionId, "ttsDeliveryStatus", { + "status": "dispatched", + "hasWebSocket": True, + "message": "Voice greeting TTS dispatched to bot", + "timestamp": getIsoTimestamp(), + }) + + greetingTranscriptData = TeamsbotTranscript( + sessionId=sessionId, + speaker=self.config.botName, + text=greetingText, + timestamp=getIsoTimestamp(), + confidence=1.0, + language=greetingLang, + isFinal=True, + source="botResponse", + ).model_dump() + greetingTranscript = interface.createTranscript(greetingTranscriptData) + + self._contextBuffer.append({ + "speaker": self.config.botName, + "text": greetingText, + "timestamp": getUtcTimestamp(), + "source": "botResponse", + }) + self._lastTranscriptSpeaker = self.config.botName + self._lastTranscriptText = greetingText + self._lastTranscriptId = greetingTranscript.get("id") + + await _emitSessionEvent(sessionId, "botResponse", { + "id": greetingTranscript.get("id"), + "responseText": greetingText, + "responseType": TeamsbotResponseType.AUDIO.value, + "detectedIntent": "greeting", + "reasoning": "Automatic join greeting", + "timestamp": getIsoTimestamp(), + }) + await _emitSessionEvent(sessionId, "transcript", { + "id": greetingTranscript.get("id"), + "speaker": self.config.botName, + "text": greetingText, + "confidence": 1.0, + "timestamp": getIsoTimestamp(), + "isContinuation": False, + "source": "botResponse", + "speakerResolvedFromHint": False, + }) except Exception as ttsErr: logger.warning(f"Voice greeting TTS failed for session {sessionId}: {ttsErr}") @@ -430,6 +483,21 @@ class TeamsbotService: _waitAndForwardMfa(sessionId, mfaQueue, websocket) ) + elif msgType == "chatSendFailed": + errorData = message.get("error", {}) + reason = errorData.get("reason", "unknown") + failedText = errorData.get("text", "") + logger.warning( + f"[WS] Chat send failed for session {sessionId}: " + f"reason={reason}, text={failedText[:60]}" + ) + await _emitSessionEvent(sessionId, "chatSendFailed", { + "reason": reason, + "message": errorData.get("message", "Chat message could not be sent"), + "text": failedText, + "timestamp": getIsoTimestamp(), + }) + elif msgType == "mfaResolved": success = message.get("success", False) logger.info(f"[WS] MFA resolved: success={success}") @@ -1282,6 +1350,51 @@ class TeamsbotService: if speechResult.commands: await self._executeCommands(sessionId, speechResult.commands, voiceInterface, websocket) + # When AI used only commands (no responseText), emit botResponse SSE + # so the UI shows the response. Extract text from sendChat commands. + if speechResult.shouldRespond and not speechResult.responseText: + cmdTexts = [ + c.params.get("text", "") for c in speechResult.commands + if c.action == "sendChat" and c.params and c.params.get("text") + ] + combinedText = " ".join(cmdTexts) if cmdTexts else None + if combinedText: + botResponseData = TeamsbotBotResponse( + sessionId=sessionId, + responseText=combinedText, + responseType=TeamsbotResponseType.CHAT, + detectedIntent=speechResult.detectedIntent, + reasoning=speechResult.reasoning, + triggeredByTranscriptId=triggerTranscript.get("id"), + modelName=response.modelName, + processingTime=response.processingTime, + priceCHF=response.priceCHF, + timestamp=getIsoTimestamp(), + ).model_dump() + createdResponse = interface.createBotResponse(botResponseData) + await _emitSessionEvent(sessionId, "botResponse", { + "id": createdResponse.get("id"), + "responseText": combinedText, + "responseType": TeamsbotResponseType.CHAT.value, + "detectedIntent": speechResult.detectedIntent, + "reasoning": speechResult.reasoning, + "modelName": response.modelName, + "processingTime": response.processingTime, + "priceCHF": response.priceCHF, + "timestamp": botResponseData.get("timestamp"), + }) + + session = interface.getSession(sessionId) + if session: + count = session.get("botResponseCount", 0) + 1 + interface.updateSession(sessionId, {"botResponseCount": count}) + + self._followUpWindowEnd = time.time() + 15.0 + logger.info( + f"Bot responded via commands in session {sessionId}: " + f"intent={speechResult.detectedIntent}, follow-up window open for 15s" + ) + except Exception as e: logger.error(f"SPEECH_TEAMS analysis failed for session {sessionId}: {type(e).__name__}: {e}", exc_info=True) await _emitSessionEvent(sessionId, "error", {"message": f"AI analysis failed: {type(e).__name__}: {str(e)}"}) @@ -1352,14 +1465,55 @@ class TeamsbotService: })) async def _cmdSendChat(self, sessionId: str, params: dict, websocket: WebSocket): - """Send a message to the meeting chat.""" + """Send a message to the meeting chat and record it in transcript/SSE.""" chatText = params.get("text", "") - if chatText and websocket: + if not chatText: + return + if websocket: await websocket.send_text(json.dumps({ "type": "sendChatMessage", "sessionId": sessionId, "text": chatText, })) + logger.info(f"Chat command sent for session {sessionId}") + + from . import interfaceFeatureTeamsbot as interfaceDb + interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) + + transcriptData = TeamsbotTranscript( + sessionId=sessionId, + speaker=self.config.botName, + text=chatText, + timestamp=getIsoTimestamp(), + confidence=1.0, + language=self.config.language, + isFinal=True, + source="chat", + ).model_dump() + createdTranscript = interface.createTranscript(transcriptData) + + self._contextBuffer.append({ + "speaker": self.config.botName, + "text": chatText, + "timestamp": getUtcTimestamp(), + "source": "chat", + }) + self._lastTranscriptSpeaker = self.config.botName + self._lastTranscriptText = chatText + self._lastTranscriptId = createdTranscript.get("id") + self._lastBotResponseText = chatText.strip().lower() + self._lastBotResponseTs = time.time() + + await _emitSessionEvent(sessionId, "transcript", { + "id": createdTranscript.get("id"), + "speaker": self.config.botName, + "text": chatText, + "confidence": 1.0, + "timestamp": getIsoTimestamp(), + "isContinuation": False, + "source": "chat", + "speakerResolvedFromHint": False, + }) async def _cmdReadChat( self,