From d63a41fbc832896d39c249f6d336d6dd250f3581 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Wed, 18 Mar 2026 13:57:01 +0100
Subject: [PATCH] Enhance LLM failover handling

---
 modules/aicore/aicorePluginAnthropic.py | 26 ++++++++++-
 modules/aicore/aicorePluginMistral.py   | 46 ++++++++++++++++---
 .../services/serviceAgent/agentLoop.py  | 30 +++++++++++-
 3 files changed, 93 insertions(+), 9 deletions(-)

diff --git a/modules/aicore/aicorePluginAnthropic.py b/modules/aicore/aicorePluginAnthropic.py
index 8b6ec197..81a2175e 100644
--- a/modules/aicore/aicorePluginAnthropic.py
+++ b/modules/aicore/aicorePluginAnthropic.py
@@ -4,7 +4,7 @@ import json
 import logging
 import httpx
 import os
-from typing import Dict, Any, List, AsyncGenerator, Union
+from typing import Dict, Any, List, AsyncGenerator, Optional, Union
 from fastapi import HTTPException
 from modules.shared.configuration import APP_CONFIG
 from .aicoreBase import BaseConnectorAi, RateLimitExceededException
@@ -295,6 +295,7 @@ class AiAnthropic(BaseConnectorAi):
         fullContent = ""
         toolUseBlocks: Dict[int, Dict[str, Any]] = {}
         currentToolIdx = -1
+        stopReason: Optional[str] = None
 
         async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
             if response.status_code != 200:
@@ -316,7 +317,16 @@
 
                 eventType = event.get("type", "")
 
-                if eventType == "content_block_start":
+                if eventType == "error":
+                    errDetail = event.get("error", {})
+                    errMsg = errDetail.get("message", str(errDetail))
+                    errType = errDetail.get("type", "unknown")
+                    logger.error(f"Anthropic stream error event: type={errType}, message={errMsg}")
+                    if "overloaded" in errMsg.lower() or "overloaded" in errType.lower():
+                        raise HTTPException(status_code=500, detail="Anthropic API is currently overloaded. Please try again in a few minutes.")
+                    raise HTTPException(status_code=500, detail=f"Anthropic stream error: [{errType}] {errMsg}")
+
+                elif eventType == "content_block_start":
                     block = event.get("content_block", {})
                     idx = event.get("index", 0)
                     if block.get("type") == "tool_use":
@@ -338,10 +348,22 @@
                     if idx in toolUseBlocks:
                         toolUseBlocks[idx]["arguments"] += delta.get("partial_json", "")
 
+                elif eventType == "message_delta":
+                    delta = event.get("delta", {})
+                    stopReason = delta.get("stop_reason", stopReason)
+
                 elif eventType == "message_stop":
                     break
 
+            if not fullContent and not toolUseBlocks:
+                logger.warning(
+                    f"Anthropic stream returned empty response: model={model.name}, "
+                    f"stopReason={stopReason}"
+                )
+
         metadata: Dict[str, Any] = {}
+        if stopReason:
+            metadata["stopReason"] = stopReason
         if toolUseBlocks:
             metadata["toolCalls"] = [
                 {
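[Editor's note on the Anthropic changes] The new "error" event branch surfaces Anthropic overload as an HTTPException so that an upstream failover layer can catch it and rotate to another model. That layer itself is not part of this patch; the sketch below shows one minimal shape such a driver could take. It is illustrative only: callWithFailover, callModel, and the models list are hypothetical names, not APIs from this codebase.

    # Hedged sketch of a failover driver; not the project's actual implementation.
    from typing import Any, Awaitable, Callable, Dict, List, Optional

    from fastapi import HTTPException


    async def callWithFailover(
        models: List[str],
        callModel: Callable[[str], Awaitable[Dict[str, Any]]],
    ) -> Dict[str, Any]:
        lastError: Optional[HTTPException] = None
        for modelName in models:
            try:
                return await callModel(modelName)
            except HTTPException as exc:
                # Overload / stream errors raised by the plugin above are treated
                # as retryable: remember the error and try the next model.
                lastError = exc
        raise lastError or HTTPException(status_code=503, detail="All models failed")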
diff --git a/modules/aicore/aicorePluginMistral.py b/modules/aicore/aicorePluginMistral.py
index 8c4fb6d9..885addcf 100644
--- a/modules/aicore/aicorePluginMistral.py
+++ b/modules/aicore/aicorePluginMistral.py
@@ -174,7 +174,11 @@ class AiMistral(BaseConnectorAi):
                 "temperature": temperature,
                 "max_tokens": maxTokens
             }
-
+
+            if modelCall.tools:
+                payload["tools"] = modelCall.tools
+                payload["tool_choice"] = modelCall.toolChoice or "auto"
+
             response = await self.httpClient.post(
                 model.apiUrl,
                 json=payload
@@ -214,15 +218,20 @@ class AiMistral(BaseConnectorAi):
                 raise HTTPException(status_code=500, detail=error_message)
 
             responseJson = response.json()
-            content = responseJson["choices"][0]["message"]["content"]
-
+            choiceMessage = responseJson["choices"][0]["message"]
+            content = choiceMessage.get("content") or ""
+
+            metadata = {"response_id": responseJson.get("id", "")}
+            if choiceMessage.get("tool_calls"):
+                metadata["toolCalls"] = choiceMessage["tool_calls"]
+
             return AiModelResponse(
                 content=content,
                 success=True,
                 modelId=model.name,
-                metadata={"response_id": responseJson.get("id", "")}
+                metadata=metadata,
             )
-
+
         except ContextLengthExceededException:
             # Re-raise context length exceptions without wrapping
             raise
@@ -250,7 +259,12 @@ class AiMistral(BaseConnectorAi):
                 "stream": True,
             }
 
+            if modelCall.tools:
+                payload["tools"] = modelCall.tools
+                payload["tool_choice"] = modelCall.toolChoice or "auto"
+
             fullContent = ""
+            toolCallsAccum: Dict[int, Dict[str, Any]] = {}
 
             async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
                 if response.status_code != 200:
@@ -280,11 +294,31 @@
                             fullContent += delta["content"]
                             yield delta["content"]
 
+                        for tcDelta in delta.get("tool_calls", []):
+                            idx = tcDelta.get("index", 0)
+                            if idx not in toolCallsAccum:
+                                toolCallsAccum[idx] = {
+                                    "id": tcDelta.get("id", ""),
+                                    "type": "function",
+                                    "function": {"name": "", "arguments": ""},
+                                }
+
+                            if tcDelta.get("id"):
+                                toolCallsAccum[idx]["id"] = tcDelta["id"]
+                            fn = tcDelta.get("function", {})
+                            if fn.get("name"):
+                                toolCallsAccum[idx]["function"]["name"] = fn["name"]
+                            if fn.get("arguments"):
+                                toolCallsAccum[idx]["function"]["arguments"] += fn["arguments"]
+
+            metadata: Dict[str, Any] = {}
+            if toolCallsAccum:
+                metadata["toolCalls"] = [toolCallsAccum[i] for i in sorted(toolCallsAccum)]
+
             yield AiModelResponse(
                 content=fullContent,
                 success=True,
                 modelId=model.name,
-                metadata={},
+                metadata=metadata,
             )
 
         except (RateLimitExceededException, ContextLengthExceededException, HTTPException):
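[Editor's note on the Mistral changes] The streaming tool-call accumulation can be exercised in isolation. The snippet below replays synthetic Mistral-style deltas through the same merge logic and asserts the reassembled call; the sample payload shapes are assumptions inferred from the fields the patch reads (index, id, function.name, function.arguments), not captured API output.

    # Standalone replay of the accumulation strategy above; the deltas are synthetic.
    from typing import Any, Dict, List

    deltas: List[Dict[str, Any]] = [
        {"index": 0, "id": "call_1", "function": {"name": "search", "arguments": ""}},
        {"index": 0, "function": {"arguments": '{"query": "fail'}},
        {"index": 0, "function": {"arguments": 'over"}'}},
    ]

    toolCallsAccum: Dict[int, Dict[str, Any]] = {}
    for tcDelta in deltas:
        idx = tcDelta.get("index", 0)
        if idx not in toolCallsAccum:
            toolCallsAccum[idx] = {
                "id": tcDelta.get("id", ""),
                "type": "function",
                "function": {"name": "", "arguments": ""},
            }
        if tcDelta.get("id"):
            toolCallsAccum[idx]["id"] = tcDelta["id"]
        fn = tcDelta.get("function", {})
        if fn.get("name"):
            toolCallsAccum[idx]["function"]["name"] = fn["name"]
        if fn.get("arguments"):
            toolCallsAccum[idx]["function"]["arguments"] += fn["arguments"]

    # Partial JSON fragments concatenate back into complete arguments.
    assert toolCallsAccum[0]["function"]["name"] == "search"
    assert toolCallsAccum[0]["function"]["arguments"] == '{"query": "failover"}'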
diff --git a/modules/serviceCenter/services/serviceAgent/agentLoop.py b/modules/serviceCenter/services/serviceAgent/agentLoop.py
index 5ece40df..1cf74152 100644
--- a/modules/serviceCenter/services/serviceAgent/agentLoop.py
+++ b/modules/serviceCenter/services/serviceAgent/agentLoop.py
@@ -65,7 +65,12 @@ async def runAgentLoop(
 
     tools = toolRegistry.getTools()
     toolDefinitions = toolRegistry.formatToolsForFunctionCalling()
-    toolsText = toolRegistry.formatToolsForPrompt()
+
+    # Text-based tool descriptions are ONLY used as a fallback when native function
+    # calling is unavailable. Including both creates conflicting instructions
+    # (text ```tool_call format vs. native tool_use blocks) and can cause the model
+    # to respond with plain text instead of actual tool calls.
+    toolsText = "" if toolDefinitions else toolRegistry.formatToolsForPrompt()
 
     systemPrompt = buildSystemPrompt(tools, toolsText, userLanguage=userLanguage)
     conversation = ConversationManager(systemPrompt)
@@ -193,6 +198,29 @@
 
         toolCalls = _parseToolCalls(aiResponse)
         textContent = _extractTextContent(aiResponse)
 
+        logger.debug(
+            f"Round {state.currentRound} AI response: model={aiResponse.modelName}, "
+            f"toolCalls={len(toolCalls)}, nativeToolCalls={'yes' if aiResponse.toolCalls else 'no'}, "
+            f"contentLen={len(aiResponse.content)}, streamedLen={len(streamedText)}"
+        )
+
+        # An empty response (no content, no tool calls) means the model returned
+        # nothing useful. Burn the round but let the loop continue so the next
+        # iteration can retry (AI-layer failover will try alternative models).
+        if not toolCalls and not textContent and not streamedText:
+            logger.warning(
+                f"Round {state.currentRound}: AI returned empty response "
+                f"(model={aiResponse.modelName}). Retrying next round."
+            )
+            conversation.addUserMessage(
+                "Your previous response was empty. Please use the available tools "
+                "to accomplish the task. Start by planning the steps, then call the "
+                "appropriate tools."
+            )
+            roundLog.durationMs = int((time.time() - roundStartTime) * 1000)
+            trace.rounds.append(roundLog)
+            continue
+
         if textContent and not streamedText:
             yield AgentEvent(type=AgentEventTypeEnum.MESSAGE, content=textContent)
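[Editor's note on the agent-loop changes] The empty-response branch burns a round, appends a corrective user message, and relies on a later round (plus AI-layer failover) to recover. Below is a compressed, self-contained rendition of that control flow with stubbed responses standing in for the real AI layer; every name in it is hypothetical, not project code.

    # Illustrative reduction of the retry-on-empty pattern added above.
    # fakeResponses simulates an AI layer that returns nothing on round 1
    # and recovers on round 2, after the corrective user message.
    fakeResponses = iter(["", "Plan: call the search tool, then summarize."])

    maxRounds = 3
    history = ["system: <prompt>"]
    for roundNo in range(1, maxRounds + 1):
        content = next(fakeResponses, "")
        if not content:
            # Burn the round: nudge the model and let the loop continue.
            history.append(
                "user: Your previous response was empty. Please use the "
                "available tools to accomplish the task."
            )
            continue
        history.append(f"assistant: {content}")
        break

    print(history[-1])  # assistant: Plan: call the search tool, then summarize.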