From d63a41fbc832896d39c249f6d336d6dd250f3581 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Wed, 18 Mar 2026 13:57:01 +0100
Subject: [PATCH] Harden LLM failover: stream errors, Mistral tool calls, empty-response retry
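
Harden the failover path end to end:

aicorePluginAnthropic
- Handle in-stream "error" events instead of dropping them, which could
  previously surface as silently empty responses. Per Anthropic's
  streaming docs, such an event looks like:

      event: error
      data: {"type": "error", "error": {"type": "overloaded_error", "message": "Overloaded"}}

  Overload errors get a user-friendly message; everything else is
  wrapped in an HTTPException so the failover layer can react.
- Track stop_reason from message_delta events, expose it in response
  metadata, and warn when a stream ends with neither text nor tool-use
  blocks.

aicorePluginMistral
- Pass tools/tool_choice in the request payload and surface tool_calls
  in response metadata, for both the non-streaming and streaming paths.
  Streamed tool calls arrive as incremental deltas and are accumulated
  by index.

agentLoop
- Include text-based tool descriptions in the system prompt only when
  native function calling is unavailable, to avoid conflicting
  instructions.
- When a round yields a completely empty response, log it, nudge the
  model with a follow-up message, and continue so the AI layer's
  failover can retry an alternative model.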
---
modules/aicore/aicorePluginAnthropic.py | 26 ++++++++++-
modules/aicore/aicorePluginMistral.py | 46 ++++++++++++++++---
.../services/serviceAgent/agentLoop.py | 30 +++++++++++-
3 files changed, 93 insertions(+), 9 deletions(-)
diff --git a/modules/aicore/aicorePluginAnthropic.py b/modules/aicore/aicorePluginAnthropic.py
index 8b6ec197..81a2175e 100644
--- a/modules/aicore/aicorePluginAnthropic.py
+++ b/modules/aicore/aicorePluginAnthropic.py
@@ -4,7 +4,7 @@ import json
import logging
import httpx
import os
-from typing import Dict, Any, List, AsyncGenerator, Union
+from typing import Dict, Any, List, AsyncGenerator, Optional, Union
from fastapi import HTTPException
from modules.shared.configuration import APP_CONFIG
from .aicoreBase import BaseConnectorAi, RateLimitExceededException
@@ -295,6 +295,7 @@ class AiAnthropic(BaseConnectorAi):
fullContent = ""
toolUseBlocks: Dict[int, Dict[str, Any]] = {}
currentToolIdx = -1
+ stopReason: Optional[str] = None
async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
if response.status_code != 200:
@@ -316,7 +317,16 @@ class AiAnthropic(BaseConnectorAi):
eventType = event.get("type", "")
- if eventType == "content_block_start":
+ if eventType == "error":
+ errDetail = event.get("error", {})
+ errMsg = errDetail.get("message", str(errDetail))
+ errType = errDetail.get("type", "unknown")
+ logger.error(f"Anthropic stream error event: type={errType}, message={errMsg}")
+ if "overloaded" in errMsg.lower() or "overloaded" in errType.lower():
+                            raise HTTPException(status_code=500, detail="Anthropic API is currently overloaded. Please try again in a few minutes.")
+ raise HTTPException(status_code=500, detail=f"Anthropic stream error: [{errType}] {errMsg}")
+
+ elif eventType == "content_block_start":
block = event.get("content_block", {})
idx = event.get("index", 0)
if block.get("type") == "tool_use":
@@ -338,10 +348,22 @@ class AiAnthropic(BaseConnectorAi):
if idx in toolUseBlocks:
toolUseBlocks[idx]["arguments"] += delta.get("partial_json", "")
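+                    # message_delta carries the final stop_reason for the
+                    # turn (e.g. "end_turn", "tool_use", "max_tokens").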
+ elif eventType == "message_delta":
+ delta = event.get("delta", {})
+ stopReason = delta.get("stop_reason", stopReason)
+
elif eventType == "message_stop":
break
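+        # A stream that ends with neither text nor tool-use blocks usually
+        # points to an upstream problem that was not reported as an explicit
+        # error event, so make it visible in the logs.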
+ if not fullContent and not toolUseBlocks:
+ logger.warning(
+ f"Anthropic stream returned empty response: model={model.name}, "
+ f"stopReason={stopReason}"
+ )
+
metadata: Dict[str, Any] = {}
+ if stopReason:
+ metadata["stopReason"] = stopReason
if toolUseBlocks:
metadata["toolCalls"] = [
{
diff --git a/modules/aicore/aicorePluginMistral.py b/modules/aicore/aicorePluginMistral.py
index 8c4fb6d9..885addcf 100644
--- a/modules/aicore/aicorePluginMistral.py
+++ b/modules/aicore/aicorePluginMistral.py
@@ -174,7 +174,11 @@ class AiMistral(BaseConnectorAi):
"temperature": temperature,
"max_tokens": maxTokens
}
-
+
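+        # Mistral's chat API accepts OpenAI-style tools/tool_choice; default
+        # to "auto" so the model decides when to call a tool.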
+ if modelCall.tools:
+ payload["tools"] = modelCall.tools
+ payload["tool_choice"] = modelCall.toolChoice or "auto"
+
response = await self.httpClient.post(
model.apiUrl,
json=payload
@@ -214,15 +218,20 @@ class AiMistral(BaseConnectorAi):
raise HTTPException(status_code=500, detail=error_message)
responseJson = response.json()
- content = responseJson["choices"][0]["message"]["content"]
-
+ choiceMessage = responseJson["choices"][0]["message"]
+ content = choiceMessage.get("content") or ""
+
+        metadata: Dict[str, Any] = {"response_id": responseJson.get("id", "")}
+ if choiceMessage.get("tool_calls"):
+ metadata["toolCalls"] = choiceMessage["tool_calls"]
+
return AiModelResponse(
content=content,
success=True,
modelId=model.name,
- metadata={"response_id": responseJson.get("id", "")}
+ metadata=metadata,
)
-
+
except ContextLengthExceededException:
# Re-raise context length exceptions without wrapping
raise
@@ -250,7 +259,12 @@ class AiMistral(BaseConnectorAi):
"stream": True,
}
+ if modelCall.tools:
+ payload["tools"] = modelCall.tools
+ payload["tool_choice"] = modelCall.toolChoice or "auto"
+
fullContent = ""
+ toolCallsAccum: Dict[int, Dict[str, Any]] = {}
async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
if response.status_code != 200:
@@ -280,11 +294,31 @@ class AiMistral(BaseConnectorAi):
fullContent += delta["content"]
yield delta["content"]
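+                    # Tool calls stream as incremental deltas keyed by index:
+                    # the id and function name arrive once, while the JSON
+                    # arguments arrive in chunks and must be concatenated.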
+                    for tcDelta in (delta.get("tool_calls") or []):
+ idx = tcDelta.get("index", 0)
+ if idx not in toolCallsAccum:
+ toolCallsAccum[idx] = {
+ "id": tcDelta.get("id", ""),
+ "type": "function",
+ "function": {"name": "", "arguments": ""},
+ }
+ if tcDelta.get("id"):
+ toolCallsAccum[idx]["id"] = tcDelta["id"]
+ fn = tcDelta.get("function", {})
+ if fn.get("name"):
+ toolCallsAccum[idx]["function"]["name"] = fn["name"]
+ if fn.get("arguments"):
+ toolCallsAccum[idx]["function"]["arguments"] += fn["arguments"]
+
+ metadata: Dict[str, Any] = {}
+ if toolCallsAccum:
+ metadata["toolCalls"] = [toolCallsAccum[i] for i in sorted(toolCallsAccum)]
+
yield AiModelResponse(
content=fullContent,
success=True,
modelId=model.name,
- metadata={},
+ metadata=metadata,
)
except (RateLimitExceededException, ContextLengthExceededException, HTTPException):
diff --git a/modules/serviceCenter/services/serviceAgent/agentLoop.py b/modules/serviceCenter/services/serviceAgent/agentLoop.py
index 5ece40df..1cf74152 100644
--- a/modules/serviceCenter/services/serviceAgent/agentLoop.py
+++ b/modules/serviceCenter/services/serviceAgent/agentLoop.py
@@ -65,7 +65,12 @@ async def runAgentLoop(
tools = toolRegistry.getTools()
toolDefinitions = toolRegistry.formatToolsForFunctionCalling()
- toolsText = toolRegistry.formatToolsForPrompt()
+
+ # Text-based tool descriptions are ONLY used as fallback when native function
+ # calling is unavailable. Including both creates conflicting instructions
+ # (text ```tool_call format vs native tool_use blocks) and can cause the model
+ # to respond with plain text instead of actual tool calls.
+ toolsText = "" if toolDefinitions else toolRegistry.formatToolsForPrompt()
systemPrompt = buildSystemPrompt(tools, toolsText, userLanguage=userLanguage)
conversation = ConversationManager(systemPrompt)
@@ -193,6 +198,29 @@ async def runAgentLoop(
toolCalls = _parseToolCalls(aiResponse)
textContent = _extractTextContent(aiResponse)
+ logger.debug(
+ f"Round {state.currentRound} AI response: model={aiResponse.modelName}, "
+ f"toolCalls={len(toolCalls)}, nativeToolCalls={'yes' if aiResponse.toolCalls else 'no'}, "
+ f"contentLen={len(aiResponse.content)}, streamedLen={len(streamedText)}"
+ )
+
+    # An empty response (no content, no tool calls) means the model returned
+    # nothing useful. Burn the round but let the loop continue so the next
+    # iteration can retry (the failover mechanism in the AI layer will try
+    # alternative models).
+ if not toolCalls and not textContent and not streamedText:
+ logger.warning(
+ f"Round {state.currentRound}: AI returned empty response "
+ f"(model={aiResponse.modelName}). Retrying next round."
+ )
+ conversation.addUserMessage(
+ "Your previous response was empty. Please use the available tools "
+ "to accomplish the task. Start by planning the steps, then call the "
+ "appropriate tools."
+ )
+ roundLog.durationMs = int((time.time() - roundStartTime) * 1000)
+ trace.rounds.append(roundLog)
+ continue
+
if textContent and not streamedText:
yield AgentEvent(type=AgentEventTypeEnum.MESSAGE, content=textContent)