From d63a41fbc832896d39c249f6d336d6dd250f3581 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Wed, 18 Mar 2026 13:57:01 +0100
Subject: [PATCH] Enhance LLM failover handling

---
 modules/aicore/aicorePluginAnthropic.py | 26 ++++++++++-
 modules/aicore/aicorePluginMistral.py   | 46 ++++++++++++++++---
 .../services/serviceAgent/agentLoop.py  | 30 +++++++++++-
 3 files changed, 93 insertions(+), 9 deletions(-)

diff --git a/modules/aicore/aicorePluginAnthropic.py b/modules/aicore/aicorePluginAnthropic.py
index 8b6ec197..81a2175e 100644
--- a/modules/aicore/aicorePluginAnthropic.py
+++ b/modules/aicore/aicorePluginAnthropic.py
@@ -4,7 +4,7 @@ import json
 import logging
 import httpx
 import os
-from typing import Dict, Any, List, AsyncGenerator, Union
+from typing import Dict, Any, List, AsyncGenerator, Optional, Union
 from fastapi import HTTPException
 from modules.shared.configuration import APP_CONFIG
 from .aicoreBase import BaseConnectorAi, RateLimitExceededException
@@ -295,6 +295,7 @@ class AiAnthropic(BaseConnectorAi):
         fullContent = ""
         toolUseBlocks: Dict[int, Dict[str, Any]] = {}
         currentToolIdx = -1
+        stopReason: Optional[str] = None
 
         async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
             if response.status_code != 200:
@@ -316,7 +317,16 @@
 
                 eventType = event.get("type", "")
 
-                if eventType == "content_block_start":
+                if eventType == "error":
+                    errDetail = event.get("error", {})
+                    errMsg = errDetail.get("message", str(errDetail))
+                    errType = errDetail.get("type", "unknown")
+                    logger.error(f"Anthropic stream error event: type={errType}, message={errMsg}")
+                    if "overloaded" in errMsg.lower() or "overloaded" in errType.lower():
+                        raise HTTPException(status_code=500, detail="Anthropic API is currently overloaded. Please try again in a few minutes.")
+                    raise HTTPException(status_code=500, detail=f"Anthropic stream error: [{errType}] {errMsg}")
+
+                elif eventType == "content_block_start":
                     block = event.get("content_block", {})
                     idx = event.get("index", 0)
                     if block.get("type") == "tool_use":
@@ -338,10 +348,22 @@
                     if idx in toolUseBlocks:
                         toolUseBlocks[idx]["arguments"] += delta.get("partial_json", "")
 
+                elif eventType == "message_delta":
+                    delta = event.get("delta", {})
+                    stopReason = delta.get("stop_reason", stopReason)
+
                 elif eventType == "message_stop":
                     break
 
+            if not fullContent and not toolUseBlocks:
+                logger.warning(
+                    f"Anthropic stream returned empty response: model={model.name}, "
+                    f"stopReason={stopReason}"
+                )
+
         metadata: Dict[str, Any] = {}
+        if stopReason:
+            metadata["stopReason"] = stopReason
         if toolUseBlocks:
             metadata["toolCalls"] = [
                 {
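[Editor's note on the Anthropic changes] The new "error" event branch surfaces Anthropic overload as an HTTPException so that an upstream failover layer can catch it and rotate to another model. That layer itself is not part of this patch; the sketch below shows one minimal shape such a driver could take. It is illustrative only: callWithFailover, callModel, and the models list are hypothetical names, not APIs from this codebase.

    # Hedged sketch of a failover driver; not the project's actual implementation.
    from typing import Any, Awaitable, Callable, Dict, List, Optional

    from fastapi import HTTPException


    async def callWithFailover(
        models: List[str],
        callModel: Callable[[str], Awaitable[Dict[str, Any]]],
    ) -> Dict[str, Any]:
        lastError: Optional[HTTPException] = None
        for modelName in models:
            try:
                return await callModel(modelName)
            except HTTPException as exc:
                # Overload / stream errors raised by the plugin above are treated
                # as retryable: remember the error and try the next model.
                lastError = exc
        raise lastError or HTTPException(status_code=503, detail="All models failed")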
diff --git a/modules/aicore/aicorePluginMistral.py b/modules/aicore/aicorePluginMistral.py
index 8c4fb6d9..885addcf 100644
--- a/modules/aicore/aicorePluginMistral.py
+++ b/modules/aicore/aicorePluginMistral.py
@@ -174,7 +174,11 @@ class AiMistral(BaseConnectorAi):
                 "temperature": temperature,
                 "max_tokens": maxTokens
             }
-
+
+            if modelCall.tools:
+                payload["tools"] = modelCall.tools
+                payload["tool_choice"] = modelCall.toolChoice or "auto"
+
             response = await self.httpClient.post(
                 model.apiUrl,
                 json=payload
@@ -214,15 +218,20 @@ class AiMistral(BaseConnectorAi):
                 raise HTTPException(status_code=500, detail=error_message)
 
             responseJson = response.json()
-            content = responseJson["choices"][0]["message"]["content"]
-
+            choiceMessage = responseJson["choices"][0]["message"]
+            content = choiceMessage.get("content") or ""
+
+            metadata = {"response_id": responseJson.get("id", "")}
+            if choiceMessage.get("tool_calls"):
+                metadata["toolCalls"] = choiceMessage["tool_calls"]
+
             return AiModelResponse(
                 content=content,
                 success=True,
                 modelId=model.name,
-                metadata={"response_id": responseJson.get("id", "")}
+                metadata=metadata,
             )
-
+
         except ContextLengthExceededException:
             # Re-raise context length exceptions without wrapping
             raise
@@ -250,7 +259,12 @@ class AiMistral(BaseConnectorAi):
                 "stream": True,
             }
 
+            if modelCall.tools:
+                payload["tools"] = modelCall.tools
+                payload["tool_choice"] = modelCall.toolChoice or "auto"
+
             fullContent = ""
+            toolCallsAccum: Dict[int, Dict[str, Any]] = {}
 
             async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
                 if response.status_code != 200:
@@ -280,11 +294,31 @@
                             fullContent += delta["content"]
                             yield delta["content"]
 
+                        for tcDelta in delta.get("tool_calls", []):
+                            idx = tcDelta.get("index", 0)
+                            if idx not in toolCallsAccum:
+                                toolCallsAccum[idx] = {
+                                    "id": tcDelta.get("id", ""),
+                                    "type": "function",
+                                    "function": {"name": "", "arguments": ""},
+                                }
+
+                            if tcDelta.get("id"):
+                                toolCallsAccum[idx]["id"] = tcDelta["id"]
+                            fn = tcDelta.get("function", {})
+                            if fn.get("name"):
+                                toolCallsAccum[idx]["function"]["name"] = fn["name"]
+                            if fn.get("arguments"):
+                                toolCallsAccum[idx]["function"]["arguments"] += fn["arguments"]
+
+            metadata: Dict[str, Any] = {}
+            if toolCallsAccum:
+                metadata["toolCalls"] = [toolCallsAccum[i] for i in sorted(toolCallsAccum)]
+
             yield AiModelResponse(
                 content=fullContent,
                 success=True,
                 modelId=model.name,
-                metadata={},
+                metadata=metadata,
             )
 
         except (RateLimitExceededException, ContextLengthExceededException, HTTPException):
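[Editor's note on the Mistral changes] The streaming tool-call accumulation can be exercised in isolation. The snippet below replays synthetic Mistral-style deltas through the same merge logic and asserts the reassembled call; the sample payload shapes are assumptions inferred from the fields the patch reads (index, id, function.name, function.arguments), not captured API output.

    # Standalone replay of the accumulation strategy above; the deltas are synthetic.
    from typing import Any, Dict, List

    deltas: List[Dict[str, Any]] = [
        {"index": 0, "id": "call_1", "function": {"name": "search", "arguments": ""}},
        {"index": 0, "function": {"arguments": '{"query": "fail'}},
        {"index": 0, "function": {"arguments": 'over"}'}},
    ]

    toolCallsAccum: Dict[int, Dict[str, Any]] = {}
    for tcDelta in deltas:
        idx = tcDelta.get("index", 0)
        if idx not in toolCallsAccum:
            toolCallsAccum[idx] = {
                "id": tcDelta.get("id", ""),
                "type": "function",
                "function": {"name": "", "arguments": ""},
            }
        if tcDelta.get("id"):
            toolCallsAccum[idx]["id"] = tcDelta["id"]
        fn = tcDelta.get("function", {})
        if fn.get("name"):
            toolCallsAccum[idx]["function"]["name"] = fn["name"]
        if fn.get("arguments"):
            toolCallsAccum[idx]["function"]["arguments"] += fn["arguments"]

    # Partial JSON fragments concatenate back into complete arguments.
    assert toolCallsAccum[0]["function"]["name"] == "search"
    assert toolCallsAccum[0]["function"]["arguments"] == '{"query": "failover"}'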
diff --git a/modules/serviceCenter/services/serviceAgent/agentLoop.py b/modules/serviceCenter/services/serviceAgent/agentLoop.py
index 5ece40df..1cf74152 100644
--- a/modules/serviceCenter/services/serviceAgent/agentLoop.py
+++ b/modules/serviceCenter/services/serviceAgent/agentLoop.py
@@ -65,7 +65,12 @@ async def runAgentLoop(
 
     tools = toolRegistry.getTools()
     toolDefinitions = toolRegistry.formatToolsForFunctionCalling()
-    toolsText = toolRegistry.formatToolsForPrompt()
+
+    # Text-based tool descriptions are ONLY used as a fallback when native function
+    # calling is unavailable. Including both creates conflicting instructions
+    # (text ```tool_call format vs. native tool_use blocks) and can cause the model
+    # to respond with plain text instead of actual tool calls.
+    toolsText = "" if toolDefinitions else toolRegistry.formatToolsForPrompt()
 
     systemPrompt = buildSystemPrompt(tools, toolsText, userLanguage=userLanguage)
     conversation = ConversationManager(systemPrompt)
@@ -193,6 +198,29 @@
 
         toolCalls = _parseToolCalls(aiResponse)
         textContent = _extractTextContent(aiResponse)
 
+        logger.debug(
+            f"Round {state.currentRound} AI response: model={aiResponse.modelName}, "
+            f"toolCalls={len(toolCalls)}, nativeToolCalls={'yes' if aiResponse.toolCalls else 'no'}, "
+            f"contentLen={len(aiResponse.content)}, streamedLen={len(streamedText)}"
+        )
+
+        # An empty response (no content, no tool calls) means the model returned
+        # nothing useful. Burn the round but let the loop continue so the next
+        # iteration can retry (AI-layer failover will try alternative models).
+        if not toolCalls and not textContent and not streamedText:
+            logger.warning(
+                f"Round {state.currentRound}: AI returned empty response "
+                f"(model={aiResponse.modelName}). Retrying next round."
+            )
+            conversation.addUserMessage(
+                "Your previous response was empty. Please use the available tools "
+                "to accomplish the task. Start by planning the steps, then call the "
+                "appropriate tools."
+            )
+            roundLog.durationMs = int((time.time() - roundStartTime) * 1000)
+            trace.rounds.append(roundLog)
+            continue
+
         if textContent and not streamedText:
             yield AgentEvent(type=AgentEventTypeEnum.MESSAGE, content=textContent)
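[Editor's note on the agent-loop changes] The empty-response branch burns a round, appends a corrective user message, and relies on a later round (plus AI-layer failover) to recover. Below is a compressed, self-contained rendition of that control flow with stubbed responses standing in for the real AI layer; every name in it is hypothetical, not project code.

    # Illustrative reduction of the retry-on-empty pattern added above.
    # fakeResponses simulates an AI layer that returns nothing on round 1
    # and recovers on round 2, after the corrective user message.
    fakeResponses = iter(["", "Plan: call the search tool, then summarize."])

    maxRounds = 3
    history = ["system: <prompt>"]
    for roundNo in range(1, maxRounds + 1):
        content = next(fakeResponses, "")
        if not content:
            # Burn the round: nudge the model and let the loop continue.
            history.append(
                "user: Your previous response was empty. Please use the "
                "available tools to accomplish the task."
            )
            continue
        history.append(f"assistant: {content}")
        break

    print(history[-1])  # assistant: Plan: call the search tool, then summarize.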