LLM failover enhanced

This commit is contained in:
ValueOn AG 2026-03-18 13:57:01 +01:00
parent b876f01da2
commit d63a41fbc8
3 changed files with 93 additions and 9 deletions

View file

@ -4,7 +4,7 @@ import json
import logging import logging
import httpx import httpx
import os import os
from typing import Dict, Any, List, AsyncGenerator, Union from typing import Dict, Any, List, AsyncGenerator, Optional, Union
from fastapi import HTTPException from fastapi import HTTPException
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
from .aicoreBase import BaseConnectorAi, RateLimitExceededException from .aicoreBase import BaseConnectorAi, RateLimitExceededException
@ -295,6 +295,7 @@ class AiAnthropic(BaseConnectorAi):
fullContent = "" fullContent = ""
toolUseBlocks: Dict[int, Dict[str, Any]] = {} toolUseBlocks: Dict[int, Dict[str, Any]] = {}
currentToolIdx = -1 currentToolIdx = -1
stopReason: Optional[str] = None
async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response: async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
if response.status_code != 200: if response.status_code != 200:
@ -316,7 +317,16 @@ class AiAnthropic(BaseConnectorAi):
eventType = event.get("type", "") eventType = event.get("type", "")
if eventType == "content_block_start": if eventType == "error":
errDetail = event.get("error", {})
errMsg = errDetail.get("message", str(errDetail))
errType = errDetail.get("type", "unknown")
logger.error(f"Anthropic stream error event: type={errType}, message={errMsg}")
if "overloaded" in errMsg.lower() or "overloaded" in errType.lower():
raise HTTPException(status_code=500, detail=f"Anthropic API is currently overloaded. Please try again in a few minutes.")
raise HTTPException(status_code=500, detail=f"Anthropic stream error: [{errType}] {errMsg}")
elif eventType == "content_block_start":
block = event.get("content_block", {}) block = event.get("content_block", {})
idx = event.get("index", 0) idx = event.get("index", 0)
if block.get("type") == "tool_use": if block.get("type") == "tool_use":
@ -338,10 +348,22 @@ class AiAnthropic(BaseConnectorAi):
if idx in toolUseBlocks: if idx in toolUseBlocks:
toolUseBlocks[idx]["arguments"] += delta.get("partial_json", "") toolUseBlocks[idx]["arguments"] += delta.get("partial_json", "")
elif eventType == "message_delta":
delta = event.get("delta", {})
stopReason = delta.get("stop_reason", stopReason)
elif eventType == "message_stop": elif eventType == "message_stop":
break break
if not fullContent and not toolUseBlocks:
logger.warning(
f"Anthropic stream returned empty response: model={model.name}, "
f"stopReason={stopReason}"
)
metadata: Dict[str, Any] = {} metadata: Dict[str, Any] = {}
if stopReason:
metadata["stopReason"] = stopReason
if toolUseBlocks: if toolUseBlocks:
metadata["toolCalls"] = [ metadata["toolCalls"] = [
{ {

View file

@ -174,7 +174,11 @@ class AiMistral(BaseConnectorAi):
"temperature": temperature, "temperature": temperature,
"max_tokens": maxTokens "max_tokens": maxTokens
} }
if modelCall.tools:
payload["tools"] = modelCall.tools
payload["tool_choice"] = modelCall.toolChoice or "auto"
response = await self.httpClient.post( response = await self.httpClient.post(
model.apiUrl, model.apiUrl,
json=payload json=payload
@ -214,15 +218,20 @@ class AiMistral(BaseConnectorAi):
raise HTTPException(status_code=500, detail=error_message) raise HTTPException(status_code=500, detail=error_message)
responseJson = response.json() responseJson = response.json()
content = responseJson["choices"][0]["message"]["content"] choiceMessage = responseJson["choices"][0]["message"]
content = choiceMessage.get("content") or ""
metadata = {"response_id": responseJson.get("id", "")}
if choiceMessage.get("tool_calls"):
metadata["toolCalls"] = choiceMessage["tool_calls"]
return AiModelResponse( return AiModelResponse(
content=content, content=content,
success=True, success=True,
modelId=model.name, modelId=model.name,
metadata={"response_id": responseJson.get("id", "")} metadata=metadata,
) )
except ContextLengthExceededException: except ContextLengthExceededException:
# Re-raise context length exceptions without wrapping # Re-raise context length exceptions without wrapping
raise raise
@ -250,7 +259,12 @@ class AiMistral(BaseConnectorAi):
"stream": True, "stream": True,
} }
if modelCall.tools:
payload["tools"] = modelCall.tools
payload["tool_choice"] = modelCall.toolChoice or "auto"
fullContent = "" fullContent = ""
toolCallsAccum: Dict[int, Dict[str, Any]] = {}
async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response: async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
if response.status_code != 200: if response.status_code != 200:
@ -280,11 +294,31 @@ class AiMistral(BaseConnectorAi):
fullContent += delta["content"] fullContent += delta["content"]
yield delta["content"] yield delta["content"]
for tcDelta in delta.get("tool_calls", []):
idx = tcDelta.get("index", 0)
if idx not in toolCallsAccum:
toolCallsAccum[idx] = {
"id": tcDelta.get("id", ""),
"type": "function",
"function": {"name": "", "arguments": ""},
}
if tcDelta.get("id"):
toolCallsAccum[idx]["id"] = tcDelta["id"]
fn = tcDelta.get("function", {})
if fn.get("name"):
toolCallsAccum[idx]["function"]["name"] = fn["name"]
if fn.get("arguments"):
toolCallsAccum[idx]["function"]["arguments"] += fn["arguments"]
metadata: Dict[str, Any] = {}
if toolCallsAccum:
metadata["toolCalls"] = [toolCallsAccum[i] for i in sorted(toolCallsAccum)]
yield AiModelResponse( yield AiModelResponse(
content=fullContent, content=fullContent,
success=True, success=True,
modelId=model.name, modelId=model.name,
metadata={}, metadata=metadata,
) )
except (RateLimitExceededException, ContextLengthExceededException, HTTPException): except (RateLimitExceededException, ContextLengthExceededException, HTTPException):

View file

@ -65,7 +65,12 @@ async def runAgentLoop(
tools = toolRegistry.getTools() tools = toolRegistry.getTools()
toolDefinitions = toolRegistry.formatToolsForFunctionCalling() toolDefinitions = toolRegistry.formatToolsForFunctionCalling()
toolsText = toolRegistry.formatToolsForPrompt()
# Text-based tool descriptions are ONLY used as fallback when native function
# calling is unavailable. Including both creates conflicting instructions
# (text ```tool_call format vs native tool_use blocks) and can cause the model
# to respond with plain text instead of actual tool calls.
toolsText = "" if toolDefinitions else toolRegistry.formatToolsForPrompt()
systemPrompt = buildSystemPrompt(tools, toolsText, userLanguage=userLanguage) systemPrompt = buildSystemPrompt(tools, toolsText, userLanguage=userLanguage)
conversation = ConversationManager(systemPrompt) conversation = ConversationManager(systemPrompt)
@ -193,6 +198,29 @@ async def runAgentLoop(
toolCalls = _parseToolCalls(aiResponse) toolCalls = _parseToolCalls(aiResponse)
textContent = _extractTextContent(aiResponse) textContent = _extractTextContent(aiResponse)
logger.debug(
f"Round {state.currentRound} AI response: model={aiResponse.modelName}, "
f"toolCalls={len(toolCalls)}, nativeToolCalls={'yes' if aiResponse.toolCalls else 'no'}, "
f"contentLen={len(aiResponse.content)}, streamedLen={len(streamedText)}"
)
# Empty response (no content, no tool calls) = model returned nothing useful.
# Burn the round but let the loop continue so the next iteration can retry
# (the failover mechanism in the AI layer will try alternative models).
if not toolCalls and not textContent and not streamedText:
logger.warning(
f"Round {state.currentRound}: AI returned empty response "
f"(model={aiResponse.modelName}). Retrying next round."
)
conversation.addUserMessage(
"Your previous response was empty. Please use the available tools "
"to accomplish the task. Start by planning the steps, then call the "
"appropriate tools."
)
roundLog.durationMs = int((time.time() - roundStartTime) * 1000)
trace.rounds.append(roundLog)
continue
if textContent and not streamedText: if textContent and not streamedText:
yield AgentEvent(type=AgentEventTypeEnum.MESSAGE, content=textContent) yield AgentEvent(type=AgentEventTypeEnum.MESSAGE, content=textContent)