LLM failover enhanced: handle Anthropic stream error events, accumulate Mistral streamed tool calls, and retry empty agent-loop responses

This commit is contained in:
ValueOn AG 2026-03-18 13:57:01 +01:00
parent b876f01da2
commit d63a41fbc8
3 changed files with 93 additions and 9 deletions

View file

@ -4,7 +4,7 @@ import json
import logging
import httpx
import os
from typing import Dict, Any, List, AsyncGenerator, Union
from typing import Dict, Any, List, AsyncGenerator, Optional, Union
from fastapi import HTTPException
from modules.shared.configuration import APP_CONFIG
from .aicoreBase import BaseConnectorAi, RateLimitExceededException
@ -295,6 +295,7 @@ class AiAnthropic(BaseConnectorAi):
fullContent = ""
toolUseBlocks: Dict[int, Dict[str, Any]] = {}
currentToolIdx = -1
stopReason: Optional[str] = None
async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
if response.status_code != 200:
@ -316,7 +317,16 @@ class AiAnthropic(BaseConnectorAi):
eventType = event.get("type", "")
if eventType == "content_block_start":
if eventType == "error":
errDetail = event.get("error", {})
errMsg = errDetail.get("message", str(errDetail))
errType = errDetail.get("type", "unknown")
logger.error(f"Anthropic stream error event: type={errType}, message={errMsg}")
if "overloaded" in errMsg.lower() or "overloaded" in errType.lower():
raise HTTPException(status_code=500, detail=f"Anthropic API is currently overloaded. Please try again in a few minutes.")
raise HTTPException(status_code=500, detail=f"Anthropic stream error: [{errType}] {errMsg}")
elif eventType == "content_block_start":
block = event.get("content_block", {})
idx = event.get("index", 0)
if block.get("type") == "tool_use":
@ -338,10 +348,22 @@ class AiAnthropic(BaseConnectorAi):
if idx in toolUseBlocks:
toolUseBlocks[idx]["arguments"] += delta.get("partial_json", "")
elif eventType == "message_delta":
delta = event.get("delta", {})
stopReason = delta.get("stop_reason", stopReason)
elif eventType == "message_stop":
break
if not fullContent and not toolUseBlocks:
logger.warning(
f"Anthropic stream returned empty response: model={model.name}, "
f"stopReason={stopReason}"
)
metadata: Dict[str, Any] = {}
if stopReason:
metadata["stopReason"] = stopReason
if toolUseBlocks:
metadata["toolCalls"] = [
{

View file

@ -175,6 +175,10 @@ class AiMistral(BaseConnectorAi):
"max_tokens": maxTokens
}
if modelCall.tools:
payload["tools"] = modelCall.tools
payload["tool_choice"] = modelCall.toolChoice or "auto"
response = await self.httpClient.post(
model.apiUrl,
json=payload
@ -214,13 +218,18 @@ class AiMistral(BaseConnectorAi):
raise HTTPException(status_code=500, detail=error_message)
responseJson = response.json()
content = responseJson["choices"][0]["message"]["content"]
choiceMessage = responseJson["choices"][0]["message"]
content = choiceMessage.get("content") or ""
metadata = {"response_id": responseJson.get("id", "")}
if choiceMessage.get("tool_calls"):
metadata["toolCalls"] = choiceMessage["tool_calls"]
return AiModelResponse(
content=content,
success=True,
modelId=model.name,
metadata={"response_id": responseJson.get("id", "")}
metadata=metadata,
)
except ContextLengthExceededException:
@ -250,7 +259,12 @@ class AiMistral(BaseConnectorAi):
"stream": True,
}
if modelCall.tools:
payload["tools"] = modelCall.tools
payload["tool_choice"] = modelCall.toolChoice or "auto"
fullContent = ""
toolCallsAccum: Dict[int, Dict[str, Any]] = {}
async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
if response.status_code != 200:
@ -280,11 +294,31 @@ class AiMistral(BaseConnectorAi):
fullContent += delta["content"]
yield delta["content"]
for tcDelta in delta.get("tool_calls", []):
idx = tcDelta.get("index", 0)
if idx not in toolCallsAccum:
toolCallsAccum[idx] = {
"id": tcDelta.get("id", ""),
"type": "function",
"function": {"name": "", "arguments": ""},
}
if tcDelta.get("id"):
toolCallsAccum[idx]["id"] = tcDelta["id"]
fn = tcDelta.get("function", {})
if fn.get("name"):
toolCallsAccum[idx]["function"]["name"] = fn["name"]
if fn.get("arguments"):
toolCallsAccum[idx]["function"]["arguments"] += fn["arguments"]
metadata: Dict[str, Any] = {}
if toolCallsAccum:
metadata["toolCalls"] = [toolCallsAccum[i] for i in sorted(toolCallsAccum)]
yield AiModelResponse(
content=fullContent,
success=True,
modelId=model.name,
metadata={},
metadata=metadata,
)
except (RateLimitExceededException, ContextLengthExceededException, HTTPException):

View file

@ -65,7 +65,12 @@ async def runAgentLoop(
tools = toolRegistry.getTools()
toolDefinitions = toolRegistry.formatToolsForFunctionCalling()
toolsText = toolRegistry.formatToolsForPrompt()
# Text-based tool descriptions are ONLY used as fallback when native function
# calling is unavailable. Including both creates conflicting instructions
# (text ```tool_call format vs native tool_use blocks) and can cause the model
# to respond with plain text instead of actual tool calls.
toolsText = "" if toolDefinitions else toolRegistry.formatToolsForPrompt()
systemPrompt = buildSystemPrompt(tools, toolsText, userLanguage=userLanguage)
conversation = ConversationManager(systemPrompt)
@ -193,6 +198,29 @@ async def runAgentLoop(
toolCalls = _parseToolCalls(aiResponse)
textContent = _extractTextContent(aiResponse)
logger.debug(
f"Round {state.currentRound} AI response: model={aiResponse.modelName}, "
f"toolCalls={len(toolCalls)}, nativeToolCalls={'yes' if aiResponse.toolCalls else 'no'}, "
f"contentLen={len(aiResponse.content)}, streamedLen={len(streamedText)}"
)
# Empty response (no content, no tool calls) = model returned nothing useful.
# Burn the round but let the loop continue so the next iteration can retry
# (the failover mechanism in the AI layer will try alternative models).
if not toolCalls and not textContent and not streamedText:
logger.warning(
f"Round {state.currentRound}: AI returned empty response "
f"(model={aiResponse.modelName}). Retrying next round."
)
conversation.addUserMessage(
"Your previous response was empty. Please use the available tools "
"to accomplish the task. Start by planning the steps, then call the "
"appropriate tools."
)
roundLog.durationMs = int((time.time() - roundStartTime) * 1000)
trace.rounds.append(roundLog)
continue
if textContent and not streamedText:
yield AgentEvent(type=AgentEventTypeEnum.MESSAGE, content=textContent)