llm failover enhanced
This commit is contained in:
parent
b876f01da2
commit
d63a41fbc8
3 changed files with 93 additions and 9 deletions
|
|
@ -4,7 +4,7 @@ import json
|
||||||
import logging
|
import logging
|
||||||
import httpx
|
import httpx
|
||||||
import os
|
import os
|
||||||
from typing import Dict, Any, List, AsyncGenerator, Union
|
from typing import Dict, Any, List, AsyncGenerator, Optional, Union
|
||||||
from fastapi import HTTPException
|
from fastapi import HTTPException
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from .aicoreBase import BaseConnectorAi, RateLimitExceededException
|
from .aicoreBase import BaseConnectorAi, RateLimitExceededException
|
||||||
|
|
@ -295,6 +295,7 @@ class AiAnthropic(BaseConnectorAi):
|
||||||
fullContent = ""
|
fullContent = ""
|
||||||
toolUseBlocks: Dict[int, Dict[str, Any]] = {}
|
toolUseBlocks: Dict[int, Dict[str, Any]] = {}
|
||||||
currentToolIdx = -1
|
currentToolIdx = -1
|
||||||
|
stopReason: Optional[str] = None
|
||||||
|
|
||||||
async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
|
async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
|
|
@ -316,7 +317,16 @@ class AiAnthropic(BaseConnectorAi):
|
||||||
|
|
||||||
eventType = event.get("type", "")
|
eventType = event.get("type", "")
|
||||||
|
|
||||||
if eventType == "content_block_start":
|
if eventType == "error":
|
||||||
|
errDetail = event.get("error", {})
|
||||||
|
errMsg = errDetail.get("message", str(errDetail))
|
||||||
|
errType = errDetail.get("type", "unknown")
|
||||||
|
logger.error(f"Anthropic stream error event: type={errType}, message={errMsg}")
|
||||||
|
if "overloaded" in errMsg.lower() or "overloaded" in errType.lower():
|
||||||
|
raise HTTPException(status_code=500, detail=f"Anthropic API is currently overloaded. Please try again in a few minutes.")
|
||||||
|
raise HTTPException(status_code=500, detail=f"Anthropic stream error: [{errType}] {errMsg}")
|
||||||
|
|
||||||
|
elif eventType == "content_block_start":
|
||||||
block = event.get("content_block", {})
|
block = event.get("content_block", {})
|
||||||
idx = event.get("index", 0)
|
idx = event.get("index", 0)
|
||||||
if block.get("type") == "tool_use":
|
if block.get("type") == "tool_use":
|
||||||
|
|
@ -338,10 +348,22 @@ class AiAnthropic(BaseConnectorAi):
|
||||||
if idx in toolUseBlocks:
|
if idx in toolUseBlocks:
|
||||||
toolUseBlocks[idx]["arguments"] += delta.get("partial_json", "")
|
toolUseBlocks[idx]["arguments"] += delta.get("partial_json", "")
|
||||||
|
|
||||||
|
elif eventType == "message_delta":
|
||||||
|
delta = event.get("delta", {})
|
||||||
|
stopReason = delta.get("stop_reason", stopReason)
|
||||||
|
|
||||||
elif eventType == "message_stop":
|
elif eventType == "message_stop":
|
||||||
break
|
break
|
||||||
|
|
||||||
|
if not fullContent and not toolUseBlocks:
|
||||||
|
logger.warning(
|
||||||
|
f"Anthropic stream returned empty response: model={model.name}, "
|
||||||
|
f"stopReason={stopReason}"
|
||||||
|
)
|
||||||
|
|
||||||
metadata: Dict[str, Any] = {}
|
metadata: Dict[str, Any] = {}
|
||||||
|
if stopReason:
|
||||||
|
metadata["stopReason"] = stopReason
|
||||||
if toolUseBlocks:
|
if toolUseBlocks:
|
||||||
metadata["toolCalls"] = [
|
metadata["toolCalls"] = [
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -175,6 +175,10 @@ class AiMistral(BaseConnectorAi):
|
||||||
"max_tokens": maxTokens
|
"max_tokens": maxTokens
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if modelCall.tools:
|
||||||
|
payload["tools"] = modelCall.tools
|
||||||
|
payload["tool_choice"] = modelCall.toolChoice or "auto"
|
||||||
|
|
||||||
response = await self.httpClient.post(
|
response = await self.httpClient.post(
|
||||||
model.apiUrl,
|
model.apiUrl,
|
||||||
json=payload
|
json=payload
|
||||||
|
|
@ -214,13 +218,18 @@ class AiMistral(BaseConnectorAi):
|
||||||
raise HTTPException(status_code=500, detail=error_message)
|
raise HTTPException(status_code=500, detail=error_message)
|
||||||
|
|
||||||
responseJson = response.json()
|
responseJson = response.json()
|
||||||
content = responseJson["choices"][0]["message"]["content"]
|
choiceMessage = responseJson["choices"][0]["message"]
|
||||||
|
content = choiceMessage.get("content") or ""
|
||||||
|
|
||||||
|
metadata = {"response_id": responseJson.get("id", "")}
|
||||||
|
if choiceMessage.get("tool_calls"):
|
||||||
|
metadata["toolCalls"] = choiceMessage["tool_calls"]
|
||||||
|
|
||||||
return AiModelResponse(
|
return AiModelResponse(
|
||||||
content=content,
|
content=content,
|
||||||
success=True,
|
success=True,
|
||||||
modelId=model.name,
|
modelId=model.name,
|
||||||
metadata={"response_id": responseJson.get("id", "")}
|
metadata=metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
except ContextLengthExceededException:
|
except ContextLengthExceededException:
|
||||||
|
|
@ -250,7 +259,12 @@ class AiMistral(BaseConnectorAi):
|
||||||
"stream": True,
|
"stream": True,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if modelCall.tools:
|
||||||
|
payload["tools"] = modelCall.tools
|
||||||
|
payload["tool_choice"] = modelCall.toolChoice or "auto"
|
||||||
|
|
||||||
fullContent = ""
|
fullContent = ""
|
||||||
|
toolCallsAccum: Dict[int, Dict[str, Any]] = {}
|
||||||
|
|
||||||
async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
|
async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
|
|
@ -280,11 +294,31 @@ class AiMistral(BaseConnectorAi):
|
||||||
fullContent += delta["content"]
|
fullContent += delta["content"]
|
||||||
yield delta["content"]
|
yield delta["content"]
|
||||||
|
|
||||||
|
for tcDelta in delta.get("tool_calls", []):
|
||||||
|
idx = tcDelta.get("index", 0)
|
||||||
|
if idx not in toolCallsAccum:
|
||||||
|
toolCallsAccum[idx] = {
|
||||||
|
"id": tcDelta.get("id", ""),
|
||||||
|
"type": "function",
|
||||||
|
"function": {"name": "", "arguments": ""},
|
||||||
|
}
|
||||||
|
if tcDelta.get("id"):
|
||||||
|
toolCallsAccum[idx]["id"] = tcDelta["id"]
|
||||||
|
fn = tcDelta.get("function", {})
|
||||||
|
if fn.get("name"):
|
||||||
|
toolCallsAccum[idx]["function"]["name"] = fn["name"]
|
||||||
|
if fn.get("arguments"):
|
||||||
|
toolCallsAccum[idx]["function"]["arguments"] += fn["arguments"]
|
||||||
|
|
||||||
|
metadata: Dict[str, Any] = {}
|
||||||
|
if toolCallsAccum:
|
||||||
|
metadata["toolCalls"] = [toolCallsAccum[i] for i in sorted(toolCallsAccum)]
|
||||||
|
|
||||||
yield AiModelResponse(
|
yield AiModelResponse(
|
||||||
content=fullContent,
|
content=fullContent,
|
||||||
success=True,
|
success=True,
|
||||||
modelId=model.name,
|
modelId=model.name,
|
||||||
metadata={},
|
metadata=metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
except (RateLimitExceededException, ContextLengthExceededException, HTTPException):
|
except (RateLimitExceededException, ContextLengthExceededException, HTTPException):
|
||||||
|
|
|
||||||
|
|
@ -65,7 +65,12 @@ async def runAgentLoop(
|
||||||
|
|
||||||
tools = toolRegistry.getTools()
|
tools = toolRegistry.getTools()
|
||||||
toolDefinitions = toolRegistry.formatToolsForFunctionCalling()
|
toolDefinitions = toolRegistry.formatToolsForFunctionCalling()
|
||||||
toolsText = toolRegistry.formatToolsForPrompt()
|
|
||||||
|
# Text-based tool descriptions are ONLY used as fallback when native function
|
||||||
|
# calling is unavailable. Including both creates conflicting instructions
|
||||||
|
# (text ```tool_call format vs native tool_use blocks) and can cause the model
|
||||||
|
# to respond with plain text instead of actual tool calls.
|
||||||
|
toolsText = "" if toolDefinitions else toolRegistry.formatToolsForPrompt()
|
||||||
|
|
||||||
systemPrompt = buildSystemPrompt(tools, toolsText, userLanguage=userLanguage)
|
systemPrompt = buildSystemPrompt(tools, toolsText, userLanguage=userLanguage)
|
||||||
conversation = ConversationManager(systemPrompt)
|
conversation = ConversationManager(systemPrompt)
|
||||||
|
|
@ -193,6 +198,29 @@ async def runAgentLoop(
|
||||||
toolCalls = _parseToolCalls(aiResponse)
|
toolCalls = _parseToolCalls(aiResponse)
|
||||||
textContent = _extractTextContent(aiResponse)
|
textContent = _extractTextContent(aiResponse)
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
f"Round {state.currentRound} AI response: model={aiResponse.modelName}, "
|
||||||
|
f"toolCalls={len(toolCalls)}, nativeToolCalls={'yes' if aiResponse.toolCalls else 'no'}, "
|
||||||
|
f"contentLen={len(aiResponse.content)}, streamedLen={len(streamedText)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Empty response (no content, no tool calls) = model returned nothing useful.
|
||||||
|
# Burn the round but let the loop continue so the next iteration can retry
|
||||||
|
# (the failover mechanism in the AI layer will try alternative models).
|
||||||
|
if not toolCalls and not textContent and not streamedText:
|
||||||
|
logger.warning(
|
||||||
|
f"Round {state.currentRound}: AI returned empty response "
|
||||||
|
f"(model={aiResponse.modelName}). Retrying next round."
|
||||||
|
)
|
||||||
|
conversation.addUserMessage(
|
||||||
|
"Your previous response was empty. Please use the available tools "
|
||||||
|
"to accomplish the task. Start by planning the steps, then call the "
|
||||||
|
"appropriate tools."
|
||||||
|
)
|
||||||
|
roundLog.durationMs = int((time.time() - roundStartTime) * 1000)
|
||||||
|
trace.rounds.append(roundLog)
|
||||||
|
continue
|
||||||
|
|
||||||
if textContent and not streamedText:
|
if textContent and not streamedText:
|
||||||
yield AgentEvent(type=AgentEventTypeEnum.MESSAGE, content=textContent)
|
yield AgentEvent(type=AgentEventTypeEnum.MESSAGE, content=textContent)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue