llm failover enhanced
parent b876f01da2
commit d63a41fbc8
3 changed files with 93 additions and 9 deletions
@@ -4,7 +4,7 @@ import json
 import logging
 import httpx
 import os
-from typing import Dict, Any, List, AsyncGenerator, Union
+from typing import Dict, Any, List, AsyncGenerator, Optional, Union
 from fastapi import HTTPException
 from modules.shared.configuration import APP_CONFIG
 from .aicoreBase import BaseConnectorAi, RateLimitExceededException
@@ -295,6 +295,7 @@ class AiAnthropic(BaseConnectorAi):
         fullContent = ""
         toolUseBlocks: Dict[int, Dict[str, Any]] = {}
         currentToolIdx = -1
+        stopReason: Optional[str] = None

         async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
             if response.status_code != 200:
@@ -316,7 +317,16 @@ class AiAnthropic(BaseConnectorAi):

                 eventType = event.get("type", "")

-                if eventType == "content_block_start":
+                if eventType == "error":
+                    errDetail = event.get("error", {})
+                    errMsg = errDetail.get("message", str(errDetail))
+                    errType = errDetail.get("type", "unknown")
+                    logger.error(f"Anthropic stream error event: type={errType}, message={errMsg}")
+                    if "overloaded" in errMsg.lower() or "overloaded" in errType.lower():
+                        raise HTTPException(status_code=500, detail="Anthropic API is currently overloaded. Please try again in a few minutes.")
+                    raise HTTPException(status_code=500, detail=f"Anthropic stream error: [{errType}] {errMsg}")
+
+                elif eventType == "content_block_start":
                     block = event.get("content_block", {})
                     idx = event.get("index", 0)
                     if block.get("type") == "tool_use":
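The failover intent behind this hunk is that the raised HTTPException propagates up to a layer that can rotate to another model instead of failing the request. A minimal sketch of that consuming pattern, assuming hypothetical names (`tryModelsInOrder` and `connector.generate` are illustrative, not this repo's actual API):

```python
# Sketch only: how a failover layer could consume the HTTPException raised
# above. All names here are hypothetical, not this repo's actual API.
from fastapi import HTTPException

async def tryModelsInOrder(models, connector, payload):
    lastError = None
    for model in models:
        try:
            return await connector.generate(model, payload)
        except HTTPException as exc:
            # An overloaded provider or a stream error on one model should
            # not fail the request outright; remember it and try the next.
            lastError = exc
    raise lastError or HTTPException(status_code=500, detail="All models failed")
```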
@@ -338,10 +348,22 @@ class AiAnthropic(BaseConnectorAi):
                     if idx in toolUseBlocks:
                         toolUseBlocks[idx]["arguments"] += delta.get("partial_json", "")

+                elif eventType == "message_delta":
+                    delta = event.get("delta", {})
+                    stopReason = delta.get("stop_reason", stopReason)
+
                 elif eventType == "message_stop":
                     break

+        if not fullContent and not toolUseBlocks:
+            logger.warning(
+                f"Anthropic stream returned empty response: model={model.name}, "
+                f"stopReason={stopReason}"
+            )
+
         metadata: Dict[str, Any] = {}
+        if stopReason:
+            metadata["stopReason"] = stopReason
         if toolUseBlocks:
             metadata["toolCalls"] = [
                 {
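For anyone verifying the branch order above: Anthropic streams tool use as a `content_block_start` followed by `input_json_delta` fragments, with the stop reason only arriving in `message_delta`. A trimmed sketch of that event sequence (field shapes abbreviated from Anthropic's streaming format, not exhaustive):

```python
# Abbreviated shapes of the SSE events the loop above distinguishes
# (sketch based on Anthropic's streaming format; fields trimmed).
events = [
    {"type": "content_block_start", "index": 1,
     "content_block": {"type": "tool_use", "id": "toolu_01", "name": "search"}},
    {"type": "content_block_delta", "index": 1,
     "delta": {"type": "input_json_delta", "partial_json": '{"query": "fo'}},
    {"type": "content_block_delta", "index": 1,
     "delta": {"type": "input_json_delta", "partial_json": 'o"}'}},
    {"type": "message_delta", "delta": {"stop_reason": "tool_use"}},
    {"type": "message_stop"},
]
# The partial_json fragments concatenate into one JSON string per block,
# '{"query": "foo"}', which is why the code appends fragments rather than
# parsing each one on its own.
```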
@@ -174,7 +174,11 @@ class AiMistral(BaseConnectorAi):
             "temperature": temperature,
             "max_tokens": maxTokens
         }

+        if modelCall.tools:
+            payload["tools"] = modelCall.tools
+            payload["tool_choice"] = modelCall.toolChoice or "auto"
+
         response = await self.httpClient.post(
             model.apiUrl,
             json=payload
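For context, `modelCall.tools` is expected to carry OpenAI-style function schemas, which Mistral's chat API accepts. A sketch of the assumed structure (the `get_weather` example is illustrative, not from this repo):

```python
# Sketch of the structure `modelCall.tools` is assumed to carry
# (OpenAI-style function schemas, as accepted by Mistral's chat API).
exampleTools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Look up current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]
# Mistral's tool_choice accepts values such as "auto", "any", and "none";
# the code above defaults to "auto" when the caller does not specify one.
```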
@@ -214,15 +218,20 @@ class AiMistral(BaseConnectorAi):
                 raise HTTPException(status_code=500, detail=error_message)

             responseJson = response.json()
-            content = responseJson["choices"][0]["message"]["content"]
+            choiceMessage = responseJson["choices"][0]["message"]
+            content = choiceMessage.get("content") or ""
+
+            metadata = {"response_id": responseJson.get("id", "")}
+            if choiceMessage.get("tool_calls"):
+                metadata["toolCalls"] = choiceMessage["tool_calls"]

             return AiModelResponse(
                 content=content,
                 success=True,
                 modelId=model.name,
-                metadata={"response_id": responseJson.get("id", "")}
+                metadata=metadata,
             )

         except ContextLengthExceededException:
             # Re-raise context length exceptions without wrapping
             raise
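The switch to `.get("content") or ""` matters because a tool-call-only completion comes back with `content` set to null, so the old indexing would have passed `None` downstream. A runnable sketch under that assumption (response body trimmed and illustrative):

```python
# Why `.get("content") or ""`: a tool-call-only completion carries
# content=null, e.g. (trimmed, illustrative response body):
responseJson = {
    "id": "cmpl-123",
    "choices": [{
        "message": {
            "role": "assistant",
            "content": None,  # no text when the model only calls tools
            "tool_calls": [{
                "id": "call_0",
                "type": "function",
                "function": {"name": "get_weather",
                             "arguments": '{"city": "Berlin"}'},
            }],
        },
        "finish_reason": "tool_calls",
    }],
}
choiceMessage = responseJson["choices"][0]["message"]
assert (choiceMessage.get("content") or "") == ""  # old code would pass None on
```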
@@ -250,7 +259,12 @@ class AiMistral(BaseConnectorAi):
                 "stream": True,
             }

+            if modelCall.tools:
+                payload["tools"] = modelCall.tools
+                payload["tool_choice"] = modelCall.toolChoice or "auto"
+
             fullContent = ""
+            toolCallsAccum: Dict[int, Dict[str, Any]] = {}

             async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
                 if response.status_code != 200:
@@ -280,11 +294,31 @@ class AiMistral(BaseConnectorAi):
                             fullContent += delta["content"]
                             yield delta["content"]

+                        for tcDelta in delta.get("tool_calls", []):
+                            idx = tcDelta.get("index", 0)
+                            if idx not in toolCallsAccum:
+                                toolCallsAccum[idx] = {
+                                    "id": tcDelta.get("id", ""),
+                                    "type": "function",
+                                    "function": {"name": "", "arguments": ""},
+                                }
+                            if tcDelta.get("id"):
+                                toolCallsAccum[idx]["id"] = tcDelta["id"]
+                            fn = tcDelta.get("function", {})
+                            if fn.get("name"):
+                                toolCallsAccum[idx]["function"]["name"] = fn["name"]
+                            if fn.get("arguments"):
+                                toolCallsAccum[idx]["function"]["arguments"] += fn["arguments"]
+
+            metadata: Dict[str, Any] = {}
+            if toolCallsAccum:
+                metadata["toolCalls"] = [toolCallsAccum[i] for i in sorted(toolCallsAccum)]
+
             yield AiModelResponse(
                 content=fullContent,
                 success=True,
                 modelId=model.name,
-                metadata={},
+                metadata=metadata,
             )

         except (RateLimitExceededException, ContextLengthExceededException, HTTPException):
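The accumulator above relies on streaming chunks that share an `index`, where only the first chunk carries the call id and function name and later chunks only extend the arguments string. A self-contained sketch of that merge behavior with synthetic deltas (same logic, compacted with `setdefault`):

```python
# Sketch: how the accumulator above merges streamed tool-call deltas.
# Chunks share an "index"; only the first carries the id and name.
from typing import Any, Dict

deltas = [
    {"index": 0, "id": "call_0",
     "function": {"name": "get_weather", "arguments": '{"ci'}},
    {"index": 0, "function": {"arguments": 'ty": "Berlin"}'}},
]

accum: Dict[int, Dict[str, Any]] = {}
for tc in deltas:
    idx = tc.get("index", 0)
    entry = accum.setdefault(idx, {"id": tc.get("id", ""), "type": "function",
                                   "function": {"name": "", "arguments": ""}})
    if tc.get("id"):
        entry["id"] = tc["id"]
    fn = tc.get("function", {})
    if fn.get("name"):
        entry["function"]["name"] = fn["name"]
    if fn.get("arguments"):
        entry["function"]["arguments"] += fn["arguments"]

assert accum[0]["function"]["arguments"] == '{"city": "Berlin"}'
```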
@@ -65,7 +65,12 @@ async def runAgentLoop(

     tools = toolRegistry.getTools()
     toolDefinitions = toolRegistry.formatToolsForFunctionCalling()
-    toolsText = toolRegistry.formatToolsForPrompt()
+
+    # Text-based tool descriptions are ONLY used as fallback when native function
+    # calling is unavailable. Including both creates conflicting instructions
+    # (text ```tool_call format vs native tool_use blocks) and can cause the model
+    # to respond with plain text instead of actual tool calls.
+    toolsText = "" if toolDefinitions else toolRegistry.formatToolsForPrompt()

     systemPrompt = buildSystemPrompt(tools, toolsText, userLanguage=userLanguage)
     conversation = ConversationManager(systemPrompt)
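Spelled out as a function, the fallback rule the one-liner encodes looks like this (a hypothetical helper mirroring the committed expression, not code in this repo):

```python
def selectToolsText(toolDefinitions: list, toolRegistry) -> str:
    """Hypothetical helper mirroring the committed one-liner: use the
    prompt-text tool list only when native function-calling definitions
    are absent, never both at once."""
    if toolDefinitions:
        return ""  # native tool_use blocks already carry the schemas
    return toolRegistry.formatToolsForPrompt()
```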
@@ -193,6 +198,29 @@ async def runAgentLoop(
         toolCalls = _parseToolCalls(aiResponse)
         textContent = _extractTextContent(aiResponse)

+        logger.debug(
+            f"Round {state.currentRound} AI response: model={aiResponse.modelName}, "
+            f"toolCalls={len(toolCalls)}, nativeToolCalls={'yes' if aiResponse.toolCalls else 'no'}, "
+            f"contentLen={len(aiResponse.content)}, streamedLen={len(streamedText)}"
+        )
+
+        # Empty response (no content, no tool calls) = model returned nothing useful.
+        # Burn the round but let the loop continue so the next iteration can retry
+        # (the failover mechanism in the AI layer will try alternative models).
+        if not toolCalls and not textContent and not streamedText:
+            logger.warning(
+                f"Round {state.currentRound}: AI returned empty response "
+                f"(model={aiResponse.modelName}). Retrying next round."
+            )
+            conversation.addUserMessage(
+                "Your previous response was empty. Please use the available tools "
+                "to accomplish the task. Start by planning the steps, then call the "
+                "appropriate tools."
+            )
+            roundLog.durationMs = int((time.time() - roundStartTime) * 1000)
+            trace.rounds.append(roundLog)
+            continue
+
         if textContent and not streamedText:
             yield AgentEvent(type=AgentEventTypeEnum.MESSAGE, content=textContent)
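Reduced to its skeleton, the burn-a-round retry pattern this hunk introduces works like the sketch below (names and signatures hypothetical, not this repo's actual API):

```python
# Skeleton of the retry-on-empty pattern used above (names hypothetical).
async def loopSketch(maxRounds: int, callModel, conversation):
    for roundNo in range(1, maxRounds + 1):
        response = await callModel(conversation)
        if not response.content and not response.toolCalls:
            # Nothing useful: nudge the model, spend the round, and loop;
            # the AI layer's failover may pick a different model next time.
            conversation.addUserMessage(
                "Your previous response was empty. Please use the available tools."
            )
            continue
        return response
    raise RuntimeError("Agent produced no usable response within round budget")
```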