llm failover enhanced

2026-03-18 13:57:01 +01:00 · 2026-03-18 13:57:01 +01:00 · d63a41fbc8
commit d63a41fbc8
parent b876f01da2
3 changed files with 93 additions and 9 deletions
--- a/modules/aicore/aicorePluginAnthropic.py
+++ b/modules/aicore/aicorePluginAnthropic.py
@ -4,7 +4,7 @@ import json
 import logging
 import httpx
 import os
-from typing import Dict, Any, List, AsyncGenerator, Union
+from typing import Dict, Any, List, AsyncGenerator, Optional, Union
 from fastapi import HTTPException
 from modules.shared.configuration import APP_CONFIG
 from .aicoreBase import BaseConnectorAi, RateLimitExceededException
@ -295,6 +295,7 @@ class AiAnthropic(BaseConnectorAi):
            fullContent = ""
            toolUseBlocks: Dict[int, Dict[str, Any]] = {}
            currentToolIdx = -1
            stopReason: Optional[str] = None
            async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
                if response.status_code != 200:
@ -316,7 +317,16 @@ class AiAnthropic(BaseConnectorAi):
                    eventType = event.get("type", "")
-                    if eventType == "content_block_start":
+                    if eventType == "error":
                        errDetail = event.get("error", {})
                        errMsg = errDetail.get("message", str(errDetail))
                        errType = errDetail.get("type", "unknown")
                        logger.error(f"Anthropic stream error event: type={errType}, message={errMsg}")
                        if "overloaded" in errMsg.lower() or "overloaded" in errType.lower():
                            raise HTTPException(status_code=500, detail=f"Anthropic API is currently overloaded. Please try again in a few minutes.")
                        raise HTTPException(status_code=500, detail=f"Anthropic stream error: [{errType}] {errMsg}")
                    elif eventType == "content_block_start":
                        block = event.get("content_block", {})
                        idx = event.get("index", 0)
                        if block.get("type") == "tool_use":
@ -338,10 +348,22 @@ class AiAnthropic(BaseConnectorAi):
                            if idx in toolUseBlocks:
                                toolUseBlocks[idx]["arguments"] += delta.get("partial_json", "")
                    elif eventType == "message_delta":
                        delta = event.get("delta", {})
                        stopReason = delta.get("stop_reason", stopReason)
                    elif eventType == "message_stop":
                        break
            if not fullContent and not toolUseBlocks:
                logger.warning(
                    f"Anthropic stream returned empty response: model={model.name}, "
                    f"stopReason={stopReason}"
                )
            metadata: Dict[str, Any] = {}
            if stopReason:
                metadata["stopReason"] = stopReason
            if toolUseBlocks:
                metadata["toolCalls"] = [
                    {
--- a/modules/aicore/aicorePluginMistral.py
+++ b/modules/aicore/aicorePluginMistral.py
@ -175,6 +175,10 @@ class AiMistral(BaseConnectorAi):
                "max_tokens": maxTokens
            }
            if modelCall.tools:
                payload["tools"] = modelCall.tools
                payload["tool_choice"] = modelCall.toolChoice or "auto"
            response = await self.httpClient.post(
                model.apiUrl,
                json=payload
@ -214,13 +218,18 @@ class AiMistral(BaseConnectorAi):
                raise HTTPException(status_code=500, detail=error_message)
            responseJson = response.json()
-            content = responseJson["choices"][0]["message"]["content"]
+            choiceMessage = responseJson["choices"][0]["message"]
            content = choiceMessage.get("content") or ""
            metadata = {"response_id": responseJson.get("id", "")}
            if choiceMessage.get("tool_calls"):
                metadata["toolCalls"] = choiceMessage["tool_calls"]
            return AiModelResponse(
                content=content,
                success=True,
                modelId=model.name,
-                metadata={"response_id": responseJson.get("id", "")}
+                metadata=metadata,
            )
        except ContextLengthExceededException:
@ -250,7 +259,12 @@ class AiMistral(BaseConnectorAi):
                "stream": True,
            }
            if modelCall.tools:
                payload["tools"] = modelCall.tools
                payload["tool_choice"] = modelCall.toolChoice or "auto"
            fullContent = ""
            toolCallsAccum: Dict[int, Dict[str, Any]] = {}
            async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
                if response.status_code != 200:
@ -280,11 +294,31 @@ class AiMistral(BaseConnectorAi):
                        fullContent += delta["content"]
                        yield delta["content"]
                    for tcDelta in delta.get("tool_calls", []):
                        idx = tcDelta.get("index", 0)
                        if idx not in toolCallsAccum:
                            toolCallsAccum[idx] = {
                                "id": tcDelta.get("id", ""),
                                "type": "function",
                                "function": {"name": "", "arguments": ""},
                            }
                        if tcDelta.get("id"):
                            toolCallsAccum[idx]["id"] = tcDelta["id"]
                        fn = tcDelta.get("function", {})
                        if fn.get("name"):
                            toolCallsAccum[idx]["function"]["name"] = fn["name"]
                        if fn.get("arguments"):
                            toolCallsAccum[idx]["function"]["arguments"] += fn["arguments"]
            metadata: Dict[str, Any] = {}
            if toolCallsAccum:
                metadata["toolCalls"] = [toolCallsAccum[i] for i in sorted(toolCallsAccum)]
            yield AiModelResponse(
                content=fullContent,
                success=True,
                modelId=model.name,
-                metadata={},
+                metadata=metadata,
            )
        except (RateLimitExceededException, ContextLengthExceededException, HTTPException):
--- a/modules/serviceCenter/services/serviceAgent/agentLoop.py
+++ b/modules/serviceCenter/services/serviceAgent/agentLoop.py
@ -65,7 +65,12 @@ async def runAgentLoop(
    tools = toolRegistry.getTools()
    toolDefinitions = toolRegistry.formatToolsForFunctionCalling()
-    toolsText = toolRegistry.formatToolsForPrompt()
+
    # Text-based tool descriptions are ONLY used as fallback when native function
    # calling is unavailable.  Including both creates conflicting instructions
    # (text ```tool_call format vs native tool_use blocks) and can cause the model
    # to respond with plain text instead of actual tool calls.
    toolsText = "" if toolDefinitions else toolRegistry.formatToolsForPrompt()
    systemPrompt = buildSystemPrompt(tools, toolsText, userLanguage=userLanguage)
    conversation = ConversationManager(systemPrompt)
@ -193,6 +198,29 @@ async def runAgentLoop(
        toolCalls = _parseToolCalls(aiResponse)
        textContent = _extractTextContent(aiResponse)
        logger.debug(
            f"Round {state.currentRound} AI response: model={aiResponse.modelName}, "
            f"toolCalls={len(toolCalls)}, nativeToolCalls={'yes' if aiResponse.toolCalls else 'no'}, "
            f"contentLen={len(aiResponse.content)}, streamedLen={len(streamedText)}"
        )
        # Empty response (no content, no tool calls) = model returned nothing useful.
        # Burn the round but let the loop continue so the next iteration can retry
        # (the failover mechanism in the AI layer will try alternative models).
        if not toolCalls and not textContent and not streamedText:
            logger.warning(
                f"Round {state.currentRound}: AI returned empty response "
                f"(model={aiResponse.modelName}). Retrying next round."
            )
            conversation.addUserMessage(
                "Your previous response was empty. Please use the available tools "
                "to accomplish the task. Start by planning the steps, then call the "
                "appropriate tools."
            )
            roundLog.durationMs = int((time.time() - roundStartTime) * 1000)
            trace.rounds.append(roundLog)
            continue
        if textContent and not streamedText:
            yield AgentEvent(type=AgentEventTypeEnum.MESSAGE, content=textContent)