llm failover enhanced
parent b876f01da2
commit d63a41fbc8
3 changed files with 93 additions and 9 deletions
@@ -4,7 +4,7 @@ import json
 import logging
 import httpx
 import os
-from typing import Dict, Any, List, AsyncGenerator, Union
+from typing import Dict, Any, List, AsyncGenerator, Optional, Union
 from fastapi import HTTPException
 from modules.shared.configuration import APP_CONFIG
 from .aicoreBase import BaseConnectorAi, RateLimitExceededException
@@ -295,6 +295,7 @@ class AiAnthropic(BaseConnectorAi):
         fullContent = ""
         toolUseBlocks: Dict[int, Dict[str, Any]] = {}
         currentToolIdx = -1
+        stopReason: Optional[str] = None

         async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
             if response.status_code != 200:
@@ -316,7 +317,16 @@ class AiAnthropic(BaseConnectorAi):

                 eventType = event.get("type", "")

-                if eventType == "content_block_start":
+                if eventType == "error":
+                    errDetail = event.get("error", {})
+                    errMsg = errDetail.get("message", str(errDetail))
+                    errType = errDetail.get("type", "unknown")
+                    logger.error(f"Anthropic stream error event: type={errType}, message={errMsg}")
+                    if "overloaded" in errMsg.lower() or "overloaded" in errType.lower():
+                        raise HTTPException(status_code=500, detail="Anthropic API is currently overloaded. Please try again in a few minutes.")
+                    raise HTTPException(status_code=500, detail=f"Anthropic stream error: [{errType}] {errMsg}")
+
+                elif eventType == "content_block_start":
                     block = event.get("content_block", {})
                     idx = event.get("index", 0)
                     if block.get("type") == "tool_use":
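The failover intent behind this hunk is that the raised HTTPException propagates up to a layer that can rotate to another model instead of failing the request. A minimal sketch of that consuming pattern, assuming hypothetical names (`tryModelsInOrder` and `connector.generate` are illustrative, not this repo's actual API):

```python
# Sketch only: how a failover layer could consume the HTTPException raised
# above. All names here are hypothetical, not this repo's actual API.
from fastapi import HTTPException

async def tryModelsInOrder(models, connector, payload):
    lastError = None
    for model in models:
        try:
            return await connector.generate(model, payload)
        except HTTPException as exc:
            # An overloaded provider or a stream error on one model should
            # not fail the request outright; remember it and try the next.
            lastError = exc
    raise lastError or HTTPException(status_code=500, detail="All models failed")
```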
@@ -338,10 +348,22 @@ class AiAnthropic(BaseConnectorAi):
                     if idx in toolUseBlocks:
                         toolUseBlocks[idx]["arguments"] += delta.get("partial_json", "")

+                elif eventType == "message_delta":
+                    delta = event.get("delta", {})
+                    stopReason = delta.get("stop_reason", stopReason)
+
                 elif eventType == "message_stop":
                     break

+        if not fullContent and not toolUseBlocks:
+            logger.warning(
+                f"Anthropic stream returned empty response: model={model.name}, "
+                f"stopReason={stopReason}"
+            )
+
         metadata: Dict[str, Any] = {}
+        if stopReason:
+            metadata["stopReason"] = stopReason
         if toolUseBlocks:
             metadata["toolCalls"] = [
                 {
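For anyone verifying the branch order above: Anthropic streams tool use as a `content_block_start` followed by `input_json_delta` fragments, with the stop reason only arriving in `message_delta`. A trimmed sketch of that event sequence (field shapes abbreviated from Anthropic's streaming format, not exhaustive):

```python
# Abbreviated shapes of the SSE events the loop above distinguishes
# (sketch based on Anthropic's streaming format; fields trimmed).
events = [
    {"type": "content_block_start", "index": 1,
     "content_block": {"type": "tool_use", "id": "toolu_01", "name": "search"}},
    {"type": "content_block_delta", "index": 1,
     "delta": {"type": "input_json_delta", "partial_json": '{"query": "fo'}},
    {"type": "content_block_delta", "index": 1,
     "delta": {"type": "input_json_delta", "partial_json": 'o"}'}},
    {"type": "message_delta", "delta": {"stop_reason": "tool_use"}},
    {"type": "message_stop"},
]
# The partial_json fragments concatenate into one JSON string per block,
# '{"query": "foo"}', which is why the code appends fragments rather than
# parsing each one on its own.
```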
@@ -174,7 +174,11 @@ class AiMistral(BaseConnectorAi):
             "temperature": temperature,
             "max_tokens": maxTokens
         }

+        if modelCall.tools:
+            payload["tools"] = modelCall.tools
+            payload["tool_choice"] = modelCall.toolChoice or "auto"
+
         response = await self.httpClient.post(
             model.apiUrl,
             json=payload
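For context, `modelCall.tools` is expected to carry OpenAI-style function schemas, which Mistral's chat API accepts. A sketch of the assumed structure (the `get_weather` example is illustrative, not from this repo):

```python
# Sketch of the structure `modelCall.tools` is assumed to carry
# (OpenAI-style function schemas, as accepted by Mistral's chat API).
exampleTools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Look up current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]
# Mistral's tool_choice accepts values such as "auto", "any", and "none";
# the code above defaults to "auto" when the caller does not specify one.
```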
@@ -214,15 +218,20 @@ class AiMistral(BaseConnectorAi):
                 raise HTTPException(status_code=500, detail=error_message)

             responseJson = response.json()
-            content = responseJson["choices"][0]["message"]["content"]
+            choiceMessage = responseJson["choices"][0]["message"]
+            content = choiceMessage.get("content") or ""
+
+            metadata = {"response_id": responseJson.get("id", "")}
+            if choiceMessage.get("tool_calls"):
+                metadata["toolCalls"] = choiceMessage["tool_calls"]

             return AiModelResponse(
                 content=content,
                 success=True,
                 modelId=model.name,
-                metadata={"response_id": responseJson.get("id", "")}
+                metadata=metadata,
             )

         except ContextLengthExceededException:
             # Re-raise context length exceptions without wrapping
             raise
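The switch to `.get("content") or ""` matters because a tool-call-only completion comes back with `content` set to null, so the old indexing would have passed `None` downstream. A runnable sketch under that assumption (response body trimmed and illustrative):

```python
# Why `.get("content") or ""`: a tool-call-only completion carries
# content=null, e.g. (trimmed, illustrative response body):
responseJson = {
    "id": "cmpl-123",
    "choices": [{
        "message": {
            "role": "assistant",
            "content": None,  # no text when the model only calls tools
            "tool_calls": [{
                "id": "call_0",
                "type": "function",
                "function": {"name": "get_weather",
                             "arguments": '{"city": "Berlin"}'},
            }],
        },
        "finish_reason": "tool_calls",
    }],
}
choiceMessage = responseJson["choices"][0]["message"]
assert (choiceMessage.get("content") or "") == ""  # old code would pass None on
```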
@@ -250,7 +259,12 @@ class AiMistral(BaseConnectorAi):
                 "stream": True,
             }

+            if modelCall.tools:
+                payload["tools"] = modelCall.tools
+                payload["tool_choice"] = modelCall.toolChoice or "auto"
+
             fullContent = ""
+            toolCallsAccum: Dict[int, Dict[str, Any]] = {}

             async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
                 if response.status_code != 200:
@@ -280,11 +294,31 @@ class AiMistral(BaseConnectorAi):
                             fullContent += delta["content"]
                             yield delta["content"]

+                        for tcDelta in delta.get("tool_calls", []):
+                            idx = tcDelta.get("index", 0)
+                            if idx not in toolCallsAccum:
+                                toolCallsAccum[idx] = {
+                                    "id": tcDelta.get("id", ""),
+                                    "type": "function",
+                                    "function": {"name": "", "arguments": ""},
+                                }
+                            if tcDelta.get("id"):
+                                toolCallsAccum[idx]["id"] = tcDelta["id"]
+                            fn = tcDelta.get("function", {})
+                            if fn.get("name"):
+                                toolCallsAccum[idx]["function"]["name"] = fn["name"]
+                            if fn.get("arguments"):
+                                toolCallsAccum[idx]["function"]["arguments"] += fn["arguments"]
+
+            metadata: Dict[str, Any] = {}
+            if toolCallsAccum:
+                metadata["toolCalls"] = [toolCallsAccum[i] for i in sorted(toolCallsAccum)]
+
             yield AiModelResponse(
                 content=fullContent,
                 success=True,
                 modelId=model.name,
-                metadata={},
+                metadata=metadata,
             )

         except (RateLimitExceededException, ContextLengthExceededException, HTTPException):
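The accumulator above relies on streaming chunks that share an `index`, where only the first chunk carries the call id and function name and later chunks only extend the arguments string. A self-contained sketch of that merge behavior with synthetic deltas (same logic, compacted with `setdefault`):

```python
# Sketch: how the accumulator above merges streamed tool-call deltas.
# Chunks share an "index"; only the first carries the id and name.
from typing import Any, Dict

deltas = [
    {"index": 0, "id": "call_0",
     "function": {"name": "get_weather", "arguments": '{"ci'}},
    {"index": 0, "function": {"arguments": 'ty": "Berlin"}'}},
]

accum: Dict[int, Dict[str, Any]] = {}
for tc in deltas:
    idx = tc.get("index", 0)
    entry = accum.setdefault(idx, {"id": tc.get("id", ""), "type": "function",
                                   "function": {"name": "", "arguments": ""}})
    if tc.get("id"):
        entry["id"] = tc["id"]
    fn = tc.get("function", {})
    if fn.get("name"):
        entry["function"]["name"] = fn["name"]
    if fn.get("arguments"):
        entry["function"]["arguments"] += fn["arguments"]

assert accum[0]["function"]["arguments"] == '{"city": "Berlin"}'
```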
@@ -65,7 +65,12 @@ async def runAgentLoop(

     tools = toolRegistry.getTools()
     toolDefinitions = toolRegistry.formatToolsForFunctionCalling()
-    toolsText = toolRegistry.formatToolsForPrompt()
+
+    # Text-based tool descriptions are ONLY used as fallback when native function
+    # calling is unavailable. Including both creates conflicting instructions
+    # (text ```tool_call format vs native tool_use blocks) and can cause the model
+    # to respond with plain text instead of actual tool calls.
+    toolsText = "" if toolDefinitions else toolRegistry.formatToolsForPrompt()

     systemPrompt = buildSystemPrompt(tools, toolsText, userLanguage=userLanguage)
     conversation = ConversationManager(systemPrompt)
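Spelled out as a function, the fallback rule the one-liner encodes looks like this (a hypothetical helper mirroring the committed expression, not code in this repo):

```python
def selectToolsText(toolDefinitions: list, toolRegistry) -> str:
    """Hypothetical helper mirroring the committed one-liner: use the
    prompt-text tool list only when native function-calling definitions
    are absent, never both at once."""
    if toolDefinitions:
        return ""  # native tool_use blocks already carry the schemas
    return toolRegistry.formatToolsForPrompt()
```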
@@ -193,6 +198,29 @@ async def runAgentLoop(
         toolCalls = _parseToolCalls(aiResponse)
         textContent = _extractTextContent(aiResponse)

+        logger.debug(
+            f"Round {state.currentRound} AI response: model={aiResponse.modelName}, "
+            f"toolCalls={len(toolCalls)}, nativeToolCalls={'yes' if aiResponse.toolCalls else 'no'}, "
+            f"contentLen={len(aiResponse.content)}, streamedLen={len(streamedText)}"
+        )
+
+        # Empty response (no content, no tool calls) = model returned nothing useful.
+        # Burn the round but let the loop continue so the next iteration can retry
+        # (the failover mechanism in the AI layer will try alternative models).
+        if not toolCalls and not textContent and not streamedText:
+            logger.warning(
+                f"Round {state.currentRound}: AI returned empty response "
+                f"(model={aiResponse.modelName}). Retrying next round."
+            )
+            conversation.addUserMessage(
+                "Your previous response was empty. Please use the available tools "
+                "to accomplish the task. Start by planning the steps, then call the "
+                "appropriate tools."
+            )
+            roundLog.durationMs = int((time.time() - roundStartTime) * 1000)
+            trace.rounds.append(roundLog)
+            continue
+
         if textContent and not streamedText:
             yield AgentEvent(type=AgentEventTypeEnum.MESSAGE, content=textContent)
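Reduced to its skeleton, the burn-a-round retry pattern this hunk introduces works like the sketch below (names and signatures hypothetical, not this repo's actual API):

```python
# Skeleton of the retry-on-empty pattern used above (names hypothetical).
async def loopSketch(maxRounds: int, callModel, conversation):
    for roundNo in range(1, maxRounds + 1):
        response = await callModel(conversation)
        if not response.content and not response.toolCalls:
            # Nothing useful: nudge the model, spend the round, and loop;
            # the AI layer's failover may pick a different model next time.
            conversation.addUserMessage(
                "Your previous response was empty. Please use the available tools."
            )
            continue
        return response
    raise RuntimeError("Agent produced no usable response within round budget")
```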