gateway/modules/aicore/aicorePluginMistral.py

282 lines
12 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import logging
import httpx
from typing import List
from fastapi import HTTPException
from modules.shared.configuration import APP_CONFIG
from .aicoreBase import BaseConnectorAi
from modules.datamodels.datamodelAi import AiModel, PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelCall, AiModelResponse, createOperationTypeRatings
# Configure logger
logger = logging.getLogger(__name__)
class ContextLengthExceededException(Exception):
    """Raised when a request's prompt exceeds the model's context window."""
class RateLimitExceededException(Exception):
    """Raised when the provider reports a rate-limit (TPM) violation."""
def loadConfigData():
    """Read the Mistral connector settings from the application config.

    Returns:
        dict with a single "apiKey" entry taken from
        APP_CONFIG['Connector_AiMistral_API_SECRET'].
    """
    apiKey = APP_CONFIG.get('Connector_AiMistral_API_SECRET')
    return {"apiKey": apiKey}
class AiMistral(BaseConnectorAi):
    """Connector for communication with the Mistral AI API (Le Chat Mistral).

    Talks to Mistral's OpenAI-compatible chat-completions endpoint and exposes
    two entry points wired into the AiModel definitions from getModels():
    callAiBasic (text) and callAiImage (vision).
    """

    def __init__(self):
        super().__init__()
        # Load configuration (API key from the application config).
        self.config = loadConfigData()
        self.apiKey = self.config["apiKey"]
        # HttpClient for API calls.
        # Timeout set to 600 seconds (10 minutes) for complex requests that may take longer;
        # AiService calls can take significantly longer due to prompt building and processing overhead.
        self.httpClient = httpx.AsyncClient(
            timeout=600.0,
            headers={
                "Authorization": f"Bearer {self.apiKey}",
                "Content-Type": "application/json"
            }
        )
        logger.info("Mistral Connector initialized")

    def getConnectorType(self) -> str:
        """Get the connector type identifier."""
        return "mistral"

    def getModels(self) -> List[AiModel]:
        """Get all available Mistral models.

        Returns:
            List of AiModel definitions (large, small, and large-vision).

        NOTE(review): the calculatepriceCHF lambdas estimate token counts as
        bytes / 4 (a rough chars-per-token heuristic) — confirm against billing.
        """
        return [
            AiModel(
                name="mistral-large-latest",
                displayName="Mistral Large 3",
                connectorType="mistral",
                apiUrl="https://api.mistral.ai/v1/chat/completions",
                temperature=0.2,
                maxTokens=16384,
                contextLength=256000,
                costPer1kTokensInput=0.0005,  # $0.50/M tokens (updated 2026-02)
                costPer1kTokensOutput=0.0015,  # $1.50/M tokens (updated 2026-02)
                speedRating=8,  # Good speed for complex tasks
                qualityRating=9,  # High quality
                functionCall=self.callAiBasic,
                priority=PriorityEnum.BALANCED,
                processingMode=ProcessingModeEnum.ADVANCED,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.PLAN, 9),
                    (OperationTypeEnum.DATA_ANALYSE, 9),
                    (OperationTypeEnum.DATA_GENERATE, 9),
                    (OperationTypeEnum.DATA_EXTRACT, 8)
                ),
                version="mistral-large-latest",
                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0005 + (bytesReceived / 4 / 1000) * 0.0015
            ),
            AiModel(
                name="mistral-small-latest",
                displayName="Mistral Small 3.2",
                connectorType="mistral",
                apiUrl="https://api.mistral.ai/v1/chat/completions",
                temperature=0.2,
                maxTokens=16384,
                contextLength=128000,
                costPer1kTokensInput=0.00006,  # $0.06/M tokens (updated 2026-02)
                costPer1kTokensOutput=0.00018,  # $0.18/M tokens (updated 2026-02)
                speedRating=9,  # Very fast, lightweight model
                qualityRating=7,  # Good quality, cost-efficient
                functionCall=self.callAiBasic,
                priority=PriorityEnum.SPEED,
                processingMode=ProcessingModeEnum.BASIC,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.PLAN, 7),
                    (OperationTypeEnum.DATA_ANALYSE, 7),
                    (OperationTypeEnum.DATA_GENERATE, 8),
                    (OperationTypeEnum.DATA_EXTRACT, 7)
                ),
                version="mistral-small-latest",
                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00006 + (bytesReceived / 4 / 1000) * 0.00018
            ),
            AiModel(
                name="mistral-large-latest",
                displayName="Mistral Large 3 Vision",
                connectorType="mistral",
                apiUrl="https://api.mistral.ai/v1/chat/completions",
                temperature=0.2,
                maxTokens=16384,
                contextLength=256000,
                costPer1kTokensInput=0.0005,  # $0.50/M tokens (updated 2026-02)
                costPer1kTokensOutput=0.0015,  # $1.50/M tokens (updated 2026-02)
                speedRating=6,  # Slower for vision tasks
                qualityRating=8,  # Good quality vision
                functionCall=self.callAiImage,
                priority=PriorityEnum.QUALITY,
                processingMode=ProcessingModeEnum.DETAILED,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.IMAGE_ANALYSE, 8)
                ),
                version="mistral-large-latest",
                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0005 + (bytesReceived / 4 / 1000) * 0.0015
            )
        ]

    async def callAiBasic(self, modelCall: AiModelCall) -> AiModelResponse:
        """
        Calls the Mistral AI API with the given messages using standardized pattern.

        Mistral's chat completions API is OpenAI-compatible: it accepts the same
        message format (role/content) including system messages, and returns
        responses in the same choices[0].message.content structure.

        Args:
            modelCall: AiModelCall with messages and options

        Returns:
            AiModelResponse with content and metadata

        Raises:
            ContextLengthExceededException: When the prompt exceeds the context window.
            RateLimitExceededException: When the provider reports a 429.
            HTTPException: For other errors in API communication.
        """
        try:
            # Extract parameters from modelCall
            messages = modelCall.messages
            model = modelCall.model
            options = modelCall.options
            # Per-call temperature override; falls back to the model default.
            temperature = getattr(options, "temperature", None)
            if temperature is None:
                temperature = model.temperature
            maxTokens = model.maxTokens
            payload = {
                "model": model.name,
                "messages": messages,
                "temperature": temperature,
                "max_tokens": maxTokens
            }
            response = await self.httpClient.post(
                model.apiUrl,
                json=payload
            )
            if response.status_code != 200:
                error_message = f"Mistral API error: {response.status_code} - {response.text}"
                logger.error(error_message)
                # Check for rate limit exceeded (429 TPM)
                if response.status_code == 429:
                    try:
                        error_data = response.json()
                        error_msg = error_data.get("error", {}).get("message", "Rate limit exceeded")
                        raise RateLimitExceededException(
                            f"Rate limit exceeded for {model.name}: {error_msg}"
                        )
                    except (ValueError, KeyError):
                        # Body was not parseable JSON — raise without provider detail.
                        raise RateLimitExceededException(
                            f"Rate limit exceeded for {model.name}"
                        )
                # Check for context length exceeded error
                if response.status_code == 400:
                    try:
                        error_data = response.json()
                        if (error_data.get("error", {}).get("code") == "context_length_exceeded" or
                                "context length" in error_data.get("error", {}).get("message", "").lower() or
                                "too many tokens" in error_data.get("error", {}).get("message", "").lower()):
                            raise ContextLengthExceededException(
                                f"Context length exceeded: {error_data.get('error', {}).get('message', 'Unknown error')}"
                            )
                    except (ValueError, KeyError):
                        pass  # If we can't parse the error, fall through to generic error
                # Include the actual error details in the exception
                raise HTTPException(status_code=500, detail=error_message)
            responseJson = response.json()
            content = responseJson["choices"][0]["message"]["content"]
            return AiModelResponse(
                content=content,
                success=True,
                modelId=model.name,
                metadata={"response_id": responseJson.get("id", "")}
            )
        except ContextLengthExceededException:
            # Re-raise context length exceptions without wrapping
            raise
        except RateLimitExceededException:
            # Re-raise rate limit exceptions without wrapping
            raise
        except HTTPException:
            # BUGFIX: re-raise HTTP errors as-is. Previously these fell into the
            # generic handler below, which logged them a second time and wrapped
            # them in another HTTPException, mangling the detail text into
            # "Error calling Mistral API: 500: Mistral API error: ...".
            raise
        except Exception as e:
            logger.error(f"Error calling Mistral API: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error calling Mistral API: {str(e)}")

    async def callAiImage(self, modelCall: AiModelCall) -> AiModelResponse:
        """
        Analyzes an image with the Mistral Vision API using standardized pattern.

        Mistral Large 3 is multimodal and accepts image inputs in OpenAI-compatible
        format: {"type": "image_url", "image_url": {"url": "data:...base64,..."}}

        Unlike callAiBasic, this method never raises: any failure (including a
        non-200 API response) is converted into an AiModelResponse with
        success=False and the error text set.

        Args:
            modelCall: AiModelCall with messages and image data in options

        Returns:
            AiModelResponse with analysis content
        """
        try:
            # Extract parameters from modelCall
            messages = modelCall.messages
            model = modelCall.model
            # Messages should already be in the correct format with image data embedded.
            # Just verify they contain image data.
            if not messages or not messages[0].get("content"):
                raise ValueError("No messages provided for image analysis")
            logger.debug(f"Starting image analysis with {len(messages)} message(s)...")
            # Use the messages directly — they should already contain the image data
            # in the format: {"type": "image_url", "image_url": {"url": "data:...base64,..."}}
            # Mistral Large 3 supports this OpenAI-compatible vision format natively.
            # Use parameters from model. NOTE(review): max_tokens is deliberately
            # omitted here (provider default applies) — confirm this is intended.
            temperature = model.temperature
            payload = {
                "model": model.name,
                "messages": messages,
                "temperature": temperature
            }
            response = await self.httpClient.post(
                model.apiUrl,
                json=payload
            )
            if response.status_code != 200:
                logger.error(f"Mistral API error: {response.status_code} - {response.text}")
                raise HTTPException(status_code=500, detail="Error communicating with Mistral API")
            responseJson = response.json()
            content = responseJson["choices"][0]["message"]["content"]
            return AiModelResponse(
                content=content,
                success=True,
                modelId=model.name,
                metadata={"response_id": responseJson.get("id", "")}
            )
        except Exception as e:
            # Intentional catch-all: image analysis reports failure via the
            # response object instead of propagating exceptions.
            logger.error(f"Error during image analysis: {str(e)}", exc_info=True)
            return AiModelResponse(
                content="",
                success=False,
                error=f"Error during image analysis: {str(e)}"
            )