757 lines
No EOL
34 KiB
Python
757 lines
No EOL
34 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
import logging
|
|
import json as _json
|
|
import httpx
|
|
from typing import List, Dict, Any, AsyncGenerator, Union
|
|
from fastapi import HTTPException
|
|
from modules.shared.configuration import APP_CONFIG
|
|
from .aicoreBase import BaseConnectorAi, RateLimitExceededException, ContextLengthExceededException
|
|
from modules.datamodels.datamodelAi import AiModel, PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelCall, AiModelResponse, createOperationTypeRatings, AiCallPromptImage
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _supportsCustomTemperature(modelName: str) -> bool:
|
|
"""Check whether an OpenAI model accepts a custom `temperature` value.
|
|
|
|
GPT-5.x and the o-series (o1/o3/o4) reasoning models reject every
|
|
`temperature` value other than the default (1) with HTTP 400
|
|
`unsupported_value`. For these models we must omit `temperature`
|
|
from the payload entirely. Older chat-completions models
|
|
(gpt-4o, gpt-4o-mini, gpt-4.1, gpt-3.5-*) still accept any value
|
|
in [0, 2].
|
|
|
|
Returns:
|
|
True if `temperature` may be sent; False if it must be omitted.
|
|
"""
|
|
if not modelName:
|
|
return True
|
|
name = modelName.lower()
|
|
if name.startswith("gpt-5"):
|
|
return False
|
|
if name.startswith("o1") or name.startswith("o3") or name.startswith("o4"):
|
|
return False
|
|
return True
|
|
|
|
|
|
def loadConfigData():
    """Read the OpenAI connector settings from the application config.

    Returns:
        dict with a single key "apiKey" holding the value of the
        `Connector_AiOpenai_API_SECRET` config entry (may be None when
        the secret is not configured).
    """
    apiKey = APP_CONFIG.get('Connector_AiOpenai_API_SECRET')
    return {"apiKey": apiKey}
|
|
|
|
class AiOpenai(BaseConnectorAi):
    """Connector for communication with the OpenAI API."""

    def __init__(self):
        """Initialize the connector: load config and create the shared async HTTP client."""
        super().__init__()
        # Load configuration
        self.config = loadConfigData()
        # API secret used for the Authorization header below.
        self.apiKey = self.config["apiKey"]

        # HttpClient for API calls
        # Timeout set to 600 seconds (10 minutes) for complex requests that may take longer
        # AiService calls can take significantly longer due to prompt building and processing overhead
        self.httpClient = httpx.AsyncClient(
            timeout=600.0,
            headers={
                "Authorization": f"Bearer {self.apiKey}",
                "Content-Type": "application/json"
            }
        )
        logger.info("OpenAI Connector initialized")
|
|
|
|
def getConnectorType(self) -> str:
|
|
"""Get the connector type identifier."""
|
|
return "openai"
|
|
|
|
    def getModels(self) -> List[AiModel]:
        """Get all available OpenAI models.

        Returns:
            List of AiModel definitions (chat, vision, embedding and image
            generation models), each wired to the matching call method of
            this connector and carrying pricing/rating metadata.

        NOTE(review): "gpt-4o" and "gpt-5.5" each appear twice (text vs.
        vision variant with a different functionCall) - confirm downstream
        model lookup handles duplicate `name` values as intended.
        """
        return [
            AiModel(
                name="gpt-4o",
                displayName="OpenAI GPT-4o",
                connectorType="openai",
                apiUrl="https://api.openai.com/v1/chat/completions",
                temperature=0.2,
                maxTokens=16384,
                contextLength=128000,
                maxInputTokensPerRequest=25000, # OpenAI org TPM limit is 30K, keep 5K buffer
                costPer1kTokensInput=0.0025, # $2.50/M tokens (updated 2026-02)
                costPer1kTokensOutput=0.01, # $10.00/M tokens (updated 2026-02)
                speedRating=8, # Good speed for complex tasks
                qualityRating=10, # High quality
                functionCall=self.callAiBasic,
                functionCallStream=self.callAiBasicStream,
                priority=PriorityEnum.BALANCED,
                processingMode=ProcessingModeEnum.ADVANCED,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.PLAN, 9),
                    (OperationTypeEnum.DATA_ANALYSE, 10),
                    (OperationTypeEnum.DATA_GENERATE, 10),
                    (OperationTypeEnum.DATA_EXTRACT, 7),
                    (OperationTypeEnum.AGENT, 9),
                    (OperationTypeEnum.DATA_QUERY, 8),
                ),
                version="gpt-4o",
                # Price estimate: bytes/4 approximates token count, /1000 converts to per-1k pricing.
                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0025 + (bytesReceived / 4 / 1000) * 0.01
            ),
            AiModel(
                name="gpt-4o-mini",
                displayName="OpenAI GPT-4o Mini",
                connectorType="openai",
                apiUrl="https://api.openai.com/v1/chat/completions",
                temperature=0.2,
                maxTokens=16384,
                contextLength=128000,
                maxInputTokensPerRequest=25000, # OpenAI org TPM limit, keep buffer
                costPer1kTokensInput=0.00015, # $0.15/M tokens (updated 2026-02)
                costPer1kTokensOutput=0.0006, # $0.60/M tokens (updated 2026-02)
                speedRating=9, # Very fast
                qualityRating=8, # Good quality, replaces gpt-3.5-turbo
                functionCall=self.callAiBasic,
                functionCallStream=self.callAiBasicStream,
                priority=PriorityEnum.SPEED,
                processingMode=ProcessingModeEnum.BASIC,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.PLAN, 8),
                    (OperationTypeEnum.DATA_ANALYSE, 8),
                    (OperationTypeEnum.DATA_GENERATE, 9),
                    (OperationTypeEnum.DATA_EXTRACT, 7),
                    (OperationTypeEnum.AGENT, 8),
                    (OperationTypeEnum.DATA_QUERY, 10),
                ),
                version="gpt-4o-mini",
                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00015 + (bytesReceived / 4 / 1000) * 0.0006
            ),
            # Vision variant of gpt-4o: same API model, routed to callAiImage.
            AiModel(
                name="gpt-4o",
                displayName="OpenAI GPT-4o Vision",
                connectorType="openai",
                apiUrl="https://api.openai.com/v1/chat/completions",
                temperature=0.2,
                maxTokens=16384,
                contextLength=128000,
                maxInputTokensPerRequest=25000, # OpenAI org TPM limit is 30K, keep 5K buffer
                costPer1kTokensInput=0.0025, # $2.50/M tokens (updated 2026-02)
                costPer1kTokensOutput=0.01, # $10.00/M tokens (updated 2026-02)
                speedRating=6, # Slower for vision tasks
                qualityRating=9, # High quality vision
                functionCall=self.callAiImage,
                priority=PriorityEnum.QUALITY,
                processingMode=ProcessingModeEnum.DETAILED,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.IMAGE_ANALYSE, 9)
                ),
                version="gpt-4o",
                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0025 + (bytesReceived / 4 / 1000) * 0.01
            ),
            AiModel(
                name="gpt-5.5",
                displayName="OpenAI GPT-5.5",
                connectorType="openai",
                apiUrl="https://api.openai.com/v1/chat/completions",
                temperature=0.2,
                maxTokens=128000,
                contextLength=1050000,
                costPer1kTokensInput=0.005, # $5/M tokens (OpenAI API, 2026-04)
                costPer1kTokensOutput=0.03, # $30/M tokens
                speedRating=8,
                qualityRating=10,
                functionCall=self.callAiBasic,
                functionCallStream=self.callAiBasicStream,
                priority=PriorityEnum.QUALITY,
                processingMode=ProcessingModeEnum.DETAILED,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.PLAN, 10),
                    (OperationTypeEnum.DATA_ANALYSE, 10),
                    (OperationTypeEnum.DATA_GENERATE, 10),
                    (OperationTypeEnum.DATA_EXTRACT, 8),
                    (OperationTypeEnum.AGENT, 10),
                    (OperationTypeEnum.DATA_QUERY, 8),
                ),
                version="gpt-5.5",
                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.005 + (bytesReceived / 4 / 1000) * 0.03
            ),
            AiModel(
                name="gpt-5.4",
                displayName="OpenAI GPT-5.4",
                connectorType="openai",
                apiUrl="https://api.openai.com/v1/chat/completions",
                temperature=0.2,
                maxTokens=128000,
                contextLength=1050000,
                costPer1kTokensInput=0.0025, # $2.50/M tokens
                costPer1kTokensOutput=0.015, # $15/M tokens
                speedRating=8,
                qualityRating=10,
                functionCall=self.callAiBasic,
                functionCallStream=self.callAiBasicStream,
                priority=PriorityEnum.BALANCED,
                processingMode=ProcessingModeEnum.ADVANCED,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.PLAN, 9),
                    (OperationTypeEnum.DATA_ANALYSE, 10),
                    (OperationTypeEnum.DATA_GENERATE, 10),
                    (OperationTypeEnum.DATA_EXTRACT, 8),
                    (OperationTypeEnum.AGENT, 9),
                    (OperationTypeEnum.DATA_QUERY, 8),
                ),
                version="gpt-5.4",
                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0025 + (bytesReceived / 4 / 1000) * 0.015
            ),
            AiModel(
                name="gpt-5.4-mini",
                displayName="OpenAI GPT-5.4 Mini",
                connectorType="openai",
                apiUrl="https://api.openai.com/v1/chat/completions",
                temperature=0.2,
                maxTokens=128000,
                contextLength=400000,
                costPer1kTokensInput=0.00075, # $0.75/M tokens
                costPer1kTokensOutput=0.0045, # $4.50/M tokens
                speedRating=9,
                qualityRating=9,
                functionCall=self.callAiBasic,
                functionCallStream=self.callAiBasicStream,
                priority=PriorityEnum.SPEED,
                processingMode=ProcessingModeEnum.BASIC,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.PLAN, 8),
                    (OperationTypeEnum.DATA_ANALYSE, 9),
                    (OperationTypeEnum.DATA_GENERATE, 9),
                    (OperationTypeEnum.DATA_EXTRACT, 8),
                    (OperationTypeEnum.AGENT, 8),
                    (OperationTypeEnum.DATA_QUERY, 10),
                ),
                version="gpt-5.4-mini",
                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00075 + (bytesReceived / 4 / 1000) * 0.0045
            ),
            AiModel(
                name="gpt-5.4-nano",
                displayName="OpenAI GPT-5.4 Nano",
                connectorType="openai",
                apiUrl="https://api.openai.com/v1/chat/completions",
                temperature=0.2,
                maxTokens=128000,
                contextLength=400000,
                costPer1kTokensInput=0.0002, # $0.20/M tokens
                costPer1kTokensOutput=0.00125, # $1.25/M tokens
                speedRating=10,
                qualityRating=7,
                functionCall=self.callAiBasic,
                functionCallStream=self.callAiBasicStream,
                priority=PriorityEnum.COST,
                processingMode=ProcessingModeEnum.BASIC,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.PLAN, 7),
                    (OperationTypeEnum.DATA_ANALYSE, 7),
                    (OperationTypeEnum.DATA_GENERATE, 8),
                    (OperationTypeEnum.DATA_EXTRACT, 9),
                    (OperationTypeEnum.AGENT, 7),
                    (OperationTypeEnum.DATA_QUERY, 10),
                ),
                version="gpt-5.4-nano",
                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0002 + (bytesReceived / 4 / 1000) * 0.00125
            ),
            # Vision variant of gpt-5.5: same API model, routed to callAiImage.
            AiModel(
                name="gpt-5.5",
                displayName="OpenAI GPT-5.5 Vision",
                connectorType="openai",
                apiUrl="https://api.openai.com/v1/chat/completions",
                temperature=0.2,
                maxTokens=128000,
                contextLength=1050000,
                costPer1kTokensInput=0.005,
                costPer1kTokensOutput=0.03,
                speedRating=6,
                qualityRating=10,
                functionCall=self.callAiImage,
                priority=PriorityEnum.QUALITY,
                processingMode=ProcessingModeEnum.DETAILED,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.IMAGE_ANALYSE, 10)
                ),
                version="gpt-5.5",
                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.005 + (bytesReceived / 4 / 1000) * 0.03
            ),
            AiModel(
                name="text-embedding-3-small",
                displayName="OpenAI Embedding Small",
                connectorType="openai",
                apiUrl="https://api.openai.com/v1/embeddings",
                temperature=0.0,
                maxTokens=0,
                contextLength=8191,
                costPer1kTokensInput=0.00002, # $0.02/M tokens
                costPer1kTokensOutput=0.0,
                speedRating=10,
                qualityRating=8,
                functionCall=self.callEmbedding,
                priority=PriorityEnum.COST,
                processingMode=ProcessingModeEnum.BASIC,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.EMBEDDING, 10)
                ),
                version="text-embedding-3-small",
                # Embeddings have no output tokens, so only input bytes are priced.
                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00002
            ),
            AiModel(
                name="text-embedding-3-large",
                displayName="OpenAI Embedding Large",
                connectorType="openai",
                apiUrl="https://api.openai.com/v1/embeddings",
                temperature=0.0,
                maxTokens=0,
                contextLength=8191,
                costPer1kTokensInput=0.00013, # $0.13/M tokens
                costPer1kTokensOutput=0.0,
                speedRating=9,
                qualityRating=10,
                functionCall=self.callEmbedding,
                priority=PriorityEnum.QUALITY,
                processingMode=ProcessingModeEnum.ADVANCED,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.EMBEDDING, 10)
                ),
                version="text-embedding-3-large",
                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00013
            ),
            AiModel(
                name="dall-e-3",
                displayName="OpenAI DALL-E 3",
                connectorType="openai",
                apiUrl="https://api.openai.com/v1/images/generations",
                temperature=0.0, # Image generation doesn't use temperature
                maxTokens=0, # Image generation doesn't use tokens
                contextLength=0,
                costPer1kTokensInput=0.04,
                costPer1kTokensOutput=0.0,
                speedRating=5, # Slow for image generation
                qualityRating=9, # High quality art generation
                # capabilities removed (not used in business logic)
                functionCall=self.generateImage,
                priority=PriorityEnum.QUALITY,
                processingMode=ProcessingModeEnum.DETAILED,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.IMAGE_GENERATE, 10)
                ),
                version="dall-e-3",
                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.04
            )
        ]
|
|
|
|
async def callAiBasic(self, modelCall: AiModelCall) -> AiModelResponse:
|
|
"""
|
|
Calls the OpenAI API with the given messages using standardized pattern.
|
|
|
|
Args:
|
|
modelCall: AiModelCall with messages and options
|
|
|
|
Returns:
|
|
AiModelResponse with content and metadata
|
|
|
|
Raises:
|
|
HTTPException: For errors in API communication
|
|
"""
|
|
try:
|
|
# Extract parameters from modelCall
|
|
messages = modelCall.messages
|
|
model = modelCall.model
|
|
options = modelCall.options
|
|
temperature = getattr(options, "temperature", None)
|
|
if temperature is None:
|
|
temperature = model.temperature
|
|
maxTokens = model.maxTokens
|
|
|
|
payload = {
|
|
"model": model.name,
|
|
"messages": messages,
|
|
# Universal output-length cap. `max_tokens` is deprecated and
|
|
# rejected outright by gpt-5.x / o-series; `max_completion_tokens`
|
|
# is accepted by every current chat-completions model (legacy
|
|
# gpt-4o, gpt-4.1, gpt-5.x, o1/o3/o4) per OpenAI API reference.
|
|
"max_completion_tokens": maxTokens
|
|
}
|
|
# gpt-5.x and o-series only accept the default temperature (1) and
|
|
# return HTTP 400 `unsupported_value` for anything else - omit the
|
|
# field entirely for those models.
|
|
if _supportsCustomTemperature(model.name):
|
|
payload["temperature"] = temperature
|
|
|
|
if modelCall.tools:
|
|
payload["tools"] = modelCall.tools
|
|
payload["tool_choice"] = modelCall.toolChoice or "auto"
|
|
|
|
response = await self.httpClient.post(
|
|
model.apiUrl,
|
|
json=payload
|
|
)
|
|
|
|
if response.status_code != 200:
|
|
error_message = f"OpenAI API error: {response.status_code} - {response.text}"
|
|
logger.error(error_message)
|
|
|
|
# Check for rate limit exceeded (429 TPM)
|
|
if response.status_code == 429:
|
|
try:
|
|
error_data = response.json()
|
|
error_msg = error_data.get("error", {}).get("message", "Rate limit exceeded")
|
|
raise RateLimitExceededException(
|
|
f"Rate limit exceeded for {model.name}: {error_msg}"
|
|
)
|
|
except (ValueError, KeyError):
|
|
raise RateLimitExceededException(
|
|
f"Rate limit exceeded for {model.name}"
|
|
)
|
|
|
|
# Check for context length exceeded error
|
|
if response.status_code == 400:
|
|
try:
|
|
error_data = response.json()
|
|
if (error_data.get("error", {}).get("code") == "context_length_exceeded" or
|
|
"context length" in error_data.get("error", {}).get("message", "").lower()):
|
|
# Raise a specific exception for context length issues
|
|
raise ContextLengthExceededException(
|
|
f"Context length exceeded: {error_data.get('error', {}).get('message', 'Unknown error')}"
|
|
)
|
|
except (ValueError, KeyError):
|
|
pass # If we can't parse the error, fall through to generic error
|
|
|
|
# Include the actual error details in the exception
|
|
raise HTTPException(status_code=500, detail=error_message)
|
|
|
|
responseJson = response.json()
|
|
choiceMessage = responseJson["choices"][0]["message"]
|
|
content = choiceMessage.get("content") or ""
|
|
|
|
metadata = {"response_id": responseJson.get("id", "")}
|
|
if choiceMessage.get("tool_calls"):
|
|
metadata["toolCalls"] = choiceMessage["tool_calls"]
|
|
|
|
return AiModelResponse(
|
|
content=content,
|
|
success=True,
|
|
modelId=model.name,
|
|
metadata=metadata
|
|
)
|
|
|
|
except ContextLengthExceededException:
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"Error calling OpenAI API: {str(e)}")
|
|
raise HTTPException(status_code=500, detail=f"Error calling OpenAI API: {str(e)}")
|
|
|
|
    async def callAiBasicStream(self, modelCall: AiModelCall) -> AsyncGenerator[Union[str, AiModelResponse], None]:
        """Stream OpenAI response. Yields str deltas, then final AiModelResponse.

        Args:
            modelCall: AiModelCall with messages, model and options.

        Yields:
            str: incremental content deltas as they arrive via SSE.
            AiModelResponse: exactly once, as the last item, carrying the
                fully accumulated content and any accumulated tool calls
                in metadata["toolCalls"].

        Raises:
            RateLimitExceededException: On HTTP 429 from the API.
            HTTPException: For other stream/API errors.
        """
        try:
            messages = modelCall.messages
            model = modelCall.model
            options = modelCall.options
            # Per-call temperature override falls back to the model default.
            temperature = getattr(options, "temperature", None)
            if temperature is None:
                temperature = model.temperature

            payload: Dict[str, Any] = {
                "model": model.name,
                "messages": messages,
                # See callAiBasic for the rationale: `max_completion_tokens`
                # is the universal output-length parameter; `max_tokens` is
                # deprecated and rejected by gpt-5.x / o-series.
                "max_completion_tokens": model.maxTokens,
                "stream": True,
            }
            # gpt-5.x / o-series reject any non-default temperature - omit it.
            if _supportsCustomTemperature(model.name):
                payload["temperature"] = temperature

            if modelCall.tools:
                payload["tools"] = modelCall.tools
                payload["tool_choice"] = modelCall.toolChoice or "auto"

            fullContent = ""
            # Tool-call fragments arrive split across chunks; accumulate them
            # keyed by their `index` field.
            toolCallsAccum: Dict[int, Dict[str, Any]] = {}

            async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
                if response.status_code != 200:
                    # For streamed responses the error body must be read explicitly.
                    body = await response.aread()
                    bodyStr = body.decode()
                    if response.status_code == 429:
                        try:
                            errorMsg = _json.loads(bodyStr).get("error", {}).get("message", "Rate limit exceeded")
                        except (ValueError, KeyError):
                            errorMsg = f"Rate limit exceeded for {model.name}"
                        raise RateLimitExceededException(f"Rate limit exceeded for {model.name}: {errorMsg}")
                    raise HTTPException(status_code=500, detail=f"OpenAI stream error: {response.status_code} - {bodyStr}")

                # Server-sent events: each payload line is prefixed with "data: ".
                async for line in response.aiter_lines():
                    if not line.startswith("data: "):
                        continue
                    data = line[6:]
                    if data.strip() == "[DONE]":
                        break
                    try:
                        chunk = _json.loads(data)
                    except _json.JSONDecodeError:
                        # Skip malformed/partial lines rather than aborting the stream.
                        continue

                    delta = chunk.get("choices", [{}])[0].get("delta", {})

                    if "content" in delta and delta["content"]:
                        fullContent += delta["content"]
                        yield delta["content"]

                    # Merge streamed tool-call deltas: id/name arrive once,
                    # `arguments` is concatenated across chunks.
                    for tcDelta in delta.get("tool_calls", []):
                        idx = tcDelta.get("index", 0)
                        if idx not in toolCallsAccum:
                            toolCallsAccum[idx] = {
                                "id": tcDelta.get("id", ""),
                                "type": "function",
                                "function": {"name": "", "arguments": ""},
                            }
                        if tcDelta.get("id"):
                            toolCallsAccum[idx]["id"] = tcDelta["id"]
                        fn = tcDelta.get("function", {})
                        if fn.get("name"):
                            toolCallsAccum[idx]["function"]["name"] = fn["name"]
                        if fn.get("arguments"):
                            toolCallsAccum[idx]["function"]["arguments"] += fn["arguments"]

            metadata: Dict[str, Any] = {}
            if toolCallsAccum:
                # Preserve the API's tool-call ordering via the index keys.
                metadata["toolCalls"] = [toolCallsAccum[i] for i in sorted(toolCallsAccum)]

            # Final item: the complete response object for the caller to persist.
            yield AiModelResponse(
                content=fullContent,
                success=True,
                modelId=model.name,
                metadata=metadata,
            )

        except (RateLimitExceededException, ContextLengthExceededException, HTTPException):
            # Typed errors propagate unchanged for upstream retry/fallback logic.
            raise
        except Exception as e:
            logger.error(f"Error streaming OpenAI API: {e}")
            raise HTTPException(status_code=500, detail=f"Error streaming OpenAI API: {e}")
|
|
|
|
async def callEmbedding(self, modelCall: AiModelCall) -> AiModelResponse:
|
|
"""Generate embeddings via the OpenAI Embeddings API.
|
|
|
|
Reads texts from modelCall.embeddingInput.
|
|
Returns vectors in metadata["embeddings"].
|
|
"""
|
|
try:
|
|
model = modelCall.model
|
|
texts = modelCall.embeddingInput or []
|
|
if not texts:
|
|
return AiModelResponse(
|
|
content="", success=False, error="No embeddingInput provided"
|
|
)
|
|
|
|
payload = {"model": model.name, "input": texts}
|
|
response = await self.httpClient.post(model.apiUrl, json=payload)
|
|
|
|
if response.status_code != 200:
|
|
errorMessage = f"OpenAI Embedding API error: {response.status_code} - {response.text}"
|
|
logger.error(errorMessage)
|
|
if response.status_code == 429:
|
|
raise RateLimitExceededException(f"Rate limit exceeded for {model.name}")
|
|
if response.status_code == 400:
|
|
try:
|
|
errorData = response.json()
|
|
errMsg = errorData.get("error", {}).get("message", "").lower()
|
|
errCode = errorData.get("error", {}).get("code", "")
|
|
if errCode == "context_length_exceeded" or "too many tokens" in errMsg or "maximum context length" in errMsg:
|
|
raise ContextLengthExceededException(
|
|
f"Embedding context length exceeded for {model.name}: {errorData.get('error', {}).get('message', '')}"
|
|
)
|
|
except (ValueError, KeyError):
|
|
pass
|
|
raise HTTPException(status_code=500, detail=errorMessage)
|
|
|
|
responseJson = response.json()
|
|
embeddings = [item["embedding"] for item in responseJson["data"]]
|
|
usage = responseJson.get("usage", {})
|
|
|
|
return AiModelResponse(
|
|
content="",
|
|
success=True,
|
|
modelId=model.name,
|
|
tokensUsed={
|
|
"input": usage.get("prompt_tokens", 0),
|
|
"output": 0,
|
|
"total": usage.get("total_tokens", 0),
|
|
},
|
|
metadata={"embeddings": embeddings},
|
|
)
|
|
except (RateLimitExceededException, ContextLengthExceededException):
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"Error calling OpenAI Embedding API: {str(e)}")
|
|
raise HTTPException(status_code=500, detail=f"Error calling OpenAI Embedding API: {str(e)}")
|
|
|
|
async def callAiImage(self, modelCall: AiModelCall) -> AiModelResponse:
|
|
"""
|
|
Analyzes an image with the OpenAI Vision API using standardized pattern.
|
|
|
|
Args:
|
|
modelCall: AiModelCall with messages and image data in options
|
|
|
|
Returns:
|
|
AiModelResponse with analysis content
|
|
"""
|
|
try:
|
|
# Extract parameters from modelCall
|
|
messages = modelCall.messages
|
|
model = modelCall.model
|
|
|
|
# Messages should already be in the correct format with image data embedded
|
|
# Just verify they contain image data
|
|
if not messages or not messages[0].get("content"):
|
|
raise ValueError("No messages provided for image analysis")
|
|
|
|
logger.debug(f"Starting image analysis with {len(messages)} message(s)...")
|
|
|
|
# Use the messages directly - they should already contain the image data
|
|
# in the format: {"type": "image_url", "image_url": {"url": "data:...base64,..."}}
|
|
|
|
temperature = model.temperature
|
|
# Don't set maxTokens - let the model use its full context length
|
|
|
|
payload = {
|
|
"model": model.name,
|
|
"messages": messages,
|
|
}
|
|
if _supportsCustomTemperature(model.name):
|
|
payload["temperature"] = temperature
|
|
|
|
response = await self.httpClient.post(
|
|
model.apiUrl,
|
|
json=payload
|
|
)
|
|
|
|
if response.status_code != 200:
|
|
logger.error(f"OpenAI API error: {response.status_code} - {response.text}")
|
|
raise HTTPException(status_code=500, detail="Error communicating with OpenAI API")
|
|
|
|
responseJson = response.json()
|
|
content = responseJson["choices"][0]["message"]["content"]
|
|
|
|
return AiModelResponse(
|
|
content=content,
|
|
success=True,
|
|
modelId=model.name,
|
|
metadata={"response_id": responseJson.get("id", "")}
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error during image analysis: {str(e)}", exc_info=True)
|
|
return AiModelResponse(
|
|
content="",
|
|
success=False,
|
|
error=f"Error during image analysis: {str(e)}"
|
|
)
|
|
|
|
async def generateImage(self, modelCall: AiModelCall) -> AiModelResponse:
|
|
"""
|
|
Generate an image using DALL-E 3 using standardized pattern.
|
|
|
|
Args:
|
|
modelCall: AiModelCall with messages and generation options
|
|
|
|
Returns:
|
|
AiModelResponse with generated image data
|
|
"""
|
|
try:
|
|
# Extract parameters from modelCall
|
|
messages = modelCall.messages
|
|
model = modelCall.model
|
|
options = modelCall.options
|
|
|
|
# Get prompt from messages
|
|
promptContent = messages[0]["content"] if messages else ""
|
|
|
|
# Parse prompt using AiCallPromptImage model
|
|
import json
|
|
|
|
try:
|
|
# Try to parse as JSON
|
|
promptData = json.loads(promptContent)
|
|
promptModel = AiCallPromptImage(**promptData)
|
|
except:
|
|
# If not JSON, use plain text prompt
|
|
promptModel = AiCallPromptImage(
|
|
prompt=promptContent,
|
|
size=options.size if options and hasattr(options, 'size') else "1024x1024",
|
|
quality=options.quality if options and hasattr(options, 'quality') else "standard",
|
|
style=options.style if options and hasattr(options, 'style') else "vivid"
|
|
)
|
|
|
|
# Extract parameters from Pydantic model
|
|
prompt = promptModel.prompt
|
|
size = promptModel.size or "1024x1024"
|
|
quality = promptModel.quality or "standard"
|
|
style = promptModel.style or "vivid"
|
|
|
|
logger.debug(f"Starting image generation with prompt: '{prompt[:100]}...'")
|
|
|
|
# DALL-E 3 API endpoint
|
|
dalle_url = "https://api.openai.com/v1/images/generations"
|
|
|
|
payload = {
|
|
"model": "dall-e-3",
|
|
"prompt": prompt,
|
|
"size": size,
|
|
"quality": quality,
|
|
"style": style,
|
|
"n": 1,
|
|
"response_format": "b64_json" # Get base64 data directly instead of URLs
|
|
}
|
|
|
|
# Use existing httpClient to benefit from connection pooling
|
|
# This avoids TLS connection issues that can occur with fresh clients
|
|
response = await self.httpClient.post(
|
|
dalle_url,
|
|
json=payload
|
|
)
|
|
|
|
if response.status_code != 200:
|
|
logger.error(f"DALL-E API error: {response.status_code} - {response.text}")
|
|
return AiModelResponse(
|
|
content="",
|
|
success=False,
|
|
error=f"DALL-E API error: {response.status_code} - {response.text}"
|
|
)
|
|
|
|
responseJson = response.json()
|
|
|
|
if "data" in responseJson and len(responseJson["data"]) > 0:
|
|
image_data = responseJson["data"][0]["b64_json"]
|
|
|
|
logger.info(f"Successfully generated image: {len(image_data)} characters")
|
|
return AiModelResponse(
|
|
content=image_data,
|
|
success=True,
|
|
modelId="dall-e-3",
|
|
metadata={
|
|
"size": size,
|
|
"quality": quality,
|
|
"style": style,
|
|
"response_id": responseJson.get("id", "")
|
|
}
|
|
)
|
|
else:
|
|
logger.error("No image data in DALL-E response")
|
|
return AiModelResponse(
|
|
content="",
|
|
success=False,
|
|
error="No image data in DALL-E response"
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error during image generation: {str(e)}", exc_info=True)
|
|
return AiModelResponse(
|
|
content="",
|
|
success=False,
|
|
error=f"Error during image generation: {str(e)}"
|
|
) |