# gateway/modules/interfaces/interfaceAiObjects.py

import logging
from typing import Dict, Any, List
from modules.connectors.connectorAiOpenai import AiOpenai
from modules.connectors.connectorAiAnthropic import AiAnthropic
from modules.datamodels.datamodelAi import AiCallOptions, AiCallRequest, AiCallResponse

logger = logging.getLogger(__name__)

# Local model registry (connector specifications) belongs in the interface layer, not in the service layer.
aiModels: Dict[str, Dict[str, Any]] = {
    "openai_gpt4o": {
        "connector": "openai",
        "contextLength": 128000,
        "costPer1kTokens": 0.03,
        "costPer1kTokensOutput": 0.06,
        "speedRating": 8,
        "qualityRating": 9,
    },
    "openai_gpt35": {
        "connector": "openai",
        "contextLength": 16000,
        "costPer1kTokens": 0.0015,
        "costPer1kTokensOutput": 0.002,
        "speedRating": 9,
        "qualityRating": 7,
    },
    "anthropic_claude": {
        "connector": "anthropic",
        "contextLength": 200000,
        "costPer1kTokens": 0.015,
        "costPer1kTokensOutput": 0.075,
        "speedRating": 7,
        "qualityRating": 10,
    },
}
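
# A new backend would be registered with an entry of the same shape. The name and numbers
# below are purely illustrative (hypothetical model, not real pricing):
#
#     "openai_example": {
#         "connector": "openai",        # must be handled by _connectorFor below
#         "contextLength": 32000,       # compared (with a 20% margin) against prompt+context size
#         "costPer1kTokens": 0.001,
#         "costPer1kTokensOutput": 0.002,
#         "speedRating": 8,
#         "qualityRating": 8,
#     },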


class AiObjects:
    """Centralized AI interface: selects model and calls connector. No document handling."""

    def __init__(self):
        self.openaiService = AiOpenai()
        self.anthropicService = AiAnthropic()

    def _estimateCost(self, modelInfo: Dict[str, Any], contentSize: int) -> float:
        # Rough heuristic: ~4 bytes per token, with output assumed to be ~10% of the input volume.
        # Example: 8000 bytes -> ~2000 tokens -> (2000/1000)*0.03 + (2000/1000)*0.06*0.1 = 0.072 for openai_gpt4o.
        estimatedTokens = contentSize / 4
        inputCost = (estimatedTokens / 1000) * modelInfo["costPer1kTokens"]
        outputCost = (estimatedTokens / 1000) * modelInfo["costPer1kTokensOutput"] * 0.1
        return inputCost + outputCost

    def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str:
        # Drop models whose context window (minus a 20% safety margin) or cost budget would be exceeded,
        # then pick by the requested priority; fall back to the cheapest default when nothing qualifies.
        totalSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))
        candidates: Dict[str, Dict[str, Any]] = {}
        for name, info in aiModels.items():
            if totalSize > info["contextLength"] * 0.8:
                continue
            if options.maxCost is not None and self._estimateCost(info, totalSize) > options.maxCost:
                continue
            candidates[name] = info
        if not candidates:
            return "openai_gpt35"
        if options.priority == "speed":
            return max(candidates, key=lambda k: candidates[k]["speedRating"])
        if options.priority == "quality":
            return max(candidates, key=lambda k: candidates[k]["qualityRating"])
        if options.priority == "cost":
            return min(candidates, key=lambda k: candidates[k]["costPer1kTokens"])

        # Default "balanced" priority: weighted mix of quality, speed, and (inverted) cost.
        def balancedScore(name: str) -> float:
            info = candidates[name]
            return info["qualityRating"] * 0.4 + info["speedRating"] * 0.3 + (10 - info["costPer1kTokens"] * 1000) * 0.3

        return max(candidates, key=balancedScore)
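
    # Worked example of the balanced score using the registry above:
    #   openai_gpt4o:      9*0.4 + 8*0.3 + (10 - 0.03*1000)*0.3   = 3.6 + 2.4 - 6.0  = 0.0
    #   openai_gpt35:      7*0.4 + 9*0.3 + (10 - 0.0015*1000)*0.3 = 2.8 + 2.7 + 2.55 = 8.05
    #   anthropic_claude: 10*0.4 + 7*0.3 + (10 - 0.015*1000)*0.3  = 4.0 + 2.1 - 1.5  = 4.6
    # so the balanced default favors openai_gpt35 whenever all three models fit the request.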

    def _connectorFor(self, modelName: str):
        return self.openaiService if aiModels[modelName]["connector"] == "openai" else self.anthropicService

    async def call(self, request: AiCallRequest) -> AiCallResponse:
        prompt = request.prompt
        context = request.context or ""
        options = request.options

        # Optional compression of prompt/context; a simple byte-level truncation fallback is kept here.
        def maybeTruncate(text: str, limit: int) -> str:
            data = text.encode("utf-8")
            if len(data) <= limit:
                return text
            return data[:limit].decode("utf-8", errors="ignore") + "... [truncated]"

        if options.compressPrompt and len(prompt.encode("utf-8")) > 2000:
            prompt = maybeTruncate(prompt, 2000)
        if options.compressContext and len(context.encode("utf-8")) > 70000:
            context = maybeTruncate(context, 70000)

        modelName = self._selectModel(prompt, context, options)
        messages: List[Dict[str, Any]] = []
        if context:
            messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
        messages.append({"role": "user", "content": prompt})

        # The OpenAI connector returns the content string directly; the Anthropic connector returns
        # a response dict from which the content is extracted.
        connector = self._connectorFor(modelName)
        if aiModels[modelName]["connector"] == "openai":
            content = await connector.callAiBasic(messages)
        else:
            response = await connector.callAiBasic(messages)
            content = response["choices"][0]["message"]["content"]

        # Estimate cost/tokens from the (possibly truncated) prompt and context.
        totalSize = len((prompt + context).encode("utf-8"))
        cost = self._estimateCost(aiModels[modelName], totalSize)
        usedTokens = int(totalSize / 4)
        return AiCallResponse(content=content, modelName=modelName, usedTokens=usedTokens, costEstimate=cost)
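

# Minimal usage sketch, not part of the interface. The AiCallOptions/AiCallRequest constructor
# arguments are assumed from how their fields are read above (the actual definitions live in
# modules.datamodels.datamodelAi), and running it requires whatever credentials the connectors expect.
if __name__ == "__main__":
    import asyncio

    logging.basicConfig(level=logging.INFO)

    async def _demo() -> None:
        # Hypothetical request: cost-priority call with a small budget and context compression enabled.
        options = AiCallOptions(priority="cost", maxCost=0.05, compressPrompt=False, compressContext=True)
        request = AiCallRequest(prompt="Summarize the attached notes.", context="(document text here)", options=options)
        response = await AiObjects().call(request)
        logger.info("model=%s tokens=%d cost=%.4f", response.modelName, response.usedTokens, response.costEstimate)
        print(response.content)

    asyncio.run(_demo())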