import logging
from typing import Dict, Any, List
from modules.connectors.connectorAiOpenai import AiOpenai
from modules.connectors.connectorAiAnthropic import AiAnthropic
from modules.datamodels.datamodelAi import AiCallOptions, AiCallRequest, AiCallResponse
logger = logging.getLogger(__name__)
# Local model registry (connectors specification) belongs in interface layer, not service
# Each entry describes one model: which connector serves it, its context
# window (in tokens), input/output cost per 1k tokens (USD), and 1-10
# speed/quality ratings consumed by AiObjects._selectModel.
# NOTE: insertion order matters — max()/min() in model selection break
# ties in favor of earlier entries.
aiModels: Dict[str, Dict[str, Any]] = {
    "openai_gpt4o": {
        "connector": "openai",
        "contextLength": 128000,
        "costPer1kTokens": 0.03,
        "costPer1kTokensOutput": 0.06,
        "speedRating": 8,
        "qualityRating": 9,
    },
    "openai_gpt35": {
        "connector": "openai",
        "contextLength": 16000,
        "costPer1kTokens": 0.0015,
        "costPer1kTokensOutput": 0.002,
        "speedRating": 9,
        "qualityRating": 7,
    },
    "anthropic_claude": {
        "connector": "anthropic",
        "contextLength": 200000,
        "costPer1kTokens": 0.015,
        "costPer1kTokensOutput": 0.075,
        "speedRating": 7,
        "qualityRating": 10,
    },
}
class AiObjects:
    """Centralized AI interface: selects model and calls connector. No document handling."""

    def __init__(self):
        # One connector instance per provider, reused across calls.
        self.openaiService = AiOpenai()
        self.anthropicService = AiAnthropic()

    def _estimateCost(self, modelInfo: Dict[str, Any], contentSize: int) -> float:
        """Estimate the dollar cost of one call for a given model.

        Args:
            modelInfo: registry entry holding "costPer1kTokens" (input) and
                "costPer1kTokensOutput" (output) prices per 1k tokens.
            contentSize: combined prompt+context size in UTF-8 bytes.

        Returns:
            Estimated USD cost, using a ~4-bytes-per-token heuristic and
            assuming the output is roughly 10% of the input size.
        """
        estimatedTokens = contentSize / 4  # heuristic: ~4 bytes per token
        inputCost = (estimatedTokens / 1000) * modelInfo["costPer1kTokens"]
        # Output assumed to be ~10% of the input token count.
        outputCost = (estimatedTokens / 1000) * modelInfo["costPer1kTokensOutput"] * 0.1
        return inputCost + outputCost

    def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str:
        """Pick a registry model name honoring context size, maxCost and priority.

        Filters aiModels down to candidates that fit (with 20% headroom)
        and satisfy options.maxCost, then ranks by options.priority
        ("speed" | "quality" | "cost", anything else = balanced blend).
        Falls back to "openai_gpt35" when nothing qualifies, even though
        the input may then exceed its context window.
        """
        totalBytes = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))
        # BUGFIX: contextLength is denominated in tokens, so compare the
        # token estimate (bytes / 4, same heuristic as _estimateCost) —
        # the previous code compared raw bytes against tokens, rejecting
        # models roughly 4x too aggressively.
        estimatedTokens = totalBytes / 4
        candidates: Dict[str, Dict[str, Any]] = {}
        for name, info in aiModels.items():
            # Keep 20% headroom below the advertised context window.
            if estimatedTokens > info["contextLength"] * 0.8:
                continue
            if options.maxCost is not None and self._estimateCost(info, totalBytes) > options.maxCost:
                continue
            candidates[name] = info

        if not candidates:
            # Cheapest model as a last resort.
            return "openai_gpt35"
        if options.priority == "speed":
            return max(candidates, key=lambda k: candidates[k]["speedRating"])
        if options.priority == "quality":
            return max(candidates, key=lambda k: candidates[k]["qualityRating"])
        if options.priority == "cost":
            return min(candidates, key=lambda k: candidates[k]["costPer1kTokens"])

        # Default ("balanced"): weighted blend of quality, speed and
        # cheapness; the cost term goes strongly negative for expensive
        # models, penalizing them hard.
        def balancedScore(name: str) -> float:
            info = candidates[name]
            return info["qualityRating"] * 0.4 + info["speedRating"] * 0.3 + (10 - info["costPer1kTokens"] * 1000) * 0.3

        return max(candidates, key=balancedScore)

    def _connectorFor(self, modelName: str):
        """Return the connector service instance for a registry model name."""
        if aiModels[modelName]["connector"] == "openai":
            return self.openaiService
        return self.anthropicService

    async def call(self, request: AiCallRequest) -> AiCallResponse:
        """Run one AI call: optional truncation, model selection, dispatch.

        Args:
            request: carries prompt, optional context, and AiCallOptions.

        Returns:
            AiCallResponse with the text content plus token/cost estimates
            (heuristic, not provider-reported usage).
        """
        prompt = request.prompt
        context = request.context or ""
        options = request.options

        # Compress optionally (prompt/context) - simple truncation fallback kept here
        def maybeTruncate(text: str, limit: int) -> str:
            data = text.encode("utf-8")
            if len(data) <= limit:
                return text
            # errors="ignore" drops a multi-byte char split at the cut point.
            return data[:limit].decode("utf-8", errors="ignore") + "... [truncated]"

        if options.compressPrompt and len(prompt.encode("utf-8")) > 2000:
            prompt = maybeTruncate(prompt, 2000)
        if options.compressContext and len(context.encode("utf-8")) > 70000:
            context = maybeTruncate(context, 70000)

        modelName = self._selectModel(prompt, context, options)

        messages: List[Dict[str, Any]] = []
        if context:
            messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
        messages.append({"role": "user", "content": prompt})

        connector = self._connectorFor(modelName)
        if aiModels[modelName]["connector"] == "openai":
            # OpenAI connector returns the text content directly.
            content = await connector.callAiBasic(messages)
        else:
            # NOTE(review): this parses an OpenAI-shaped payload
            # ({"choices": [...]}) out of the anthropic connector — the
            # Anthropic Messages API natively returns content blocks, so
            # confirm AiAnthropic.callAiBasic really normalizes to this shape.
            response = await connector.callAiBasic(messages)
            content = response["choices"][0]["message"]["content"]

        # Estimate cost/tokens with the same ~4-bytes-per-token heuristic.
        totalSize = len((prompt + context).encode("utf-8"))
        cost = self._estimateCost(aiModels[modelName], totalSize)
        usedTokens = int(totalSize / 4)

        return AiCallResponse(content=content, modelName=modelName, usedTokens=usedTokens, costEstimate=cost)