updated price calculations and basis for refactoring of dynamic content handling

This commit is contained in:
ValueOn AG 2025-10-22 16:48:32 +02:00
parent 109e77fd60
commit 6b819cc848
9 changed files with 222 additions and 195 deletions

View file

@ -172,4 +172,4 @@ class ModelRegistry:
# Global registry instance # Global registry instance
model_registry = ModelRegistry() modelRegistry = ModelRegistry()

View file

@ -6,7 +6,7 @@ import time
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
from modules.aicore.aicoreModelRegistry import model_registry from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.aicore.aicoreModelSelector import model_selector from modules.aicore.aicoreModelSelector import model_selector
from modules.datamodels.datamodelAi import ( from modules.datamodels.datamodelAi import (
AiModel, AiModel,
@ -42,11 +42,11 @@ class AiObjects:
logger.info("Auto-discovering AI connectors...") logger.info("Auto-discovering AI connectors...")
# Use the model registry's built-in discovery mechanism # Use the model registry's built-in discovery mechanism
discoveredConnectors = model_registry.discoverConnectors() discoveredConnectors = modelRegistry.discoverConnectors()
# Register each discovered connector # Register each discovered connector
for connector in discoveredConnectors: for connector in discoveredConnectors:
model_registry.registerConnector(connector) modelRegistry.registerConnector(connector)
logger.info(f"Registered connector: {connector.getConnectorType()}") logger.info(f"Registered connector: {connector.getConnectorType()}")
logger.info(f"Total connectors registered: {len(discoveredConnectors)}") logger.info(f"Total connectors registered: {len(discoveredConnectors)}")
@ -63,7 +63,7 @@ class AiObjects:
def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str: def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str:
"""Select the best model using dynamic model selection system.""" """Select the best model using dynamic model selection system."""
# Get available models from the dynamic registry # Get available models from the dynamic registry
availableModels = model_registry.getAvailableModels() availableModels = modelRegistry.getAvailableModels()
if not availableModels: if not availableModels:
logger.error("No models available in the registry") logger.error("No models available in the registry")
@ -109,7 +109,7 @@ class AiObjects:
maxTokens = getattr(options, "maxTokens", None) maxTokens = getattr(options, "maxTokens", None)
# Get fallback models for this operation type # Get fallback models for this operation type
availableModels = model_registry.getAvailableModels() availableModels = modelRegistry.getAvailableModels()
fallbackModels = model_selector.getFallbackModels(prompt, context, options, availableModels) fallbackModels = model_selector.getFallbackModels(prompt, context, options, availableModels)
if not fallbackModels: if not fallbackModels:
@ -188,7 +188,7 @@ class AiObjects:
startTime = time.time() startTime = time.time()
# Get the connector for this model # Get the connector for this model
connector = model_registry.getConnectorForModel(model.name) connector = modelRegistry.getConnectorForModel(model.name)
if not connector: if not connector:
raise ValueError(f"No connector found for model {model.name}") raise ValueError(f"No connector found for model {model.name}")
@ -221,8 +221,8 @@ class AiObjects:
processingTime = endTime - startTime processingTime = endTime - startTime
outputBytes = len(content.encode("utf-8")) outputBytes = len(content.encode("utf-8"))
# Calculate price using model's cost information # Calculate price using model's own price calculation method
priceUsd = model.costPer1kTokensInput * (inputBytes / 4 / 1000) + model.costPer1kTokensOutput * (outputBytes / 4 / 1000) priceUsd = model.calculatePriceUsd(inputBytes, outputBytes)
return AiCallResponse( return AiCallResponse(
content=content, content=content,
@ -244,7 +244,7 @@ class AiObjects:
inputBytes = len(prompt.encode("utf-8")) + len(imageData) if isinstance(imageData, bytes) else len(prompt.encode("utf-8")) + len(str(imageData).encode("utf-8")) inputBytes = len(prompt.encode("utf-8")) + len(imageData) if isinstance(imageData, bytes) else len(prompt.encode("utf-8")) + len(str(imageData).encode("utf-8"))
# Get fallback models for image analysis # Get fallback models for image analysis
availableModels = model_registry.getAvailableModels() availableModels = modelRegistry.getAvailableModels()
fallbackModels = model_selector.getFallbackModels(prompt, "", options, availableModels) fallbackModels = model_selector.getFallbackModels(prompt, "", options, availableModels)
if not fallbackModels: if not fallbackModels:
@ -314,8 +314,8 @@ class AiObjects:
processingTime = endTime - startTime processingTime = endTime - startTime
outputBytes = len(content.encode("utf-8")) outputBytes = len(content.encode("utf-8"))
# Calculate price using model's cost information # Calculate price using model's own price calculation method
priceUsd = model.costPer1kTokensInput * (inputBytes / 4 / 1000) + model.costPer1kTokensOutput * (outputBytes / 4 / 1000) priceUsd = model.calculatePriceUsd(inputBytes, outputBytes)
return AiCallResponse( return AiCallResponse(
content=content, content=content,
@ -339,13 +339,13 @@ class AiObjects:
try: try:
# Select the best model for image generation # Select the best model for image generation
modelName = self._selectModel(prompt, "", options) modelName = self._selectModel(prompt, "", options)
selectedModel = model_registry.getModel(modelName) selectedModel = modelRegistry.getModel(modelName)
if not selectedModel: if not selectedModel:
raise ValueError(f"Selected model {modelName} not found in registry") raise ValueError(f"Selected model {modelName} not found in registry")
# Get the connector for this model # Get the connector for this model
connector = model_registry.getConnectorForModel(modelName) connector = modelRegistry.getConnectorForModel(modelName)
if not connector: if not connector:
raise ValueError(f"No connector found for model {modelName}") raise ValueError(f"No connector found for model {modelName}")
@ -364,9 +364,8 @@ class AiObjects:
processingTime = endTime - startTime processingTime = endTime - startTime
outputBytes = len(content.encode("utf-8")) outputBytes = len(content.encode("utf-8"))
# Calculate price using model's cost information # Calculate price using model's own price calculation method
estimatedTokens = inputBytes / 4 priceUsd = selectedModel.calculatePriceUsd(inputBytes, outputBytes)
priceUsd = (estimatedTokens / 1000) * selectedModel.costPer1kTokensInput + (outputBytes / 4 / 1000) * selectedModel.costPer1kTokensOutput
logger.info(f"✅ Image generation successful with model: {modelName}") logger.info(f"✅ Image generation successful with model: {modelName}")
return AiCallResponse( return AiCallResponse(
@ -401,7 +400,7 @@ class AiObjects:
**kwargs **kwargs
) )
# Get Tavily connector from registry # Get Tavily connector from registry
tavilyConnector = model_registry.getConnectorForModel("tavily_search") tavilyConnector = modelRegistry.getConnectorForModel("tavily_search")
if not tavilyConnector: if not tavilyConnector:
raise ValueError("Tavily connector not available") raise ValueError("Tavily connector not available")
result = await tavilyConnector.search(request) result = await tavilyConnector.search(request)
@ -440,7 +439,7 @@ class AiObjects:
format=format format=format
) )
# Get Tavily connector from registry # Get Tavily connector from registry
tavilyConnector = model_registry.getConnectorForModel("tavily_crawl") tavilyConnector = modelRegistry.getConnectorForModel("tavily_crawl")
if not tavilyConnector: if not tavilyConnector:
raise ValueError("Tavily connector not available") raise ValueError("Tavily connector not available")
result = await tavilyConnector.crawl(request) result = await tavilyConnector.crawl(request)
@ -792,7 +791,7 @@ Format your response in a clear, professional manner that would be helpful for s
startTime = time.time() startTime = time.time()
# Use Perplexity for web research with search capabilities # Use Perplexity for web research with search capabilities
perplexity_connector = model_registry.getConnectorForModel("perplexity_callAiWithWebSearch") perplexity_connector = modelRegistry.getConnectorForModel("perplexity_callAiWithWebSearch")
if not perplexity_connector: if not perplexity_connector:
raise ValueError("Perplexity connector not available") raise ValueError("Perplexity connector not available")
response = await perplexity_connector.callAiWithWebSearch(webPrompt) response = await perplexity_connector.callAiWithWebSearch(webPrompt)
@ -803,12 +802,8 @@ Format your response in a clear, professional manner that would be helpful for s
outputBytes = len(response.encode("utf-8")) outputBytes = len(response.encode("utf-8"))
# Calculate price using Perplexity model pricing # Calculate price using Perplexity model pricing
perplexity_model = model_registry.getModel("perplexity_callAiWithWebSearch") perplexity_model = modelRegistry.getModel("perplexity_callAiWithWebSearch")
if perplexity_model: priceUsd = perplexity_model.calculatePriceUsd(inputBytes, outputBytes)
estimated_tokens = inputBytes / 4
priceUsd = (estimated_tokens / 1000) * perplexity_model.costPer1kTokensInput + (outputBytes / 4 / 1000) * perplexity_model.costPer1kTokensOutput
else:
priceUsd = 0.0
logger.info(f"✅ Web query successful with Perplexity") logger.info(f"✅ Web query successful with Perplexity")
return AiCallResponse( return AiCallResponse(
@ -835,26 +830,26 @@ Format your response in a clear, professional manner that would be helpful for s
# Utility methods # Utility methods
async def listAvailableModels(self, connectorType: str = None) -> List[Dict[str, Any]]: async def listAvailableModels(self, connectorType: str = None) -> List[Dict[str, Any]]:
"""List available models, optionally filtered by connector type.""" """List available models, optionally filtered by connector type."""
models = model_registry.getAvailableModels() models = modelRegistry.getAvailableModels()
if connectorType: if connectorType:
return [model.dict() for model in models if model.connectorType == connectorType] return [model.dict() for model in models if model.connectorType == connectorType]
return [model.dict() for model in models] return [model.dict() for model in models]
async def getModelInfo(self, modelName: str) -> Dict[str, Any]: async def getModelInfo(self, modelName: str) -> Dict[str, Any]:
"""Get information about a specific model.""" """Get information about a specific model."""
model = model_registry.getModel(modelName) model = modelRegistry.getModel(modelName)
if not model: if not model:
raise ValueError(f"Model {modelName} not found") raise ValueError(f"Model {modelName} not found")
return model.dict() return model.dict()
async def getModelsByCapability(self, capability: str) -> List[str]: async def getModelsByCapability(self, capability: str) -> List[str]:
"""Get model names that support a specific capability.""" """Get model names that support a specific capability."""
models = model_registry.getModelsByCapability(capability) models = modelRegistry.getModelsByCapability(capability)
return [model.name for model in models] return [model.name for model in models]
async def getModelsByTag(self, tag: str) -> List[str]: async def getModelsByTag(self, tag: str) -> List[str]:
"""Get model names that have a specific tag.""" """Get model names that have a specific tag."""
models = model_registry.getModelsByTag(tag) models = modelRegistry.getModelsByTag(tag)
return [model.name for model in models] return [model.name for model in models]
async def selectRelevantWebsites(self, websites: List[str], userQuestion: str) -> Tuple[List[str], str]: async def selectRelevantWebsites(self, websites: List[str], userQuestion: str) -> Tuple[List[str], str]:

View file

@ -547,81 +547,3 @@ CRITICAL REQUIREMENTS:
except Exception as e: except Exception as e:
logger.error(f"Error in AI image generation: {str(e)}") logger.error(f"Error in AI image generation: {str(e)}")
return {"success": False, "error": str(e)} return {"success": False, "error": str(e)}
def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]:
"""
Get model capabilities for content processing, including appropriate size limits for chunking.
"""
# Estimate total content size
prompt_size = len(prompt.encode('utf-8'))
document_size = 0
if documents:
# Rough estimate of document content size
for doc in documents:
document_size += doc.fileSize or 0
total_size = prompt_size + document_size
# Use AiObjects to select the best model for this content size
# We'll simulate the model selection by checking available models
from modules.interfaces.interfaceAiObjects import aiModels
# Find the best model for this content size and operation
best_model = None
best_context_length = 0
for model_name, model_info in aiModels.items():
context_length = model_info.get("contextLength", 0)
# Skip models with no context length or too small for content
if context_length == 0:
continue
# Check if model supports the operation type
capabilities = model_info.get("capabilities", [])
if options.operationType == OperationTypeEnum.IMAGE_ANALYSE and "imageAnalyse" not in capabilities:
continue
elif options.operationType == OperationTypeEnum.IMAGE_GENERATE and "imageGenerate" not in capabilities:
continue
elif options.operationType == OperationTypeEnum.WEB_RESEARCH and "web_search" not in capabilities:
continue
elif "text_generation" not in capabilities:
continue
# Prefer models that can handle the content without chunking, but allow chunking if needed
if context_length >= total_size * 0.8: # 80% of content size
if context_length > best_context_length:
best_model = model_info
best_context_length = context_length
elif best_model is None: # Fallback to largest available model
if context_length > best_context_length:
best_model = model_info
best_context_length = context_length
# Fallback to a reasonable default if no model found
if best_model is None:
best_model = {
"contextLength": 128000, # GPT-4o default
"llmName": "gpt-4o"
}
# Calculate appropriate sizes
# Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters)
context_length_bytes = int(best_model["contextLength"] * 4)
max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length
text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks
image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks
logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}")
logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes")
logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes")
return {
"maxContextBytes": max_context_bytes,
"textChunkSize": text_chunk_size,
"imageChunkSize": image_chunk_size
}

View file

@ -503,8 +503,6 @@ CONTINUATION INSTRUCTIONS:
Handle text calls with document processing through ExtractionService. Handle text calls with document processing through ExtractionService.
UNIFIED PROCESSING: Always use per-chunk processing for consistency. UNIFIED PROCESSING: Always use per-chunk processing for consistency.
""" """
# UNIFIED PROCESSING: Always use per-chunk processing for consistency
# This ensures MIME-type checking, chunk mapping, and parallel processing
return await self.processDocumentsPerChunk(documents, prompt, options) return await self.processDocumentsPerChunk(documents, prompt, options)
async def _processChunksWithMapping( async def _processChunksWithMapping(
@ -1194,6 +1192,7 @@ CONTINUATION INSTRUCTIONS:
def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]: def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]:
""" """
Get model capabilities for content processing, including appropriate size limits for chunking. Get model capabilities for content processing, including appropriate size limits for chunking.
Uses centralized model selection to determine chunking parameters.
""" """
# Estimate total content size # Estimate total content size
prompt_size = len(prompt.encode('utf-8')) prompt_size = len(prompt.encode('utf-8'))
@ -1205,57 +1204,38 @@ CONTINUATION INSTRUCTIONS:
total_size = prompt_size + document_size total_size = prompt_size + document_size
# Use AiObjects to select the best model for this content size # Use centralized model selection to get the best model for chunking parameters
# We'll simulate the model selection by checking available models try:
from modules.interfaces.interfaceAiObjects import aiModels from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.aicore.aicoreModelSelector import model_selector
# Find the best model for this content size and operation # Get available models and select the best one for this operation
best_model = None availableModels = modelRegistry.getAvailableModels()
best_context_length = 0 selectedModel = model_selector.selectModel(prompt, "", options, availableModels)
for model_name, model_info in aiModels.items(): if selectedModel:
context_length = model_info.get("contextLength", 0) context_length = selectedModel.contextLength
model_name = selectedModel.name
logger.debug(f"Selected model for chunking: {model_name} with context length: {context_length}")
else:
# Fallback to conservative default if no model selected
context_length = 128000 # GPT-4o default
model_name = "fallback"
logger.warning(f"No model selected for chunking, using fallback context length: {context_length}")
# Skip models with no context length or too small for content except Exception as e:
if context_length == 0: # Fallback to conservative default if model selection fails
continue context_length = 128000 # GPT-4o default
model_name = "fallback"
# Check if model supports the operation type logger.error(f"Model selection failed for chunking: {e}, using fallback context length: {context_length}")
capabilities = model_info.get("capabilities", [])
if options.operationType == OperationTypeEnum.IMAGE_ANALYSE and "imageAnalyse" not in capabilities:
continue
elif options.operationType == OperationTypeEnum.IMAGE_GENERATE and "imageGenerate" not in capabilities:
continue
elif options.operationType == OperationTypeEnum.WEB_RESEARCH and "web_search" not in capabilities:
continue
elif "text_generation" not in capabilities:
continue
# Prefer models that can handle the content without chunking, but allow chunking if needed
if context_length >= total_size * 0.8: # 80% of content size
if context_length > best_context_length:
best_model = model_info
best_context_length = context_length
elif best_model is None: # Fallback to largest available model
if context_length > best_context_length:
best_model = model_info
best_context_length = context_length
# Fallback to a reasonable default if no model found
if best_model is None:
best_model = {
"contextLength": 128000, # GPT-4o default
"llmName": "gpt-4o"
}
# Calculate appropriate sizes # Calculate appropriate sizes
# Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters) # Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters)
context_length_bytes = int(best_model["contextLength"] * 4) context_length_bytes = int(context_length * 4)
max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length
text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks
image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks
logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}")
logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes") logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes")
logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes") logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes")

View file

@ -138,7 +138,7 @@ class ImageChunker(Chunker):
return chunks return chunks
def _tileImage(self, image: "Image.Image", maxBytes: int, tileSize: int, quality: int, originalPixels: int) -> List[Dict[str, Any]]: def _tileImage(self, image: Any, maxBytes: int, tileSize: int, quality: int, originalPixels: int) -> List[Dict[str, Any]]:
"""Split image into tiles if it's still too large after compression.""" """Split image into tiles if it's still too large after compression."""
chunks = [] chunks = []
width, height = image.size width, height = image.size

View file

@ -4,11 +4,11 @@ import logging
import time import time
from .subRegistry import ExtractorRegistry, ChunkerRegistry from .subRegistry import ExtractorRegistry, ChunkerRegistry
from .subPipeline import runExtraction, poolAndLimit, applyAiIfRequested from .subPipeline import runExtraction
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy
from modules.datamodels.datamodelChat import ChatDocument from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallResponse from modules.datamodels.datamodelAi import AiCallResponse
from modules.interfaces.interfaceAiObjects import aiModels from modules.aicore.aicoreModelRegistry import modelRegistry
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -91,8 +91,6 @@ class ExtractionService:
else: else:
logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits") logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits")
ec = applyAiIfRequested(ec, options)
# Calculate timing and emit stats # Calculate timing and emit stats
endTime = time.time() endTime = time.time()
processingTime = endTime - startTime processingTime = endTime - startTime
@ -103,7 +101,8 @@ class ExtractionService:
# Use internal extraction model for pricing # Use internal extraction model for pricing
modelName = "internal_extraction" modelName = "internal_extraction"
priceUsd = aiModels[modelName]["calculatePriceUsd"](processingTime, bytesSent, bytesReceived) model = modelRegistry.getModel(modelName)
priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived)
# Create AiCallResponse with real calculation # Create AiCallResponse with real calculation
aiResponse = AiCallResponse( aiResponse = AiCallResponse(

View file

@ -298,37 +298,3 @@ def _applySizeLimit(parts: List[ContentPart], maxSize: int) -> List[ContentPart]
return kept return kept
def applyAiIfRequested(extracted: ContentExtracted, options: Dict[str, Any]) -> ContentExtracted:
"""
Apply AI processing if requested in options.
This is a placeholder for actual AI integration.
"""
prompt = options.get("prompt")
operationType = options.get("operationType", "general")
if not prompt:
return extracted
# Placeholder AI processing based on operationType
if operationType == "analyse":
# Add analysis metadata to parts
for part in extracted.parts:
if part.typeGroup in ("text", "table", "structure"):
part.metadata["ai_processed"] = True
part.metadata["operation_type"] = operationType
elif operationType == "plan":
# Add plan generation metadata
for part in extracted.parts:
if part.typeGroup == "text":
part.metadata["ai_processed"] = True
part.metadata["operation_type"] = operationType
elif operationType == "generate":
# Add content generation metadata
for part in extracted.parts:
part.metadata["ai_processed"] = True
part.metadata["operation_type"] = operationType
return extracted

View file

@ -4,7 +4,7 @@ import time
from typing import Any, Dict, List, Optional, Union, Tuple from typing import Any, Dict, List, Optional, Union, Tuple
from modules.datamodels.datamodelChat import ChatDocument from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallResponse from modules.datamodels.datamodelAi import AiCallResponse
from modules.interfaces.interfaceAiObjects import aiModels from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.services.serviceGeneration.subDocumentUtility import ( from modules.services.serviceGeneration.subDocumentUtility import (
getFileExtension, getFileExtension,
getMimeTypeFromExtension, getMimeTypeFromExtension,
@ -430,7 +430,8 @@ class GenerationService:
# Use internal generation model for pricing # Use internal generation model for pricing
modelName = "internal_generation" modelName = "internal_generation"
priceUsd = aiModels[modelName]["calculatePriceUsd"](processingTime, 0, bytesReceived) model = modelRegistry.getModel(modelName)
priceUsd = model.calculatePriceUsd(processingTime, 0, bytesReceived)
aiResponse = AiCallResponse( aiResponse = AiCallResponse(
content="", # No content for generation stats needed content="", # No content for generation stats needed
@ -457,7 +458,8 @@ class GenerationService:
# Use internal generation model for pricing # Use internal generation model for pricing
modelName = "internal_generation" modelName = "internal_generation"
priceUsd = aiModels[modelName]["calculatePriceUsd"](processingTime, 0, 0) model = modelRegistry.getModel(modelName)
priceUsd = model.calculatePriceUsd(processingTime, 0, 0)
aiResponse = AiCallResponse( aiResponse = AiCallResponse(
content="", # No content for generation stats needed content="", # No content for generation stats needed

163
test_ai_model_selection.py Normal file
View file

@ -0,0 +1,163 @@
#!/usr/bin/env python3
"""
AI Model Selection Test - Prints prioritized fallback model lists used for AI calls
Scenarios mirror typical calls in workflows/ (task planning, action planning,
analysis, and react-mode decisions), showing which models are shortlisted and
their final prioritized order after rating and cost tie-breaking.
"""
import asyncio
import os
import sys
from typing import List, Tuple
# Ensure gateway is on path when running directly
sys.path.append(os.path.dirname(__file__))
from modules.features.chatPlayground.mainChatPlayground import getServices
from modules.datamodels.datamodelAi import (
AiCallOptions,
OperationTypeEnum,
PriorityEnum,
ProcessingModeEnum,
)
from modules.datamodels.datamodelUam import User
from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.aicore.aicoreModelSelector import model_selector
class ModelSelectionTester:
    """Drives the model-selection scenarios and prints each prioritized fallback list."""

    def __init__(self) -> None:
        # Build a throwaway user so getServices can hand back a service container.
        dummyUser = User(
            id="test_user_models",
            username="test_models",
            email="test@example.com",
            fullName="Test Models",
            language="en",
            mandateId="test_mandate",
        )
        self.services = getServices(dummyUser, None)

    async def initialize(self) -> None:
        """Create and attach the AI service (imported lazily to avoid import cycles)."""
        from modules.services.serviceAi.mainServiceAi import AiService
        self.services.ai = await AiService.create(self.services)

    async def _printFallbackList(self, title: str, prompt: str, options: AiCallOptions) -> None:
        """Print the prioritized fallback model sequence for one scenario."""
        print(f"\n{'='*80}")
        print(f"{title}")
        print(f"{'='*80}")
        print(
            f"Operation={options.operationType.name}, Priority={options.priority.name}, ProcessingMode={options.processingMode.name}"
        )
        registryModels = modelRegistry.getAvailableModels()
        ranked = model_selector.getFallbackModels(
            prompt=prompt,
            context="",
            options=options,
            availableModels=registryModels,
        )
        if not ranked:
            print("No suitable models found (capability filter returned empty list).")
            return
        print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx):")
        for rank, model in enumerate(ranked, 1):
            inputCost = getattr(model, "costPer1kTokensInput", 0.0)
            print(
                f" {rank:>2}. {model.name} | Q={getattr(model, 'qualityRating', 0)} | S={getattr(model, 'speedRating', 0)} | ${inputCost:.4f} | ctx={getattr(model, 'contextLength', 0)}"
            )

    async def run(self) -> None:
        """Run every scenario; each mirrors a typical call from workflows/."""
        scenarios: List[Tuple[str, str, AiCallOptions]] = [
            # Task planning (taskPlanner, modeActionplan)
            (
                "PLAN - Quality, Detailed",
                "Task planning for a multi-step business workflow.",
                AiCallOptions(
                    operationType=OperationTypeEnum.PLAN,
                    priority=PriorityEnum.QUALITY,
                    compressPrompt=False,
                    compressContext=False,
                    processingMode=ProcessingModeEnum.DETAILED,
                    maxCost=0.10,
                    maxProcessingTime=30,
                ),
            ),
            # Result validation / analysis (modeActionplan)
            (
                "ANALYSE - Balanced, Advanced",
                "Validate action plan correctness and completeness.",
                AiCallOptions(
                    operationType=OperationTypeEnum.ANALYSE,
                    priority=PriorityEnum.BALANCED,
                    compressPrompt=True,
                    compressContext=False,
                    processingMode=ProcessingModeEnum.ADVANCED,
                    maxCost=0.05,
                    maxProcessingTime=30,
                ),
            ),
            # React mode - action selection (modeReact)
            (
                "GENERAL - Balanced, Advanced (React: action selection)",
                "Select next best action from context and state.",
                AiCallOptions(
                    operationType=OperationTypeEnum.GENERAL,
                    priority=PriorityEnum.BALANCED,
                    compressPrompt=True,
                    compressContext=True,
                    processingMode=ProcessingModeEnum.ADVANCED,
                    maxCost=0.03,
                    maxProcessingTime=20,
                ),
            ),
            # React mode - parameter suggestion (modeReact example)
            (
                "ANALYSE - Balanced, Advanced (React: parameter suggestion)",
                "Suggest parameters for the selected action as JSON.",
                AiCallOptions(
                    operationType=OperationTypeEnum.ANALYSE,
                    priority=PriorityEnum.BALANCED,
                    compressPrompt=True,
                    compressContext=False,
                    processingMode=ProcessingModeEnum.ADVANCED,
                    maxCost=0.05,
                    maxProcessingTime=30,
                    resultFormat="json",
                    temperature=0.3,
                ),
            ),
        ]
        # Iterate and print lists
        for title, prompt, options in scenarios:
            await self._printFallbackList(title, prompt, options)
async def main() -> None:
    """Entry point: build the tester, attach AI services, then run all scenarios."""
    runner = ModelSelectionTester()
    await runner.initialize()
    await runner.run()
# Script entry point: run the full async scenario suite under asyncio.
if __name__ == "__main__":
    asyncio.run(main())