310 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
AI Model Selection Test - Prints prioritized fallback model lists used for AI calls
|
|
|
|
Scenarios mirror typical calls in workflows/ (task planning, action planning,
|
|
analysis, and react-mode decisions), showing which models are shortlisted and
|
|
their final prioritized order after rating and cost tie-breaking.
|
|
"""
|
|
|
|
import asyncio
|
|
import os
|
|
import sys
|
|
from typing import List, Tuple
|
|
|
|
|
|
# Ensure gateway is on path when running directly
|
|
sys.path.append(os.path.dirname(__file__))
|
|
|
|
from modules.features.chatPlayground.mainChatPlayground import getServices
|
|
from modules.datamodels.datamodelAi import (
|
|
AiCallOptions,
|
|
OperationTypeEnum,
|
|
PriorityEnum,
|
|
ProcessingModeEnum,
|
|
)
|
|
from modules.datamodels.datamodelUam import User
|
|
from modules.aicore.aicoreModelRegistry import modelRegistry
|
|
from modules.aicore.aicoreModelSelector import modelSelector
|
|
|
|
|
|
class ModelSelectionTester:
    """Drives the model-selection scenarios and prints prioritized fallback lists."""

    def __init__(self) -> None:
        # Synthetic user record so getServices() can build a service
        # container without a real authenticated session.
        dummyUser = User(
            id="test_user_models",
            username="test_models",
            email="test@example.com",
            fullName="Test Models",
            language="en",
            mandateId="test_mandate",
        )
        self.services = getServices(dummyUser, None)
|
|
|
async def initialize(self) -> None:
|
|
from modules.services.serviceAi.mainServiceAi import AiService
|
|
|
|
self.services.ai = await AiService.create(self.services)
|
|
|
|
async def _printFallbackListWithContext(self, title: str, prompt: str, context: str, options: AiCallOptions) -> None:
|
|
print(f"\n{'='*80}")
|
|
print(f"{title}")
|
|
print(f"{'='*80}")
|
|
print(
|
|
f"Operation={options.operationType.name}, Priority={options.priority.name}, ProcessingMode={options.processingMode.name}"
|
|
)
|
|
|
|
# Show context and prompt sizes
|
|
promptSize = len(prompt.encode("utf-8"))
|
|
contextSize = len(context.encode("utf-8"))
|
|
totalSize = promptSize + contextSize
|
|
print(f"Prompt size: {promptSize} bytes, Context size: {contextSize} bytes, Total: {totalSize} bytes")
|
|
|
|
availableModels = modelRegistry.getAvailableModels()
|
|
failoverModelList = modelSelector.getFailoverModelList(
|
|
prompt=prompt,
|
|
context=context,
|
|
options=options,
|
|
availableModels=availableModels,
|
|
)
|
|
|
|
if not failoverModelList:
|
|
print("No suitable models found (capability filter returned empty list).")
|
|
return
|
|
|
|
print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx | score):")
|
|
for idx, m in enumerate(failoverModelList, 1):
|
|
costIn = getattr(m, "costPer1kTokensInput", 0.0)
|
|
# Calculate detailed score breakdown
|
|
promptSize = len(prompt.encode("utf-8"))
|
|
contextSize = len(context.encode("utf-8"))
|
|
totalSize = promptSize + contextSize
|
|
|
|
# Get detailed scoring
|
|
sizeRating = modelSelector._getSizeRating(m, totalSize)
|
|
processingModeRating = modelSelector._getProcessingModeRating(m.processingMode, options.processingMode)
|
|
priorityRating = modelSelector._getPriorityRating(m, options.priority)
|
|
totalScore = sizeRating + processingModeRating + priorityRating
|
|
|
|
print(
|
|
f" {idx:>2}. {m.name} | Q={getattr(m, 'qualityRating', 0)} | S={getattr(m, 'speedRating', 0)} | ${costIn:.4f} | ctx={getattr(m, 'contextLength', 0)} | score={totalScore:.3f}"
|
|
)
|
|
print(f" Size: {sizeRating:.3f}, ProcessingMode: {processingModeRating:.3f}, Priority: {priorityRating:.3f}")
|
|
|
|
async def _printFallbackList(self, title: str, prompt: str, options: AiCallOptions) -> None:
|
|
print(f"\n{'='*80}")
|
|
print(f"{title}")
|
|
print(f"{'='*80}")
|
|
print(
|
|
f"Operation={options.operationType.name}, Priority={options.priority.name}, ProcessingMode={options.processingMode.name}"
|
|
)
|
|
|
|
# Show context and prompt sizes
|
|
context = "" # Currently using empty context
|
|
promptSize = len(prompt.encode("utf-8"))
|
|
contextSize = len(context.encode("utf-8"))
|
|
totalSize = promptSize + contextSize
|
|
print(f"Prompt size: {promptSize} bytes, Context size: {contextSize} bytes, Total: {totalSize} bytes")
|
|
|
|
availableModels = modelRegistry.getAvailableModels()
|
|
failoverModelList = modelSelector.getFailoverModelList(
|
|
prompt=prompt,
|
|
context=context,
|
|
options=options,
|
|
availableModels=availableModels,
|
|
)
|
|
|
|
if not failoverModelList:
|
|
print("No suitable models found (capability filter returned empty list).")
|
|
return
|
|
|
|
print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx | score):")
|
|
for idx, m in enumerate(failoverModelList, 1):
|
|
costIn = getattr(m, "costPer1kTokensInput", 0.0)
|
|
# Calculate detailed score breakdown
|
|
promptSize = len(prompt.encode("utf-8"))
|
|
contextSize = len(context.encode("utf-8"))
|
|
totalSize = promptSize + contextSize
|
|
|
|
# Get detailed scoring
|
|
sizeRating = modelSelector._getSizeRating(m, totalSize)
|
|
processingModeRating = modelSelector._getProcessingModeRating(m.processingMode, options.processingMode)
|
|
priorityRating = modelSelector._getPriorityRating(m, options.priority)
|
|
totalScore = sizeRating + processingModeRating + priorityRating
|
|
|
|
print(
|
|
f" {idx:>2}. {m.name} | Q={getattr(m, 'qualityRating', 0)} | S={getattr(m, 'speedRating', 0)} | ${costIn:.4f} | ctx={getattr(m, 'contextLength', 0)} | score={totalScore:.3f}"
|
|
)
|
|
print(f" Size: {sizeRating:.3f}, ProcessingMode: {processingModeRating:.3f}, Priority: {priorityRating:.3f}")
|
|
|
|
async def run(self) -> None:
|
|
# Scenarios reflecting workflows/
|
|
scenarios: List[Tuple[str, str, AiCallOptions]] = []
|
|
|
|
# Task planning (taskPlanner, modeActionplan)
|
|
scenarios.append(
|
|
(
|
|
"PLAN - Quality, Detailed",
|
|
"Task planning for a multi-step business workflow.",
|
|
AiCallOptions(
|
|
operationType=OperationTypeEnum.PLAN,
|
|
priority=PriorityEnum.QUALITY,
|
|
compressPrompt=False,
|
|
compressContext=False,
|
|
processingMode=ProcessingModeEnum.DETAILED,
|
|
maxCost=0.10,
|
|
maxProcessingTime=30,
|
|
),
|
|
)
|
|
)
|
|
|
|
# Result validation / analysis (modeActionplan)
|
|
scenarios.append(
|
|
(
|
|
"ANALYSE - Balanced, Advanced",
|
|
"Validate action plan correctness and completeness.",
|
|
AiCallOptions(
|
|
operationType=OperationTypeEnum.ANALYSE,
|
|
priority=PriorityEnum.BALANCED,
|
|
compressPrompt=True,
|
|
compressContext=False,
|
|
processingMode=ProcessingModeEnum.ADVANCED,
|
|
maxCost=0.05,
|
|
maxProcessingTime=30,
|
|
),
|
|
)
|
|
)
|
|
|
|
# React mode - action selection (modeReact)
|
|
scenarios.append(
|
|
(
|
|
"GENERAL - Balanced, Advanced (React: action selection)",
|
|
"Select next best action from context and state.",
|
|
AiCallOptions(
|
|
operationType=OperationTypeEnum.GENERAL,
|
|
priority=PriorityEnum.BALANCED,
|
|
compressPrompt=True,
|
|
compressContext=True,
|
|
processingMode=ProcessingModeEnum.ADVANCED,
|
|
maxCost=0.03,
|
|
maxProcessingTime=20,
|
|
),
|
|
)
|
|
)
|
|
|
|
# React mode - parameter suggestion (modeReact example)
|
|
scenarios.append(
|
|
(
|
|
"ANALYSE - Balanced, Advanced (React: parameter suggestion)",
|
|
"Suggest parameters for the selected action as JSON.",
|
|
AiCallOptions(
|
|
operationType=OperationTypeEnum.ANALYSE,
|
|
priority=PriorityEnum.BALANCED,
|
|
compressPrompt=True,
|
|
compressContext=False,
|
|
processingMode=ProcessingModeEnum.ADVANCED,
|
|
maxCost=0.05,
|
|
maxProcessingTime=30,
|
|
resultFormat="json",
|
|
temperature=0.3,
|
|
),
|
|
)
|
|
)
|
|
|
|
# Intent analysis (user input understanding)
|
|
scenarios.append(
|
|
(
|
|
"ANALYSE - Quality, Detailed (Intent Analysis)",
|
|
"Analyze user intent and extract key requirements from the following request: 'I need to create a comprehensive marketing strategy for our new product launch including budget allocation, timeline, and target audience analysis.'",
|
|
AiCallOptions(
|
|
operationType=OperationTypeEnum.ANALYSE,
|
|
priority=PriorityEnum.QUALITY,
|
|
compressPrompt=False,
|
|
compressContext=False,
|
|
processingMode=ProcessingModeEnum.DETAILED,
|
|
maxCost=0.08,
|
|
maxProcessingTime=45,
|
|
resultFormat="json",
|
|
temperature=0.2,
|
|
),
|
|
)
|
|
)
|
|
|
|
# Review/Validation (quality assurance)
|
|
scenarios.append(
|
|
(
|
|
"ANALYSE - Quality, Detailed (Review/Validation)",
|
|
"Review and validate the following business proposal for completeness, accuracy, and compliance with industry standards. Identify any gaps or areas for improvement.",
|
|
AiCallOptions(
|
|
operationType=OperationTypeEnum.ANALYSE,
|
|
priority=PriorityEnum.QUALITY,
|
|
compressPrompt=False,
|
|
compressContext=False,
|
|
processingMode=ProcessingModeEnum.DETAILED,
|
|
maxCost=0.10,
|
|
maxProcessingTime=60,
|
|
resultFormat="json",
|
|
temperature=0.1,
|
|
),
|
|
)
|
|
)
|
|
|
|
# Large context scenario (to test size-based scoring)
|
|
scenarios.append(
|
|
(
|
|
"GENERAL - Balanced, Advanced (Large Context Test)",
|
|
"Process this large document and provide a comprehensive summary.",
|
|
AiCallOptions(
|
|
operationType=OperationTypeEnum.GENERAL,
|
|
priority=PriorityEnum.BALANCED,
|
|
compressPrompt=False,
|
|
compressContext=False,
|
|
processingMode=ProcessingModeEnum.ADVANCED,
|
|
maxCost=0.15,
|
|
maxProcessingTime=120,
|
|
),
|
|
)
|
|
)
|
|
|
|
# Iterate and print lists
|
|
for title, prompt, options in scenarios:
|
|
await self._printFallbackList(title, prompt, options)
|
|
|
|
# Test with actual context to see size-based scoring
|
|
largeContext = """
|
|
This is a comprehensive business document containing detailed information about our company's strategic initiatives,
|
|
financial performance, market analysis, competitive landscape, operational metrics, customer feedback,
|
|
product development roadmap, technology stack, human resources, legal compliance, risk management,
|
|
sustainability efforts, and future growth plans. The document spans multiple sections including executive summary,
|
|
market research, financial statements, operational reports, customer insights, product specifications,
|
|
technology architecture, HR policies, legal frameworks, risk assessments, environmental impact studies,
|
|
and strategic recommendations. This extensive content is designed to test the model selection algorithm's
|
|
ability to handle large context sizes and make intelligent decisions about which models are best suited
|
|
for processing such substantial amounts of information while maintaining efficiency and cost-effectiveness.
|
|
""" * 10 # Repeat to make it even larger
|
|
|
|
await self._printFallbackListWithContext(
|
|
"GENERAL - Balanced, Advanced (Large Context Test)",
|
|
"Analyze this comprehensive business document and provide key insights.",
|
|
largeContext,
|
|
AiCallOptions(
|
|
operationType=OperationTypeEnum.GENERAL,
|
|
priority=PriorityEnum.BALANCED,
|
|
compressPrompt=False,
|
|
compressContext=False,
|
|
processingMode=ProcessingModeEnum.ADVANCED,
|
|
maxCost=0.15,
|
|
maxProcessingTime=120,
|
|
),
|
|
)
|
|
|
|
|
|
async def main() -> None:
    """Entry point: build the tester, attach services, run all scenarios."""
    runner = ModelSelectionTester()
    await runner.initialize()
    await runner.run()
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the async test driver to completion.
    asyncio.run(main())
|
|
|
|
|