gateway/modules/aicore/aicoreModelSelector.py

"""
Dynamic model selector using configurable rules and scoring.
"""
import logging
from typing import List, Optional, Dict, Any, Tuple
from modules.datamodels.datamodelAi import AiModel, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum, ModelCapabilitiesEnum
from modules.aicore.aicoreModelSelectionConfig import model_selection_config
logger = logging.getLogger(__name__)


class ModelSelector:
    """Dynamic model selector using configurable rules."""

    def __init__(self):
        self.config = model_selection_config

    def selectModel(self,
                    prompt: str,
                    context: str,
                    options: AiCallOptions,
                    availableModels: List[AiModel]) -> Optional[AiModel]:
        """
        Select the best model based on configurable rules and scoring.

        Args:
            prompt: User prompt
            context: Context data
            options: AI call options
            availableModels: List of available models to choose from

        Returns:
            Selected model or None if no suitable model found
        """
        if not availableModels:
            logger.warning("No models available for selection")
            return None

        logger.info(f"Selecting model for operation: {options.operationType}, priority: {options.priority}")

        # Calculate input size
        inputSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))

        # Get applicable rules
        rules = self.config.getRulesForOperation(options.operationType)
        logger.debug(f"Found {len(rules)} applicable rules for {options.operationType}")

        # Score each model
        scoredModels = []
        for model in availableModels:
            if not model.isAvailable:
                continue
            score = self._calculateModelScore(model, inputSize, options, rules)
            if score > 0:  # Only consider models with positive scores
                scoredModels.append((model, score))
                logger.debug(f"Model {model.name}: score={score:.2f}")

        if not scoredModels:
            logger.warning("No models passed the selection criteria, trying fallback criteria")
            # Try fallback criteria
            fallbackCriteria = self.getFallbackCriteria(options.operationType)
            return self._selectWithFallbackCriteria(availableModels, fallbackCriteria, inputSize, options)

        # Sort by score (highest first)
        scoredModels.sort(key=lambda x: x[1], reverse=True)
        selectedModel = scoredModels[0][0]
        selectedScore = scoredModels[0][1]

        logger.info(f"Selected model: {selectedModel.name} (score: {selectedScore:.2f})")

        # Log selection details
        self._logSelectionDetails(selectedModel, inputSize, options)

        return selectedModel

    def _calculateModelScore(self,
                             model: AiModel,
                             inputSize: int,
                             options: AiCallOptions,
                             rules: List) -> float:
        """Calculate score for a model based on rules and criteria."""
        score = 0.0

        # Check basic requirements
        if not self._meetsBasicRequirements(model, inputSize, options):
            return 0.0

        # Apply rules
        for rule in rules:
            ruleScore = self._applyRule(model, inputSize, options, rule)
            score += ruleScore * rule.weight

        # Apply priority-based scoring
        priorityScore = self._applyPriorityScoring(model, options)
        score += priorityScore

        # Apply processing mode scoring
        modeScore = self._applyProcessingModeScoring(model, options)
        score += modeScore

        # Apply cost constraints
        if not self._meetsCostConstraints(model, inputSize, options):
            score *= 0.1  # Heavily penalize but don't eliminate

        return max(0.0, score)
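
    # Example (illustrative numbers, not from the config): a model matching one
    # weight-2.0 rule with a rule score of 1.5 contributes 3.0; QUALITY priority
    # with a quality rating of 8 adds 0.8; a matching processing mode adds 0.2,
    # giving 4.0. If the model then fails the cost constraint, the total drops to 0.4.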

    def _meetsBasicRequirements(self, model: AiModel, inputSize: int, options: AiCallOptions) -> bool:
        """Check if model meets basic requirements."""
        # Context length check
        if model.contextLength > 0 and inputSize > model.contextLength * 0.8:
            logger.debug(f"Model {model.name} rejected: input too large ({inputSize} > {model.contextLength * 0.8})")
            return False

        # Required operation types check
        if options.operationTypes:
            if not all(opType in model.operationTypes for opType in options.operationTypes):
                logger.debug(f"Model {model.name} rejected: missing required operation types")
                return False

        # Capabilities check
        if options.capabilities:
            if not all(cap in model.capabilities for cap in options.capabilities):
                logger.debug(f"Model {model.name} rejected: missing required capabilities")
                return False

        # Avoid operation types check
        for rule in self.config.getRulesForOperation(options.operationType):
            if any(opType in model.operationTypes for opType in rule.avoidOperationTypes):
                logger.debug(f"Model {model.name} rejected: has avoid operation types")
                return False

        return True

    def _applyRule(self, model: AiModel, inputSize: int, options: AiCallOptions, rule) -> float:
        """Apply a specific rule to calculate score contribution."""
        score = 0.0

        # Required operation types match
        if all(opType in model.operationTypes for opType in rule.operationTypes):
            score += 1.0

        # Preferred capabilities match
        preferredMatches = sum(1 for cap in rule.preferredCapabilities if cap in model.capabilities)
        if rule.preferredCapabilities:
            score += (preferredMatches / len(rule.preferredCapabilities)) * 0.5

        # Quality rating check
        if rule.minQualityRating and model.qualityRating >= rule.minQualityRating:
            score += 0.3

        # Context length check
        if rule.minContextLength and model.contextLength >= rule.minContextLength:
            score += 0.2

        return score
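
    # A single rule can therefore contribute at most 2.0 before its weight is
    # applied: 1.0 (operation types) + 0.5 (all preferred capabilities)
    # + 0.3 (quality rating) + 0.2 (context length).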

    def _applyPriorityScoring(self, model: AiModel, options: AiCallOptions) -> float:
        """Apply priority-based scoring."""
        if options.priority == PriorityEnum.SPEED:
            return model.speedRating * 0.1
        elif options.priority == PriorityEnum.QUALITY:
            return model.qualityRating * 0.1
        elif options.priority == PriorityEnum.COST:
            # Lower cost = higher score
            costScore = max(0, 1.0 - (model.costPer1kTokensInput * 1000))
            return costScore * 0.1
        else:  # BALANCED
            return (model.qualityRating + model.speedRating) * 0.05
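
    # Worked example (assumed price, not from the config): at $0.0005 per 1k
    # input tokens the COST branch yields (1.0 - 0.5) * 0.1 = 0.05; any model
    # costing $0.001 per 1k tokens or more contributes 0.0 under COST priority.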

    def _applyProcessingModeScoring(self, model: AiModel, options: AiCallOptions) -> float:
        """Apply processing mode scoring."""
        if options.processingMode == ProcessingModeEnum.DETAILED:
            if model.priority == PriorityEnum.QUALITY:
                return 0.2
        elif options.processingMode == ProcessingModeEnum.BASIC:
            if model.priority == PriorityEnum.SPEED:
                return 0.2
        return 0.0

    def _meetsCostConstraints(self, model: AiModel, inputSize: int, options: AiCallOptions) -> bool:
        """Check if model meets cost constraints."""
        if options.maxCost is None:
            return True

        # Estimate cost
        estimatedTokens = inputSize / 4
        estimatedCost = (estimatedTokens / 1000) * model.costPer1kTokensInput
        return estimatedCost <= options.maxCost
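
    # Example (illustrative price): an 8,000-byte prompt+context is estimated at
    # 2,000 tokens (roughly 4 bytes per token); at $0.0005 per 1k input tokens
    # the estimated cost is (2000 / 1000) * 0.0005 = $0.001, compared to options.maxCost.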

    def _logSelectionDetails(self, model: AiModel, inputSize: int, options: AiCallOptions):
        """Log detailed selection information."""
        logger.info("Model Selection Details:")
        logger.info(f"  Selected: {model.displayName} ({model.name})")
        logger.info(f"  Connector: {model.connectorType}")
        logger.info(f"  Operation: {options.operationType}")
        logger.info(f"  Priority: {options.priority}")
        logger.info(f"  Processing Mode: {options.processingMode}")
        logger.info(f"  Input Size: {inputSize} bytes")
        logger.info(f"  Context Length: {model.contextLength}")
        logger.info(f"  Max Tokens: {model.maxTokens}")
        logger.info(f"  Quality Rating: {model.qualityRating}/10")
        logger.info(f"  Speed Rating: {model.speedRating}/10")
        logger.info(f"  Cost: ${model.costPer1kTokensInput:.4f}/1k tokens")
        logger.info(f"  Capabilities: {', '.join(model.capabilities)}")
        logger.info(f"  Model Priority: {model.priority}")

    def getFallbackCriteria(self, operationType: str) -> Dict[str, Any]:
        """Get fallback selection criteria for an operation type."""
        return self.config.getFallbackCriteria(operationType)

    def _selectWithFallbackCriteria(self,
                                    availableModels: List[AiModel],
                                    fallbackCriteria: Dict[str, Any],
                                    inputSize: int,
                                    options: AiCallOptions) -> Optional[AiModel]:
        """Select model using fallback criteria when normal selection fails."""
        logger.info("Using fallback criteria for model selection")

        # Filter models by fallback criteria
        candidates = []
        for model in availableModels:
            if not model.isAvailable:
                continue

            # Check required operation types
            if fallbackCriteria.get("operationTypes"):
                if not all(opType in model.operationTypes for opType in fallbackCriteria["operationTypes"]):
                    continue

            # Check quality rating
            if fallbackCriteria.get("minQualityRating"):
                if model.qualityRating < fallbackCriteria["minQualityRating"]:
                    continue

            # Check cost
            if fallbackCriteria.get("maxCostPer1k"):
                if model.costPer1kTokensInput > fallbackCriteria["maxCostPer1k"]:
                    continue

            # Check context length
            if model.contextLength > 0 and inputSize > model.contextLength * 0.8:
                continue

            candidates.append(model)

        if not candidates:
            logger.error("No models available even with fallback criteria")
            return None

        # Sort by priority order from fallback criteria
        priorityOrder = fallbackCriteria.get("priorityOrder", ["quality", "speed", "cost"])

        def _getPriorityScore(model: AiModel) -> float:
            score = 0.0
            for i, priority in enumerate(priorityOrder):
                weight = len(priorityOrder) - i  # Higher weight for earlier priorities
                if priority == "quality":
                    score += model.qualityRating * weight
                elif priority == "speed":
                    score += model.speedRating * weight
                elif priority == "cost":
                    # Lower cost = higher score
                    score += (1.0 - model.costPer1kTokensInput * 1000) * weight
            return score

        candidates.sort(key=_getPriorityScore, reverse=True)
        selectedModel = candidates[0]

        logger.info(f"Fallback selection: {selectedModel.name} (score: {_getPriorityScore(selectedModel):.2f})")
        return selectedModel
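
    # With the default priorityOrder ["quality", "speed", "cost"] the weights are
    # 3, 2 and 1, so the fallback score is
    # qualityRating * 3 + speedRating * 2 + (1.0 - costPer1kTokensInput * 1000) * 1.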

    def getFallbackModels(self,
                          prompt: str,
                          context: str,
                          options: AiCallOptions,
                          availableModels: List[AiModel]) -> List[AiModel]:
        """
        Get prioritized list of models for fallback sequence.

        Steps:
        1. Filter models by capability requirements
        2. Rate models by business requirements (priority, processing mode)
        3. Sort by rating (descending), then by cost (ascending)

        Args:
            prompt: User prompt
            context: Context data
            options: AI call options
            availableModels: List of available models

        Returns:
            Prioritized list of models for fallback sequence
        """
        if not availableModels:
            logger.warning("No models available for fallback selection")
            return []

        logger.info(f"Building fallback sequence for operation: {options.operationType}, priority: {options.priority}")

        # Input size is needed for the basic-requirements check during filtering
        inputSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))

        # Step 1: Filter by capability requirements
        capableModels = self._filterByCapabilities(availableModels, options, inputSize)
        logger.info(f"Step 1 - Capable models: {[m.name for m in capableModels]}")
        if not capableModels:
            logger.warning("No models meet capability requirements")
            return []

        # Step 2: Rate models by business requirements
        ratedModels = self._rateModelsByBusinessRequirements(capableModels, prompt, context, options)
        logger.info(f"Step 2 - Rated models: {[(m.name, rating) for m, rating in ratedModels]}")

        # Step 3: Sort by rating (descending), then by cost (ascending)
        sortedModels = self._sortModelsByRatingAndCost(ratedModels)
        logger.info(f"Step 3 - Sorted fallback sequence: {[m.name for m in sortedModels]}")

        return sortedModels

    def _filterByCapabilities(self, models: List[AiModel], options: AiCallOptions, inputSize: int) -> List[AiModel]:
        """Filter models by required capabilities."""
        capableModels = []
        for model in models:
            if not model.isAvailable:
                continue

            # Check if model supports required capabilities
            if options.capabilities:
                if not all(cap in model.capabilities for cap in options.capabilities):
                    logger.debug(f"Model {model.name} missing required capabilities: {options.capabilities}")
                    continue

            # Check operation type compatibility
            if not self._meetsBasicRequirements(model, inputSize, options):
                logger.debug(f"Model {model.name} doesn't meet basic requirements")
                continue

            capableModels.append(model)

        return capableModels

    def _rateModelsByBusinessRequirements(self,
                                          models: List[AiModel],
                                          prompt: str,
                                          context: str,
                                          options: AiCallOptions) -> List[Tuple[AiModel, float]]:
        """Rate models based on business requirements (priority, processing mode)."""
        ratedModels = []
        inputSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))

        for model in models:
            # Base score from model selection logic
            baseScore = self._calculateModelScore(model, inputSize, options, [])

            # Apply priority-based scoring
            priorityScore = self._applyPriorityScoring(model, options)

            # Apply processing mode scoring
            processingScore = self._applyProcessingModeScoring(model, options)

            # Combine scores
            totalScore = baseScore + priorityScore + processingScore
            ratedModels.append((model, totalScore))
            logger.debug(f"Model {model.name}: base={baseScore:.2f}, priority={priorityScore:.2f}, processing={processingScore:.2f}, total={totalScore:.2f}")

        return ratedModels
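
    # Note: with an empty rule list the base score reduces to the priority and
    # processing-mode components (scaled by 0.1 if the cost constraint fails),
    # so those components effectively count twice in the total computed here.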

    def _sortModelsByRatingAndCost(self, ratedModels: List[Tuple[AiModel, float]]) -> List[AiModel]:
        """Sort models by rating (descending), then by cost (ascending)."""
        def sortKey(item):
            model, rating = item
            # Primary sort: rating (descending)
            # Secondary sort: cost (ascending)
            return (-rating, model.costPer1kTokensInput)

        sortedItems = sorted(ratedModels, key=sortKey)
        return [model for model, rating in sortedItems]


# Global selector instance
model_selector = ModelSelector()
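

if __name__ == "__main__":
    # Usage sketch (illustrative only). It assumes AiModel and AiCallOptions can be
    # constructed with keyword arguments for the fields referenced in this module
    # (e.g. pydantic models or dataclasses). The field values, the "chat" operation
    # type string and the omission of any other required fields are assumptions,
    # not taken from the real data models or selection config.
    exampleModel = AiModel(
        name="example-model",
        displayName="Example Model",
        connectorType="example-connector",
        isAvailable=True,
        contextLength=128000,
        maxTokens=4096,
        operationTypes=["chat"],
        capabilities=[],
        qualityRating=8,
        speedRating=7,
        costPer1kTokensInput=0.0005,
        priority=PriorityEnum.QUALITY,
    )
    exampleOptions = AiCallOptions(
        operationType="chat",
        priority=PriorityEnum.QUALITY,
        processingMode=ProcessingModeEnum.BASIC,
    )
    chosen = model_selector.selectModel(
        prompt="Summarize the attached report.",
        context="",
        options=exampleOptions,
        availableModels=[exampleModel],
    )
    print(chosen.name if chosen else "no suitable model")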