Ready for test: revised dynamic AI-aware chunking system

ValueOn AG 2025-10-23 00:35:44 +02:00
parent 6b819cc848
commit 36947b6d7e
14 changed files with 1160 additions and 1014 deletions


@@ -48,7 +48,7 @@ class ModelRegistry:
        try:
            # Import the module
-           module = importlib.import_module(f'modules.connectors.{moduleName}')
+           module = importlib.import_module(f'modules.aicore.{moduleName}')
            # Find connector classes (classes that inherit from BaseConnectorAi)
            for attrName in dir(module):


@@ -1,158 +0,0 @@
"""
Configuration for dynamic model selection rules.
This makes model selection configurable rather than hardcoded.
"""
from typing import Dict, List, Any
from modules.datamodels.datamodelAi import OperationTypeEnum, ModelCapabilitiesEnum, PriorityEnum, SelectionRule
class ModelSelectionConfig:
"""Configuration for model selection rules."""
def __init__(self):
self.rules = self._loadDefaultRules()
self.fallbackModels = self._loadFallbackModels()
def _loadDefaultRules(self) -> List[SelectionRule]:
"""Load default selection rules."""
return [
# High quality for planning and analysis
SelectionRule(
name="highQualityAnalysis",
condition="Planning or analysis operations requiring high quality",
weight=10.0,
operationTypes=[OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE],
priority=PriorityEnum.QUALITY,
capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.REASONING, ModelCapabilitiesEnum.ANALYSIS],
minQualityRating=8
),
# Fast processing for basic operations
SelectionRule(
name="fastBasicProcessing",
condition="Basic operations requiring speed",
weight=8.0,
operationTypes=[OperationTypeEnum.GENERAL],
priority=PriorityEnum.SPEED,
capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.CHAT],
minQualityRating=5
),
# Cost-effective for high-volume operations
SelectionRule(
name="costEffectiveProcessing",
condition="High-volume operations where cost matters",
weight=7.0,
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.GENERATE],
priority=PriorityEnum.COST,
capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION],
maxCost=0.01 # $0.01 per 1k tokens
),
# Image analysis specific
SelectionRule(
name="imageAnalyse",
condition="Image analysis operations",
weight=10.0,
operationTypes=[OperationTypeEnum.IMAGE_ANALYSE],
priority=PriorityEnum.QUALITY,
capabilities=[ModelCapabilitiesEnum.VISION, ModelCapabilitiesEnum.MULTIMODAL],
minQualityRating=8
),
# Web research specific
SelectionRule(
name="webResearch",
condition="Web research operations",
weight=9.0,
operationTypes=[OperationTypeEnum.WEB_RESEARCH],
priority=PriorityEnum.BALANCED,
capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.ANALYSIS, ModelCapabilitiesEnum.WEB_SEARCH],
minQualityRating=7
),
# Large context requirements
SelectionRule(
name="largeContext",
condition="Operations requiring large context",
weight=8.0,
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.ANALYSE],
priority=PriorityEnum.BALANCED,
capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION],
minContextLength=100000 # 100k tokens
)
]
def _loadFallbackModels(self) -> Dict[str, Dict[str, Any]]:
"""Load fallback model selection criteria."""
return {
OperationTypeEnum.GENERAL: {
"priorityOrder": ["speed", "quality", "cost"],
"operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.CHAT],
"minQualityRating": 5,
"maxCostPer1k": 0.01
},
OperationTypeEnum.IMAGE_ANALYSE: {
"priorityOrder": ["quality", "speed"],
"operationTypes": [ModelCapabilitiesEnum.VISION, ModelCapabilitiesEnum.MULTIMODAL],
"minQualityRating": 8,
"maxCostPer1k": 0.1
},
OperationTypeEnum.IMAGE_GENERATE: {
"priorityOrder": ["quality", "speed"],
"operationTypes": [ModelCapabilitiesEnum.IMAGE_GENERATE, ModelCapabilitiesEnum.ART, ModelCapabilitiesEnum.VISUAL_CREATION],
"minQualityRating": 8,
"maxCostPer1k": 0.1
},
OperationTypeEnum.WEB_RESEARCH: {
"priorityOrder": ["quality", "speed", "cost"],
"operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.ANALYSIS],
"preferredTags": [ModelCapabilitiesEnum.WEB_SEARCH],
"minQualityRating": 7,
"maxCostPer1k": 0.02
},
OperationTypeEnum.PLAN: {
"priorityOrder": ["quality", "speed"],
"operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.REASONING, ModelCapabilitiesEnum.ANALYSIS],
"preferredTags": [PriorityEnum.QUALITY],
"minQualityRating": 8,
"maxCostPer1k": 0.1
},
OperationTypeEnum.ANALYSE: {
"priorityOrder": ["quality", "speed"],
"operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.ANALYSIS, ModelCapabilitiesEnum.REASONING],
"preferredTags": [PriorityEnum.QUALITY],
"minQualityRating": 8,
"maxCostPer1k": 0.1
}
}
def getRulesForOperation(self, operationType: str) -> List[SelectionRule]:
"""Get rules that apply to a specific operation type."""
return [rule for rule in self.rules if operationType in rule.operationTypes]
def getFallbackCriteria(self, operationType: str) -> Dict[str, Any]:
"""Get fallback selection criteria for a specific operation type."""
return self.fallbackModels.get(operationType, self.fallbackModels[OperationTypeEnum.GENERAL])
def addRule(self, rule: SelectionRule):
"""Add a new selection rule."""
self.rules.append(rule)
def removeRule(self, ruleName: str):
"""Remove a selection rule by name."""
self.rules = [rule for rule in self.rules if rule.name != ruleName]
def updateRule(self, ruleName: str, **kwargs):
"""Update an existing rule."""
for rule in self.rules:
if rule.name == ruleName:
for key, value in kwargs.items():
if hasattr(rule, key):
setattr(rule, key, value)
break
# Global configuration instance
model_selection_config = ModelSelectionConfig()


@@ -1,20 +1,20 @@
 """
-Dynamic model selector using configurable rules and scoring.
+Simplified model selection based on model properties and priority-based sorting.
+No complex rules needed - just filter by properties and sort by priority!
 """
 import logging
-from typing import List, Optional, Dict, Any, Tuple
+from typing import List, Dict, Any, Optional
-from modules.datamodels.datamodelAi import AiModel, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum, ModelCapabilitiesEnum
+from modules.datamodels.datamodelAi import AiModel, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
-from modules.aicore.aicoreModelSelectionConfig import model_selection_config

+# Configure logger
 logger = logging.getLogger(__name__)

 class ModelSelector:
-    """Dynamic model selector using configurable rules."""
+    """Simple model selector based on properties and priority-based sorting."""

     def __init__(self):
-        self.config = model_selection_config
+        logger.info("ModelSelector initialized with simplified approach")

     def selectModel(self,
                     prompt: str,
@@ -22,270 +22,7 @@ class ModelSelector:
                     options: AiCallOptions,
                     availableModels: List[AiModel]) -> Optional[AiModel]:
         """
-        Select the best model based on configurable rules and scoring.
+        Select the best model using simple filtering and priority-based sorting.
Args:
prompt: User prompt
context: Context data
options: AI call options
availableModels: List of available models to choose from
Returns:
Selected model or None if no suitable model found
"""
if not availableModels:
logger.warning("No models available for selection")
return None
logger.info(f"Selecting model for operation: {options.operationType}, priority: {options.priority}")
# Calculate input size
inputSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))
# Get applicable rules
rules = self.config.getRulesForOperation(options.operationType)
logger.debug(f"Found {len(rules)} applicable rules for {options.operationType}")
# Score each model
scoredModels = []
for model in availableModels:
if not model.isAvailable:
continue
score = self._calculateModelScore(model, inputSize, options, rules)
if score > 0: # Only consider models with positive scores
scoredModels.append((model, score))
logger.debug(f"Model {model.name}: score={score:.2f}")
if not scoredModels:
logger.warning("No models passed the selection criteria, trying fallback criteria")
# Try fallback criteria
fallbackCriteria = self.getFallbackCriteria(options.operationType)
return self._selectWithFallbackCriteria(availableModels, fallbackCriteria, inputSize, options)
# Sort by score (highest first)
scoredModels.sort(key=lambda x: x[1], reverse=True)
selectedModel = scoredModels[0][0]
selectedScore = scoredModels[0][1]
logger.info(f"Selected model: {selectedModel.name} (score: {selectedScore:.2f})")
# Log selection details
self._logSelectionDetails(selectedModel, inputSize, options)
return selectedModel
def _calculateModelScore(self,
model: AiModel,
inputSize: int,
options: AiCallOptions,
rules: List) -> float:
"""Calculate score for a model based on rules and criteria."""
score = 0.0
# Check basic requirements
if not self._meetsBasicRequirements(model, inputSize, options):
return 0.0
# Apply rules
for rule in rules:
ruleScore = self._applyRule(model, inputSize, options, rule)
score += ruleScore * rule.weight
# Apply priority-based scoring
priorityScore = self._applyPriorityScoring(model, options)
score += priorityScore
# Apply processing mode scoring
modeScore = self._applyProcessingModeScoring(model, options)
score += modeScore
# Apply cost constraints
if not self._meetsCostConstraints(model, inputSize, options):
score *= 0.1 # Heavily penalize but don't eliminate
return max(0.0, score)
def _meetsBasicRequirements(self, model: AiModel, inputSize: int, options: AiCallOptions) -> bool:
"""Check if model meets basic requirements."""
# Context length check
if model.contextLength > 0 and inputSize > model.contextLength * 0.8:
logger.debug(f"Model {model.name} rejected: input too large ({inputSize} > {model.contextLength * 0.8})")
return False
# Required operation types check
if options.operationTypes:
if not all(opType in model.operationTypes for opType in options.operationTypes):
logger.debug(f"Model {model.name} rejected: missing required operation types")
return False
# Capabilities check
if options.capabilities:
if not all(cap in model.capabilities for cap in options.capabilities):
logger.debug(f"Model {model.name} rejected: missing required capabilities")
return False
# Avoid operation types check
for rule in self.config.getRulesForOperation(options.operationType):
if any(opType in model.operationTypes for opType in rule.avoidOperationTypes):
logger.debug(f"Model {model.name} rejected: has avoid operation types")
return False
return True
def _applyRule(self, model: AiModel, inputSize: int, options: AiCallOptions, rule) -> float:
"""Apply a specific rule to calculate score contribution."""
score = 0.0
# Required operation types match
if all(opType in model.operationTypes for opType in rule.operationTypes):
score += 1.0
# Preferred capabilities match
preferredMatches = sum(1 for cap in rule.preferredCapabilities if cap in model.capabilities)
if rule.preferredCapabilities:
score += (preferredMatches / len(rule.preferredCapabilities)) * 0.5
# Quality rating check
if rule.minQualityRating and model.qualityRating >= rule.minQualityRating:
score += 0.3
# Context length check
if rule.minContextLength and model.contextLength >= rule.minContextLength:
score += 0.2
return score
def _applyPriorityScoring(self, model: AiModel, options: AiCallOptions) -> float:
"""Apply priority-based scoring."""
if options.priority == PriorityEnum.SPEED:
return model.speedRating * 0.1
elif options.priority == PriorityEnum.QUALITY:
return model.qualityRating * 0.1
elif options.priority == PriorityEnum.COST:
# Lower cost = higher score
costScore = max(0, 1.0 - (model.costPer1kTokensInput * 1000))
return costScore * 0.1
else: # BALANCED
return (model.qualityRating + model.speedRating) * 0.05
def _applyProcessingModeScoring(self, model: AiModel, options: AiCallOptions) -> float:
"""Apply processing mode scoring."""
if options.processingMode == ProcessingModeEnum.DETAILED:
if model.priority == PriorityEnum.QUALITY:
return 0.2
elif options.processingMode == ProcessingModeEnum.BASIC:
if model.priority == PriorityEnum.SPEED:
return 0.2
return 0.0
def _meetsCostConstraints(self, model: AiModel, inputSize: int, options: AiCallOptions) -> bool:
"""Check if model meets cost constraints."""
if options.maxCost is None:
return True
# Estimate cost
estimatedTokens = inputSize / 4
estimatedCost = (estimatedTokens / 1000) * model.costPer1kTokensInput
return estimatedCost <= options.maxCost
def _logSelectionDetails(self, model: AiModel, inputSize: int, options: AiCallOptions):
"""Log detailed selection information."""
logger.info(f"Model Selection Details:")
logger.info(f" Selected: {model.displayName} ({model.name})")
logger.info(f" Connector: {model.connectorType}")
logger.info(f" Operation: {options.operationType}")
logger.info(f" Priority: {options.priority}")
logger.info(f" Processing Mode: {options.processingMode}")
logger.info(f" Input Size: {inputSize} bytes")
logger.info(f" Context Length: {model.contextLength}")
logger.info(f" Max Tokens: {model.maxTokens}")
logger.info(f" Quality Rating: {model.qualityRating}/10")
logger.info(f" Speed Rating: {model.speedRating}/10")
logger.info(f" Cost: ${model.costPer1kTokensInput:.4f}/1k tokens")
logger.info(f" Capabilities: {', '.join(model.capabilities)}")
logger.info(f" Priority: {model.priority}")
def getFallbackCriteria(self, operationType: str) -> Dict[str, Any]:
"""Get fallback selection criteria for an operation type."""
return self.config.getFallbackCriteria(operationType)
def _selectWithFallbackCriteria(self,
availableModels: List[AiModel],
fallbackCriteria: Dict[str, Any],
inputSize: int,
options: AiCallOptions) -> Optional[AiModel]:
"""Select model using fallback criteria when normal selection fails."""
logger.info("Using fallback criteria for model selection")
# Filter models by fallback criteria
candidates = []
for model in availableModels:
if not model.isAvailable:
continue
# Check required operation types
if fallbackCriteria.get("operationTypes"):
if not all(opType in model.operationTypes for opType in fallbackCriteria["operationTypes"]):
continue
# Check quality rating
if fallbackCriteria.get("minQualityRating"):
if model.qualityRating < fallbackCriteria["minQualityRating"]:
continue
# Check cost
if fallbackCriteria.get("maxCostPer1k"):
if model.costPer1kTokensInput > fallbackCriteria["maxCostPer1k"]:
continue
# Check context length
if model.contextLength > 0 and inputSize > model.contextLength * 0.8:
continue
candidates.append(model)
if not candidates:
logger.error("No models available even with fallback criteria")
return None
# Sort by priority order from fallback criteria
priorityOrder = fallbackCriteria.get("priorityOrder", ["quality", "speed", "cost"])
def _getPriorityScore(model: AiModel) -> float:
score = 0.0
for i, priority in enumerate(priorityOrder):
weight = len(priorityOrder) - i # Higher weight for earlier priorities
if priority == "quality":
score += model.qualityRating * weight
elif priority == "speed":
score += model.speedRating * weight
elif priority == "cost":
# Lower cost = higher score
score += (1.0 - model.costPer1kTokensInput * 1000) * weight
return score
candidates.sort(key=_getPriorityScore, reverse=True)
selectedModel = candidates[0]
logger.info(f"Fallback selection: {selectedModel.name} (score: {_getPriorityScore(selectedModel):.2f})")
return selectedModel
def getFallbackModels(self,
prompt: str,
context: str,
options: AiCallOptions,
availableModels: List[AiModel]) -> List[AiModel]:
"""
Get prioritized list of models for fallback sequence.
Steps:
1. Filter models by capability requirements
2. Rate models by business requirements (priority, processing mode)
3. Sort by rating (descending), then by cost (ascending)
        Args:
            prompt: User prompt
@@ -294,93 +31,195 @@ class ModelSelector:
            availableModels: List of available models
        Returns:
-            Prioritized list of models for fallback sequence
+            Best model for the request, or None if no suitable model found
        """
-        if not availableModels:
-            logger.warning("No models available for fallback selection")
-            return []
-        logger.info(f"Building fallback sequence for operation: {options.operationType}, priority: {options.priority}")
-        # Step 1: Filter by capability requirements
-        capableModels = self._filterByCapabilities(availableModels, options)
-        logger.info(f"Step 1 - Capable models: {[m.name for m in capableModels]}")
-        if not capableModels:
-            logger.warning("No models meet capability requirements")
-            return []
-        # Step 2: Rate models by business requirements
-        ratedModels = self._rateModelsByBusinessRequirements(capableModels, prompt, context, options)
-        logger.info(f"Step 2 - Rated models: {[(m.name, rating) for m, rating in ratedModels]}")
-        # Step 3: Sort by rating (descending), then by cost (ascending)
-        sortedModels = self._sortModelsByRatingAndCost(ratedModels)
-        logger.info(f"Step 3 - Sorted fallback sequence: {[m.name for m in sortedModels]}")
-        return sortedModels
-    def _filterByCapabilities(self, models: List[AiModel], options: AiCallOptions) -> List[AiModel]:
-        """Filter models by required capabilities."""
-        capableModels = []
-        for model in models:
-            if not model.isAvailable:
-                continue
-            # Check if model supports required capabilities
-            if options.capabilities:
-                if not all(cap in model.capabilities for cap in options.capabilities):
-                    logger.debug(f"Model {model.name} missing required capabilities: {options.capabilities}")
-                    continue
-            # Check operation type compatibility
-            if not self._meetsBasicRequirements(model, options):
-                logger.debug(f"Model {model.name} doesn't meet basic requirements")
-                continue
-            capableModels.append(model)
-        return capableModels
-    def _rateModelsByBusinessRequirements(self,
-                                          models: List[AiModel],
-                                          prompt: str,
-                                          context: str,
-                                          options: AiCallOptions) -> List[Tuple[AiModel, float]]:
-        """Rate models based on business requirements (priority, processing mode)."""
-        ratedModels = []
-        inputSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))
-        for model in models:
-            # Base score from model selection logic
-            baseScore = self._calculateModelScore(model, inputSize, options, [])
-            # Apply priority-based scoring
-            priorityScore = self._applyPriorityScoring(model, options)
-            # Apply processing mode scoring
-            processingScore = self._applyProcessingModeScoring(model, options)
-            # Combine scores
-            totalScore = baseScore + priorityScore + processingScore
-            ratedModels.append((model, totalScore))
-            logger.debug(f"Model {model.name}: base={baseScore:.2f}, priority={priorityScore:.2f}, processing={processingScore:.2f}, total={totalScore:.2f}")
-        return ratedModels
-    def _sortModelsByRatingAndCost(self, ratedModels: List[Tuple[AiModel, float]]) -> List[AiModel]:
-        """Sort models by rating (descending), then by cost (ascending)."""
-        def sortKey(item):
-            model, rating = item
-            # Primary sort: rating (descending)
-            # Secondary sort: cost (ascending)
-            return (-rating, model.costPer1kTokensInput)
-        sortedItems = sorted(ratedModels, key=sortKey)
-        return [model for model, rating in sortedItems]
-# Global selector instance
-model_selector = ModelSelector()
+        try:
+            # Get failover models (which includes all filtering and sorting)
+            failoverModelList = self.getFailoverModelList(prompt, context, options, availableModels)
+            if not failoverModelList:
+                logger.warning("No suitable models found for the request")
+                return None
+            selectedModel = failoverModelList[0]  # First model is the best one
+            logger.info(f"Selected model: {selectedModel.name} (quality: {selectedModel.qualityRating}, cost: ${selectedModel.costPer1kTokensInput:.4f})")
+            return selectedModel
+        except Exception as e:
+            logger.error(f"Error selecting model: {str(e)}")
+            return None
+    def getFailoverModelList(self,
+                             prompt: str,
+                             context: str,
+                             options: AiCallOptions,
+                             availableModels: List[AiModel]) -> List[AiModel]:
+        """
+        Get prioritized list of models using scoring-based ranking.
+        Args:
+            prompt: User prompt
+            context: Context data
+            options: AI call options
+            availableModels: List of available models
+        Returns:
+            List of models sorted by score (descending)
+        """
+        try:
+            promptSize = len(prompt.encode("utf-8"))
+            contextSize = len(context.encode("utf-8"))
+            totalSize = promptSize + contextSize
+            # Step 1: Filter by operation type (MUST match)
+            operationFiltered = [m for m in availableModels if options.operationType in m.operationTypes]
+            logger.debug(f"After operation type filtering: {len(operationFiltered)} models")
+            # Step 2: Filter by prompt size (MUST be <= 80% of context size)
+            promptFiltered = [m for m in operationFiltered if m.contextLength == 0 or promptSize <= m.contextLength * 0.8]
+            logger.debug(f"After prompt size filtering: {len(promptFiltered)} models")
+            # Step 3: Calculate scores for each model
+            scoredModels = []
+            for model in promptFiltered:
+                score = self._calculateModelScore(model, promptSize, contextSize, totalSize, options)
+                scoredModels.append((model, score))
+                logger.debug(f"Model {model.name}: score={score:.3f}")
+            # Step 4: Sort by score (descending)
+            scoredModels.sort(key=lambda x: x[1], reverse=True)
+            sortedModels = [model for model, score in scoredModels]
+            logger.debug(f"Final sorted models: {len(sortedModels)} models")
+            return sortedModels
+        except Exception as e:
+            logger.error(f"Error getting failover models: {str(e)}")
+            return []
+    def _calculateModelScore(self, model: AiModel, promptSize: int, contextSize: int, totalSize: int, options: AiCallOptions) -> float:
+        """
+        Calculate a score for a model based on how well it fulfills the criteria.
+        Args:
+            model: The model to score
+            promptSize: Size of the prompt in bytes
+            contextSize: Size of the context in bytes
+            totalSize: Total size (prompt + context) in bytes
+            options: AI call options
+        Returns:
+            Score for the model (higher is better)
+        """
+        score = 0.0
+        # 1. Prompt + Context size rating
+        if model.contextLength > 0:
+            modelMaxSize = model.contextLength * 0.8  # 80% of model context length
+            if totalSize <= modelMaxSize:
+                # Within limits: rating = (prompt+contextsize) / (80% modelsize)
+                score += totalSize / modelMaxSize
+            else:
+                # Exceeds limits: rating = modelsize / (prompt+contextsize) (ensures minimum chunks)
+                score += modelMaxSize / totalSize
+        else:
+            # No context length limit
+            score += 1.0
+        # 2. Processing Mode rating
+        if hasattr(options, 'processingMode') and options.processingMode:
+            score += self._getProcessingModeRating(model.processingMode, options.processingMode)
+        else:
+            score += 1.0  # No preference
+        # 3. Priority rating
+        if hasattr(options, 'priority') and options.priority:
+            score += self._getPriorityRating(model, options.priority)
+        else:
+            score += 1.0  # No preference
+        return score
+    def _getProcessingModeRating(self, modelMode: ProcessingModeEnum, requestedMode: ProcessingModeEnum) -> float:
+        """Get processing mode rating based on compatibility."""
+        if modelMode == requestedMode:
+            return 1.0
+        # Compatibility matrix
+        if requestedMode == ProcessingModeEnum.BASIC:
+            if modelMode == ProcessingModeEnum.ADVANCED:
+                return 0.5
+            elif modelMode == ProcessingModeEnum.DETAILED:
+                return 0.2
+        elif requestedMode == ProcessingModeEnum.ADVANCED:
+            if modelMode == ProcessingModeEnum.BASIC:
+                return 0.2
+            elif modelMode == ProcessingModeEnum.DETAILED:
+                return 0.5
+        elif requestedMode == ProcessingModeEnum.DETAILED:
+            if modelMode == ProcessingModeEnum.BASIC:
+                return 0.2
+            elif modelMode == ProcessingModeEnum.ADVANCED:
+                return 0.5
+        return 0.0  # No compatibility
+    def _getPriorityRating(self, model: AiModel, requestedPriority: PriorityEnum) -> float:
+        """Get priority rating based on model capabilities."""
+        if requestedPriority == PriorityEnum.BALANCED:
+            return 1.0
+        elif requestedPriority == PriorityEnum.SPEED:
+            return model.speedRating / 10.0
+        elif requestedPriority == PriorityEnum.QUALITY:
+            return model.qualityRating / 10.0
+        elif requestedPriority == PriorityEnum.COST:
+            # Cost priority: cost gives 1, speed gives 0.5, quality gives 0.2
+            # Lower cost is better, so we invert the cost rating
+            costRating = 1.0 - (model.costPer1kTokensInput / 0.1)  # Normalize to 0-1
+            costRating = max(0, costRating)  # Ensure non-negative
+            speedRating = model.speedRating / 10.0 * 0.5
+            qualityRating = model.qualityRating / 10.0 * 0.2
+            return costRating + speedRating + qualityRating
+        return 1.0  # Default
+    def _getSizeRating(self, model: AiModel, totalSize: int) -> float:
+        """Get size rating for a model based on total input size."""
+        if model.contextLength > 0:
+            modelMaxSize = model.contextLength * 0.8  # 80% of model context length
+            if totalSize <= modelMaxSize:
+                # Within limits: rating = (prompt+contextsize) / (80% modelsize)
+                return totalSize / modelMaxSize
+            else:
+                # Exceeds limits: rating = modelsize / (prompt+contextsize) (ensures minimum chunks)
+                return modelMaxSize / totalSize
+        else:
+            # No context length limit
+            return 1.0
+    def _logModelDetails(self, model: AiModel):
+        """Log detailed information about a model."""
+        logger.info(f"Model: {model.name}")
+        logger.info(f"  Display Name: {model.displayName}")
+        logger.info(f"  Connector: {model.connectorType}")
+        logger.info(f"  Context Length: {model.contextLength}")
+        logger.info(f"  Max Tokens: {model.maxTokens}")
+        logger.info(f"  Quality Rating: {model.qualityRating}/10")
+        logger.info(f"  Speed Rating: {model.speedRating}/10")
+        logger.info(f"  Cost: ${model.costPer1kTokensInput:.4f}/1k tokens")
+        logger.info(f"  Capabilities: {', '.join(model.capabilities)}")
+        logger.info(f"  Priority: {model.priority}")
+        logger.info(f"  Processing Mode: {model.processingMode}")
+        logger.info(f"  Operation Types: {', '.join(model.operationTypes)}")
+# Global model selector instance
+modelSelector = ModelSelector()
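Note: the comments in `_calculateModelScore` compress the whole selection policy into three additive ratings: input size versus 80% of the model's context window, processing-mode compatibility, and the requested priority. Below is a minimal, self-contained sketch of the size and COST-priority components with illustrative numbers; the helper names are not repository functions, only a paraphrase of the formulas shown in the diff.

```python
# Hedged sketch of two scoring components described above; values are examples only.

def sizeRating(totalSize: int, contextLength: int) -> float:
    """Reward inputs that fit within 80% of the context window; penalize overflow."""
    if contextLength <= 0:
        return 1.0  # model declares no context limit
    modelMaxSize = contextLength * 0.8
    return totalSize / modelMaxSize if totalSize <= modelMaxSize else modelMaxSize / totalSize

def costPriorityRating(costPer1k: float, speedRating: int, qualityRating: int) -> float:
    """PriorityEnum.COST weighting as in the diff: cost 1.0, speed 0.5, quality 0.2."""
    costScore = max(0.0, 1.0 - costPer1k / 0.1)  # cheaper models score higher
    return costScore + (speedRating / 10.0) * 0.5 + (qualityRating / 10.0) * 0.2

print(sizeRating(60_000, 100_000))       # 0.75  -> fits, fills 75% of the budget
print(sizeRating(120_000, 100_000))      # ~0.67 -> too large, would need chunking
print(costPriorityRating(0.0015, 9, 6))  # ~1.56 -> a cheap, fast model wins on COST
```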


@@ -63,7 +63,7 @@ class AiAnthropic(BaseConnectorAi):
                functionCall=self.callAiBasic,
                priority=PriorityEnum.QUALITY,
                processingMode=ProcessingModeEnum.DETAILED,
-               operationTypes=[OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE],
+               operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE, OperationTypeEnum.GENERATE],
                version="claude-3-5-sonnet-20241022",
                calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.015 + (bytesReceived / 4 / 1000) * 0.075
            ),


@@ -34,7 +34,7 @@ class AiInternal(BaseConnectorAi):
                functionCall=self.extractDocument,
                priority=PriorityEnum.COST,
                processingMode=ProcessingModeEnum.BASIC,
-               operationTypes=[OperationTypeEnum.GENERAL],
+               operationTypes=[OperationTypeEnum.EXTRACT],
                version="internal-extractor-v1",
                calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: 0.001 + (bytesSent + bytesReceived) / (1024 * 1024) * 0.01
            ),


@@ -65,7 +65,7 @@ class AiOpenai(BaseConnectorAi):
                functionCall=self.callAiBasic,
                priority=PriorityEnum.BALANCED,
                processingMode=ProcessingModeEnum.ADVANCED,
-               operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.ANALYSE],
+               operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE, OperationTypeEnum.GENERATE],
                version="gpt-4o",
                calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.03 + (bytesReceived / 4 / 1000) * 0.06
            ),
@@ -83,7 +83,7 @@ class AiOpenai(BaseConnectorAi):
                functionCall=self.callAiBasic,
                priority=PriorityEnum.SPEED,
                processingMode=ProcessingModeEnum.BASIC,
-               operationTypes=[OperationTypeEnum.GENERAL],
+               operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.GENERATE],
                version="gpt-3.5-turbo",
                calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0015 + (bytesReceived / 4 / 1000) * 0.002
            ),


@@ -63,7 +63,7 @@ class AiPerplexity(BaseConnectorAi):
                functionCall=self.callAiBasic,
                priority=PriorityEnum.BALANCED,
                processingMode=ProcessingModeEnum.ADVANCED,
-               operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.WEB_RESEARCH],
+               operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE, OperationTypeEnum.GENERATE, OperationTypeEnum.WEB_RESEARCH],
                version="llama-3.1-sonar-large-128k-online",
                calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.005 + (bytesReceived / 4 / 1000) * 0.005
            ),


@@ -1,13 +1,17 @@
-from typing import Optional, List, Dict, Any, Literal, Callable
+from typing import Optional, List, Dict, Any, Literal, Callable, TYPE_CHECKING
 from pydantic import BaseModel, Field
 from enum import Enum

+if TYPE_CHECKING:
+    from modules.datamodels.datamodelExtraction import ContentPart

 # Operation Types
 class OperationTypeEnum(str, Enum):
     GENERAL = "general"
     PLAN = "plan"
     ANALYSE = "analyse"
     GENERATE = "generate"
+    EXTRACT = "extract"
     WEB_RESEARCH = "webResearch"
     IMAGE_ANALYSE = "imageAnalyse"
     IMAGE_GENERATE = "imageGenerate"
@@ -141,6 +145,7 @@ class AiCallRequest(BaseModel):
     prompt: str = Field(description="The user prompt")
     context: Optional[str] = Field(default=None, description="Optional external context (e.g., extracted docs)")
     options: AiCallOptions = Field(default_factory=AiCallOptions)
+    contentParts: Optional[List['ContentPart']] = None  # NEW: Content parts for model-aware chunking

 class AiCallResponse(BaseModel):
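Note: the new `contentParts` field is what routes a request into the model-aware chunking path (see `AiObjects.call` further down in this commit). A hedged usage sketch follows; the `ContentPart` constructor arguments are assumed from the calls shown elsewhere in this diff and may omit fields the real model requires.

```python
# Hedged sketch: passing extracted parts through the new contentParts field.
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
from modules.datamodels.datamodelExtraction import ContentPart

part = ContentPart(
    id="part-1",
    parentId=None,
    label="report_body",
    typeGroup="text",
    mimeType="text/plain",
    data="...extracted document text...",
    metadata={},
)

request = AiCallRequest(
    prompt="Summarise the attached report.",
    context="",  # the document content now travels inside the content part
    options=AiCallOptions(operationType=OperationTypeEnum.ANALYSE),  # assumes operationType is a plain field
    contentParts=[part],  # NEW field added in this commit
)
# response = await aiObjects.call(request)  # dispatched to _callWithContentParts
```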


@@ -28,6 +28,16 @@ class ChunkResult(BaseModel):
     metadata: Dict[str, Any] = Field(default_factory=dict)

+class PartResult(BaseModel):
+    """Preserves the relationship between a content part and its AI result."""
+    originalPart: ContentPart
+    aiResult: str
+    partIndex: int
+    documentId: str
+    processingTime: float = 0.0
+    metadata: Dict[str, Any] = Field(default_factory=dict)

 class MergeStrategy(BaseModel):
     """Strategy configuration for merging content parts and AI results."""


@@ -7,7 +7,7 @@ import time
 logger = logging.getLogger(__name__)

 from modules.aicore.aicoreModelRegistry import modelRegistry
-from modules.aicore.aicoreModelSelector import model_selector
+from modules.aicore.aicoreModelSelector import modelSelector
 from modules.datamodels.datamodelAi import (
     AiModel,
     AiCallOptions,
@@ -70,7 +70,7 @@ class AiObjects:
             raise ValueError("No AI models available")

         # Use the dynamic model selector
-        selectedModel = model_selector.selectModel(prompt, context, options, availableModels)
+        selectedModel = modelSelector.selectModel(prompt, context, options, availableModels)

         if not selectedModel:
             logger.error("No suitable model found for the given criteria")
@@ -81,8 +81,15 @@ class AiObjects:
     async def call(self, request: AiCallRequest) -> AiCallResponse:
-        """Call AI model for text generation with fallback mechanism."""
+        """Call AI model for text generation with model-aware chunking."""
+        # Handle content parts (unified path)
+        if hasattr(request, 'contentParts') and request.contentParts:
+            return await self._callWithContentParts(request)
+        # Handle traditional text/context calls
+        return await self._callWithTextContext(request)
+
+    async def _callWithTextContext(self, request: AiCallRequest) -> AiCallResponse:
+        """Call AI model for traditional text/context calls with fallback mechanism."""
         prompt = request.prompt
         context = request.context or ""
         options = request.options
@@ -108,11 +115,11 @@ class AiObjects:
         temperature = 0.2
         maxTokens = getattr(options, "maxTokens", None)

-        # Get fallback models for this operation type
+        # Get failover models for this operation type
         availableModels = modelRegistry.getAvailableModels()
-        fallbackModels = model_selector.getFallbackModels(prompt, context, options, availableModels)
+        failoverModelList = modelSelector.getFailoverModelList(prompt, context, options, availableModels)

-        if not fallbackModels:
+        if not failoverModelList:
             errorMsg = f"No suitable models found for operation {options.operationType}"
             logger.error(errorMsg)
             return AiCallResponse(
@@ -125,11 +132,11 @@ class AiObjects:
                 errorCount=1
             )

-        # Try each model in fallback sequence
+        # Try each model in failover sequence
         lastError = None
-        for attempt, model in enumerate(fallbackModels):
+        for attempt, model in enumerate(failoverModelList):
             try:
-                logger.info(f"Attempting AI call with model: {model.name} (attempt {attempt + 1}/{len(fallbackModels)})")
+                logger.info(f"Attempting AI call with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")

                 # Call the model
                 response = await self._callWithModel(model, prompt, context, temperature, maxTokens, inputBytes)
@@ -142,15 +149,15 @@ class AiObjects:
                 logger.warning(f"❌ AI call failed with model {model.name}: {str(e)}")

                 # If this is not the last model, try the next one
-                if attempt < len(fallbackModels) - 1:
-                    logger.info(f"🔄 Trying next fallback model...")
+                if attempt < len(failoverModelList) - 1:
+                    logger.info(f"🔄 Trying next failover model...")
                     continue
                 else:
                     # All models failed
-                    logger.error(f"💥 All {len(fallbackModels)} models failed for operation {options.operationType}")
+                    logger.error(f"💥 All {len(failoverModelList)} models failed for operation {options.operationType}")
                     break

-        # All fallback attempts failed - return error response
+        # All failover attempts failed - return error response
         errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {str(lastError)}"
         logger.error(errorMsg)
         return AiCallResponse(
@@ -163,6 +170,241 @@ class AiObjects:
                 errorCount=1
             )
async def _callWithContentParts(self, request: AiCallRequest) -> AiCallResponse:
"""Process content parts with model-aware chunking (unified for single and multiple parts)."""
prompt = request.prompt
options = request.options
contentParts = request.contentParts
# Get failover models
availableModels = modelRegistry.getAvailableModels()
failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels)
if not failoverModelList:
return self._createErrorResponse("No suitable models found", 0, 0)
# Process each content part
allResults = []
for contentPart in contentParts:
partResult = await self._processContentPartWithFallback(contentPart, prompt, options, failoverModelList)
allResults.append(partResult)
# Merge all results
mergedContent = self._mergePartResults(allResults)
return AiCallResponse(
content=mergedContent,
modelName="multiple",
priceUsd=sum(r.priceUsd for r in allResults),
processingTime=sum(r.processingTime for r in allResults),
bytesSent=sum(r.bytesSent for r in allResults),
bytesReceived=sum(r.bytesReceived for r in allResults),
errorCount=sum(r.errorCount for r in allResults)
)
async def _processContentPartWithFallback(self, contentPart, prompt: str, options, failoverModelList) -> AiCallResponse:
"""Process a single content part with model-aware chunking and fallback."""
lastError = None
for attempt, model in enumerate(failoverModelList):
try:
logger.info(f"Processing content part with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
# Check if part fits in model context
partSize = len(contentPart.data.encode('utf-8')) if contentPart.data else 0
modelContextBytes = model.contextLength * 4 # Convert tokens to bytes
if partSize <= modelContextBytes:
# Part fits - call AI directly
response = await self._callWithModel(model, prompt, contentPart.data, 0.2, None, partSize)
logger.info(f"✅ Content part processed successfully with model: {model.name}")
return response
else:
# Part too large - chunk it
chunks = await self._chunkContentPart(contentPart, model, options)
if not chunks:
raise ValueError(f"Failed to chunk content part for model {model.name}")
# Process each chunk
chunkResults = []
for chunk in chunks:
chunkResponse = await self._callWithModel(model, prompt, chunk['data'], 0.2, None, chunk['size'])
chunkResults.append(chunkResponse)
# Merge chunk results
mergedContent = self._mergeChunkResults(chunkResults)
totalPrice = sum(r.priceUsd for r in chunkResults)
totalTime = sum(r.processingTime for r in chunkResults)
totalBytesSent = sum(r.bytesSent for r in chunkResults)
totalBytesReceived = sum(r.bytesReceived for r in chunkResults)
totalErrors = sum(r.errorCount for r in chunkResults)
logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)")
return AiCallResponse(
content=mergedContent,
modelName=model.name,
priceUsd=totalPrice,
processingTime=totalTime,
bytesSent=totalBytesSent,
bytesReceived=totalBytesReceived,
errorCount=totalErrors
)
except Exception as e:
lastError = e
logger.warning(f"❌ Model {model.name} failed for content part: {str(e)}")
if attempt < len(failoverModelList) - 1:
logger.info(f"🔄 Trying next failover model...")
continue
else:
logger.error(f"💥 All {len(failoverModelList)} models failed for content part")
break
# All models failed
return self._createErrorResponse(f"All models failed: {str(lastError)}", 0, 0)
async def _chunkContentPart(self, contentPart, model, options) -> List[Dict[str, Any]]:
"""Chunk a content part based on model capabilities."""
# Calculate model-specific chunk sizes
modelContextBytes = model.contextLength * 4 # Convert tokens to bytes
maxContextBytes = int(modelContextBytes * 0.9) # 90% of context length
textChunkSize = int(maxContextBytes * 0.7) # 70% of max context for text chunks
imageChunkSize = int(maxContextBytes * 0.8) # 80% of max context for image chunks
# Build chunking options
chunkingOptions = {
"textChunkSize": textChunkSize,
"imageChunkSize": imageChunkSize,
"maxSize": maxContextBytes,
"chunkAllowed": True
}
# Get appropriate chunker
from modules.services.serviceExtraction.subRegistry import ChunkerRegistry
chunkerRegistry = ChunkerRegistry()
chunker = chunkerRegistry.resolve(contentPart.typeGroup)
if not chunker:
logger.warning(f"No chunker found for typeGroup: {contentPart.typeGroup}")
return []
# Chunk the content part
try:
chunks = chunker.chunk(contentPart, chunkingOptions)
logger.debug(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part")
return chunks
except Exception as e:
logger.error(f"Chunking failed for {contentPart.typeGroup}: {str(e)}")
return []
def _mergePartResults(self, partResults: List[AiCallResponse]) -> str:
"""Merge part results using the existing sophisticated merging system."""
if not partResults:
return ""
# Convert AiCallResponse results to ContentParts for merging
from modules.datamodels.datamodelExtraction import ContentPart
from modules.services.serviceExtraction.subUtils import makeId
content_parts = []
for i, result in enumerate(partResults):
if result.content:
content_part = ContentPart(
id=makeId(),
parentId=None,
label=f"ai_result_{i}",
typeGroup="text", # Default to text for AI results
mimeType="text/plain",
data=result.content,
metadata={
"aiResult": True,
"modelName": result.modelName,
"priceUsd": result.priceUsd,
"processingTime": result.processingTime,
"bytesSent": result.bytesSent,
"bytesReceived": result.bytesReceived
}
)
content_parts.append(content_part)
# Use existing merging system
merge_strategy = {
"useIntelligentMerging": True,
"groupBy": "typeGroup",
"orderBy": "id",
"mergeType": "concatenate"
}
from modules.services.serviceExtraction.subPipeline import _applyMerging
merged_parts = _applyMerging(content_parts, merge_strategy)
# Convert merged parts back to final string
final_content = "\n\n".join([part.data for part in merged_parts])
logger.info(f"Merged {len(partResults)} AI results using existing merging system")
return final_content.strip()
def _mergeChunkResults(self, chunkResults: List[AiCallResponse]) -> str:
"""Merge chunk results using the existing sophisticated merging system."""
if not chunkResults:
return ""
# Convert AiCallResponse results to ContentParts for merging
from modules.datamodels.datamodelExtraction import ContentPart
from modules.services.serviceExtraction.subUtils import makeId
content_parts = []
for i, result in enumerate(chunkResults):
if result.content:
content_part = ContentPart(
id=makeId(),
parentId=None,
label=f"chunk_result_{i}",
typeGroup="text", # Default to text for AI results
mimeType="text/plain",
data=result.content,
metadata={
"aiResult": True,
"chunk": True,
"modelName": result.modelName,
"priceUsd": result.priceUsd,
"processingTime": result.processingTime,
"bytesSent": result.bytesSent,
"bytesReceived": result.bytesReceived
}
)
content_parts.append(content_part)
# Use existing merging system
merge_strategy = {
"useIntelligentMerging": True,
"groupBy": "typeGroup",
"orderBy": "id",
"mergeType": "concatenate"
}
from modules.services.serviceExtraction.subPipeline import _applyMerging
merged_parts = _applyMerging(content_parts, merge_strategy)
# Convert merged parts back to final string
final_content = "\n\n".join([part.data for part in merged_parts])
logger.info(f"Merged {len(chunkResults)} chunk results using existing merging system")
return final_content.strip()
def _createErrorResponse(self, errorMsg: str, inputBytes: int, outputBytes: int) -> AiCallResponse:
"""Create an error response."""
return AiCallResponse(
content=errorMsg,
modelName="error",
priceUsd=0.0,
processingTime=0.0,
bytesSent=inputBytes,
bytesReceived=outputBytes,
errorCount=1
)
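Note: `_chunkContentPart` above derives its chunk budgets from the selected model rather than from fixed extraction options. A worked example of that arithmetic, with an assumed 128k-token model (the factors come from the code; the numbers are illustrative):

```python
# Worked example of the chunk-budget arithmetic used in _chunkContentPart.
contextLengthTokens = 128_000                     # assumed 128k-token model
modelContextBytes = contextLengthTokens * 4        # 512_000 bytes (4 bytes/token estimate)
maxContextBytes = int(modelContextBytes * 0.9)     # 460_800 bytes usable (90%)
textChunkSize = int(maxContextBytes * 0.7)         # 322_560 bytes per text chunk (70%)
imageChunkSize = int(maxContextBytes * 0.8)        # 368_640 bytes per image chunk (80%)

partSize = 1_000_000                               # a 1 MB extracted part
needsChunking = partSize > modelContextBytes       # True -> the chunker is invoked
minTextChunks = -(-partSize // textChunkSize)      # ceiling division -> 4 chunks
print(needsChunking, minTextChunks)
```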
     async def _callWithModel(self, model: AiModel, prompt: str, context: str, temperature: float, maxTokens: int, inputBytes: int) -> AiCallResponse:
         """Call a specific model and return the response."""
         # Replace <TOKEN_LIMIT> placeholder in prompt for this specific model
@@ -245,9 +487,9 @@ class AiObjects:
         # Get fallback models for image analysis
         availableModels = modelRegistry.getAvailableModels()
-        fallbackModels = model_selector.getFallbackModels(prompt, "", options, availableModels)
+        failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels)

-        if not fallbackModels:
+        if not failoverModelList:
             errorMsg = f"No suitable models found for image analysis"
             logger.error(errorMsg)
             return AiCallResponse(
@@ -262,9 +504,9 @@ class AiObjects:
         # Try each model in fallback sequence
         lastError = None
-        for attempt, model in enumerate(fallbackModels):
+        for attempt, model in enumerate(failoverModelList):
             try:
-                logger.info(f"Attempting image analysis with model: {model.name} (attempt {attempt + 1}/{len(fallbackModels)})")
+                logger.info(f"Attempting image analysis with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")

                 # Call the model
                 response = await self._callImageWithModel(model, prompt, imageData, mimeType, inputBytes)
@@ -277,12 +519,12 @@ class AiObjects:
                 logger.warning(f"❌ Image analysis failed with model {model.name}: {str(e)}")

                 # If this is not the last model, try the next one
-                if attempt < len(fallbackModels) - 1:
+                if attempt < len(failoverModelList) - 1:
                     logger.info(f"🔄 Trying next fallback model for image analysis...")
                     continue
                 else:
                     # All models failed
-                    logger.error(f"💥 All {len(fallbackModels)} models failed for image analysis")
+                    logger.error(f"💥 All {len(failoverModelList)} models failed for image analysis")
                     break

         # All fallback attempts failed - return error response


@@ -54,8 +54,8 @@ class SubDocumentProcessing:
         options: Optional[AiCallOptions] = None
     ) -> str:
         """
-        Process documents with per-chunk AI calls and merge results.
-        FIXED: Now preserves chunk relationships and document structure.
+        Process documents with model-aware chunking and merge results.
+        NEW: Uses model-aware chunking in AI call phase instead of extraction phase.

         Args:
             documents: List of ChatDocument objects to process
@@ -68,23 +68,14 @@ class SubDocumentProcessing:
         if not documents:
             return ""

-        # Get model capabilities for size calculation
-        model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options)
-        # Build extraction options for chunking with intelligent merging
+        # Build extraction options WITHOUT chunking parameters
         extractionOptions: Dict[str, Any] = {
             "prompt": prompt,
             "operationType": options.operationType if options else "general",
-            "processDocumentsIndividually": True,  # Process each document separately
-            "maxSize": model_capabilities["maxContextBytes"],
-            "chunkAllowed": True,
-            "textChunkSize": model_capabilities["textChunkSize"],
-            "imageChunkSize": model_capabilities["imageChunkSize"],
-            "imageMaxPixels": 1024 * 1024,
-            "imageQuality": 85,
+            "processDocumentsIndividually": True,
+            # REMOVED: maxSize, textChunkSize, imageChunkSize
             "mergeStrategy": {
-                "useIntelligentMerging": True,  # Enable intelligent token-aware merging
-                "capabilities": model_capabilities,
+                "useIntelligentMerging": True,
                 "prompt": prompt,
                 "groupBy": "typeGroup",
                 "orderBy": "id",
@@ -95,17 +86,17 @@ class SubDocumentProcessing:
         logger.debug(f"Per-chunk extraction options: prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")

         try:
-            # Extract content with chunking
+            # Extract content WITHOUT chunking
             extractionResult = self.extractionService.extractContent(documents, extractionOptions)

             if not isinstance(extractionResult, list):
                 return "[Error: No extraction results]"

-            # FIXED: Process chunks with proper mapping
-            chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options)
+            # Process parts (not chunks) with model-aware AI calls
+            partResults = await self._processPartsWithMapping(extractionResult, prompt, options)

-            # FIXED: Merge with preserved chunk relationships
-            mergedContent = self._mergeChunkResults(chunkResults, options)
+            # Merge results using existing merging system
+            mergedContent = self._mergePartResults(partResults, options)

             # Save merged extraction content to debug
             self.services.utils.writeDebugFile(mergedContent or '', "extractionMergedText")
@@ -123,29 +114,19 @@ class SubDocumentProcessing:
         options: Optional[AiCallOptions] = None
     ) -> Dict[str, Any]:
         """
-        Process documents with per-chunk AI calls and merge results in JSON mode.
+        Process documents with model-aware chunking and merge results in JSON mode.
         Returns structured JSON document instead of text.
         """
         if not documents:
             return {"metadata": {"title": "Empty Document"}, "sections": []}

-        # Get model capabilities for size calculation
-        model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options)
-        # Build extraction options for chunking with intelligent merging
+        # Build extraction options WITHOUT chunking parameters
         extractionOptions: Dict[str, Any] = {
             "prompt": prompt,
             "operationType": options.operationType if options else "general",
-            "processDocumentsIndividually": True,  # Process each document separately
-            "maxSize": model_capabilities["maxContextBytes"],
-            "chunkAllowed": True,
-            "textChunkSize": model_capabilities["textChunkSize"],
-            "imageChunkSize": model_capabilities["imageChunkSize"],
-            "imageMaxPixels": 1024 * 1024,
-            "imageQuality": 85,
+            "processDocumentsIndividually": True,
             "mergeStrategy": {
-                "useIntelligentMerging": True,  # Enable intelligent token-aware merging
-                "capabilities": model_capabilities,
+                "useIntelligentMerging": True,
                 "prompt": prompt,
                 "groupBy": "typeGroup",
                 "orderBy": "id",
@@ -156,17 +137,17 @@ class SubDocumentProcessing:
         logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")

         try:
-            # Extract content with chunking
+            # Extract content WITHOUT chunking
             extractionResult = self.extractionService.extractContent(documents, extractionOptions)

             if not isinstance(extractionResult, list):
                 return {"metadata": {"title": "Error Document"}, "sections": []}

-            # Process chunks with proper mapping
-            chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options, generate_json=True)
+            # Process parts with model-aware chunking
+            partResults = await self._processPartsWithMapping(extractionResult, prompt, options)

-            # Merge with JSON mode
-            mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options)
+            # Convert to JSON format (simplified for now)
+            mergedJsonDocument = self._convertPartResultsToJson(partResults, options)

             # Normalize merged JSON into a single canonical table (only if table content exists)
             try:
@@ -505,6 +486,127 @@ CONTINUATION INSTRUCTIONS:
         """
         return await self.processDocumentsPerChunk(documents, prompt, options)
async def _processPartsWithMapping(
self,
extractionResult: List[ContentExtracted],
prompt: str,
options: Optional[AiCallOptions] = None
) -> List['PartResult']:
"""Process content parts with model-aware chunking and proper mapping."""
from modules.datamodels.datamodelExtraction import PartResult
import asyncio
# Collect all parts that need processing
parts_to_process = []
part_index = 0
for ec in extractionResult:
for part in ec.parts:
if part.typeGroup in ("text", "table", "structure", "image", "container", "binary"):
# Skip empty container parts
if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0):
logger.debug(f"Skipping empty container part: mimeType={part.mimeType}")
continue
parts_to_process.append({
'part': part,
'part_index': part_index,
'document_id': ec.id
})
part_index += 1
logger.info(f"Processing {len(parts_to_process)} parts with model-aware chunking")
# Process parts in parallel
async def process_single_part(part_info: Dict) -> PartResult:
part = part_info['part']
part_index = part_info['part_index']
document_id = part_info['document_id']
start_time = time.time()
try:
# Create AI call request with content part
from modules.datamodels.datamodelAi import AiCallRequest
request = AiCallRequest(
prompt=prompt,
context="", # Context is in the content part
options=options,
contentParts=[part] # Pass as list for unified processing
)
# Call AI with model-aware chunking
response = await self.aiObjects.call(request)
processing_time = time.time() - start_time
return PartResult(
originalPart=part,
aiResult=response.content,
partIndex=part_index,
documentId=document_id,
processingTime=processing_time,
metadata={
"success": True,
"partSize": len(part.data) if part.data else 0,
"resultSize": len(response.content),
"typeGroup": part.typeGroup,
"modelName": response.modelName,
"priceUsd": response.priceUsd
}
)
except Exception as e:
processing_time = time.time() - start_time
logger.warning(f"Error processing part {part_index}: {str(e)}")
return PartResult(
originalPart=part,
aiResult=f"[Error processing part: {str(e)}]",
partIndex=part_index,
documentId=document_id,
processingTime=processing_time,
metadata={
"success": False,
"error": str(e),
"partSize": len(part.data) if part.data else 0,
"typeGroup": part.typeGroup
}
)
# Process parts with concurrency control
max_concurrent = 5
if options and hasattr(options, 'maxConcurrentParts'):
max_concurrent = options.maxConcurrentParts
semaphore = asyncio.Semaphore(max_concurrent)
async def process_with_semaphore(part_info):
async with semaphore:
return await process_single_part(part_info)
tasks = [process_with_semaphore(part_info) for part_info in parts_to_process]
part_results = await asyncio.gather(*tasks, return_exceptions=True)
# Handle exceptions
processed_results = []
for i, result in enumerate(part_results):
if isinstance(result, Exception):
part_info = parts_to_process[i]
processed_results.append(PartResult(
originalPart=part_info['part'],
aiResult=f"[Error in parallel processing: {str(result)}]",
partIndex=part_info['part_index'],
documentId=part_info['document_id'],
processingTime=0.0,
metadata={"success": False, "error": str(result)}
))
elif result is not None:
processed_results.append(result)
logger.info(f"Completed processing {len(processed_results)} parts")
return processed_results
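# --- Illustrative sketch (not part of the commit) ---
# A minimal, standalone example of the concurrency pattern used above: bound
# parallel work with an asyncio.Semaphore and collect results in order with
# asyncio.gather(return_exceptions=True). All names here are hypothetical
# stand-ins; the real method wires in AiCallRequest, PartResult and the
# model-aware chunking call.
import asyncio
import random

async def process_part(index: int) -> str:
    # Stand-in for the real AI call on a single content part.
    await asyncio.sleep(random.uniform(0.01, 0.05))
    if index == 3:
        raise ValueError("simulated failure")
    return f"result for part {index}"

async def process_all(indices: list[int], max_concurrent: int = 5) -> list[str]:
    semaphore = asyncio.Semaphore(max_concurrent)

    async def bounded(i: int) -> str:
        async with semaphore:
            return await process_part(i)

    results = await asyncio.gather(*(bounded(i) for i in indices), return_exceptions=True)
    # Exceptions come back in-place, so result order still maps 1:1 to the input parts.
    return [r if not isinstance(r, Exception) else f"[error: {r}]" for r in results]

if __name__ == "__main__":
    print(asyncio.run(process_all(list(range(6)))))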
async def _processChunksWithMapping(
self,
extractionResult: List[ContentExtracted],
@@ -907,340 +1009,451 @@ CONTINUATION INSTRUCTIONS:
logger.info(f"Completed processing {len(processed_results)} chunks")
return processed_results
def _mergePartResults(
self,
partResults: List['PartResult'],
options: Optional[AiCallOptions] = None
) -> str:
"""Merge part results using existing sophisticated merging system."""
if not partResults:
return ""
# Convert PartResults back to ContentParts for existing merger system
from modules.datamodels.datamodelExtraction import ContentPart
content_parts = []
for part_result in partResults:
# Create ContentPart from PartResult with proper typeGroup
content_part = ContentPart(
id=part_result.originalPart.id,
parentId=part_result.originalPart.parentId,
label=part_result.originalPart.label,
typeGroup=part_result.originalPart.typeGroup, # Use original typeGroup
mimeType=part_result.originalPart.mimeType,
data=part_result.aiResult, # Use AI result as data
metadata={
**part_result.originalPart.metadata,
"aiResult": True,
"partIndex": part_result.partIndex,
"documentId": part_result.documentId,
"processingTime": part_result.processingTime,
"success": part_result.metadata.get("success", False)
}
)
content_parts.append(content_part)
# Use existing merging strategy from options
merge_strategy = {
"useIntelligentMerging": True,
"groupBy": "documentId", # Group by document
"orderBy": "partIndex", # Order by part index
"mergeType": "concatenate"
}
if options and hasattr(options, 'mergeStrategy'):
merge_strategy.update(options.mergeStrategy)
# Apply existing merging logic using the sophisticated merging system
from modules.services.serviceExtraction.subPipeline import _applyMerging
merged_parts = _applyMerging(content_parts, merge_strategy)
# Convert merged parts back to final string
final_content = "\n\n".join([part.data for part in merged_parts])
logger.info(f"Merged {len(partResults)} parts using existing sophisticated merging system")
return final_content.strip()
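# Illustrative sketch of the merge behaviour above, assuming a "group by
# document, order by part index, concatenate" strategy. FakePart and
# merge_parts are hypothetical stand-ins for ContentPart and _applyMerging,
# which live in the project's extraction modules.
from collections import defaultdict
from dataclasses import dataclass

@dataclass
class FakePart:
    document_id: str
    part_index: int
    data: str

def merge_parts(parts: list[FakePart]) -> str:
    grouped: dict[str, list[FakePart]] = defaultdict(list)
    for part in parts:
        grouped[part.document_id].append(part)
    merged_docs = []
    for doc_id in grouped:
        ordered = sorted(grouped[doc_id], key=lambda p: p.part_index)
        merged_docs.append("\n\n".join(p.data for p in ordered))
    return "\n\n".join(merged_docs).strip()

parts = [
    FakePart("doc-1", 1, "second chunk of doc-1"),
    FakePart("doc-1", 0, "first chunk of doc-1"),
    FakePart("doc-2", 0, "only chunk of doc-2"),
]
print(merge_parts(parts))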
def _convertPartResultsToJson(
self,
partResults: List['PartResult'],
options: Optional[AiCallOptions] = None
) -> Dict[str, Any]:
"""Convert part results to JSON format using existing sophisticated merging system."""
if not partResults:
return {"metadata": {"title": "Empty Document"}, "sections": []}
# Convert PartResults back to ContentParts for existing merger system
from modules.datamodels.datamodelExtraction import ContentPart
content_parts = []
for part_result in partResults:
# Create ContentPart from PartResult with proper typeGroup
content_part = ContentPart(
id=part_result.originalPart.id,
parentId=part_result.originalPart.parentId,
label=part_result.originalPart.label,
typeGroup=part_result.originalPart.typeGroup, # Use original typeGroup
mimeType=part_result.originalPart.mimeType,
data=part_result.aiResult, # Use AI result as data
metadata={
**part_result.originalPart.metadata,
"aiResult": True,
"partIndex": part_result.partIndex,
"documentId": part_result.documentId,
"processingTime": part_result.processingTime,
"success": part_result.metadata.get("success", False)
}
)
content_parts.append(content_part)
# Use existing merging strategy for JSON mode
merge_strategy = {
"useIntelligentMerging": True,
"groupBy": "documentId", # Group by document
"orderBy": "partIndex", # Order by part index
"mergeType": "concatenate"
}
if options and hasattr(options, 'mergeStrategy'):
merge_strategy.update(options.mergeStrategy)
# Apply existing merging logic using the sophisticated merging system
from modules.services.serviceExtraction.subPipeline import _applyMerging
merged_parts = _applyMerging(content_parts, merge_strategy)
# Convert merged parts to JSON format
all_sections = []
document_titles = []
for part in merged_parts:
if part.metadata.get("success", False):
try:
# Parse JSON from AI result
part_json = json.loads(part.data)
# Check if this is a multi-file response (has "documents" key)
if isinstance(part_json, dict) and "documents" in part_json:
# This is a multi-file response - merge all documents
logger.debug(f"Processing multi-file response from part {part.id} with {len(part_json['documents'])} documents")
# Return multi-file response directly
return {
"metadata": part_json.get("metadata", {"title": "Merged Document"}),
"documents": part_json["documents"]
}
# Extract sections from single-file response
elif isinstance(part_json, dict) and "sections" in part_json:
for section in part_json["sections"]:
# Add part context to section
section["metadata"] = section.get("metadata", {})
section["metadata"]["source_part"] = part.id
section["metadata"]["source_document"] = part.metadata.get("documentId", "unknown")
section["metadata"]["part_index"] = part.metadata.get("partIndex", 0)
all_sections.append(section)
# Extract document title
if isinstance(part_json, dict) and "metadata" in part_json:
title = part_json["metadata"].get("title", "")
if title and title not in document_titles:
document_titles.append(title)
except json.JSONDecodeError as e:
logger.warning(f"Failed to parse JSON from part {part.id}: {str(e)}")
# Create a fallback section for invalid JSON
fallback_section = {
"id": f"error_section_{part.id}",
"title": "Error Section",
"content_type": "paragraph",
"elements": [{
"text": f"Error parsing part {part.id}: {str(e)}"
}],
"order": part.metadata.get("partIndex", 0),
"metadata": {
"source_document": part.metadata.get("documentId", "unknown"),
"part_id": part.id,
"error": str(e)
}
}
all_sections.append(fallback_section)
else:
# Handle error parts
error_section = {
"id": f"error_section_{part.id}",
"title": "Error Section",
"content_type": "paragraph",
"elements": [{
"text": f"Error in part {part.id}: {part.metadata.get('error', 'Unknown error')}"
}],
"order": part.metadata.get("partIndex", 0),
"metadata": {
"source_document": part.metadata.get("documentId", "unknown"),
"part_id": part.id,
"error": part.metadata.get('error', 'Unknown error')
}
}
all_sections.append(error_section)
# Sort sections by order
all_sections.sort(key=lambda x: x.get("order", 0))
# Create merged document with sections
merged_document = {
"metadata": {
"title": document_titles[0] if document_titles else "Merged Document",
"extraction_method": "model_aware_chunking_with_merging",
"version": "2.0"
},
"sections": all_sections,
"summary": f"Merged document using sophisticated merging system",
"tags": ["merged", "ai_generated", "model_aware", "sophisticated_merging"]
}
logger.info(f"Converted {len(partResults)} parts to JSON format using existing sophisticated merging system")
return merged_document
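# Illustrative sketch of the JSON assembly above: parse each part's AI result,
# collect its "sections", fall back to an error section on invalid JSON, then
# sort by "order". Standalone and hypothetical; the real method additionally
# handles multi-file responses and reuses the extraction pipeline's merger.
import json

def merge_json_parts(part_results: list[dict]) -> dict:
    sections = []
    for part in part_results:
        try:
            payload = json.loads(part["aiResult"])
            for section in payload.get("sections", []):
                section.setdefault("metadata", {})["source_part"] = part["id"]
                sections.append(section)
        except json.JSONDecodeError as exc:
            sections.append({
                "id": f"error_section_{part['id']}",
                "title": "Error Section",
                "content_type": "paragraph",
                "elements": [{"text": f"Error parsing part {part['id']}: {exc}"}],
                "order": part.get("order", 0),
            })
    sections.sort(key=lambda s: s.get("order", 0))
    return {"metadata": {"title": "Merged Document"}, "sections": sections}

demo = [
    {"id": "p1", "order": 0, "aiResult": '{"sections": [{"id": "s1", "order": 1, "elements": []}]}'},
    {"id": "p2", "order": 1, "aiResult": "not json"},
]
print(json.dumps(merge_json_parts(demo), indent=2))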
def _mergeChunkResults( def _mergeChunkResults(
self, self,
chunkResults: List[ChunkResult], chunkResults: List[ChunkResult],
options: Optional[AiCallOptions] = None options: Optional[AiCallOptions] = None
) -> str: ) -> str:
"""Merge chunk results while preserving document structure and chunk order.""" """Merge chunk results using existing sophisticated merging system."""
if not chunkResults: if not chunkResults:
return "" return ""
# Get merging configuration from options # Convert ChunkResults back to ContentParts for existing merger system
chunk_separator = "\n\n---\n\n" from modules.datamodels.datamodelExtraction import ContentPart
include_document_headers = True content_parts = []
include_chunk_metadata = False
if options:
if hasattr(options, 'chunkSeparator'):
chunk_separator = options.chunkSeparator
elif hasattr(options, 'mergeStrategy') and options.mergeStrategy:
chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n---\n\n")
# Check for enhanced options
if hasattr(options, 'preserveChunkMetadata'):
include_chunk_metadata = options.preserveChunkMetadata
# Group chunk results by document
results_by_document = {}
for chunk_result in chunkResults: for chunk_result in chunkResults:
doc_id = chunk_result.documentId # Create ContentPart from ChunkResult with proper typeGroup
if doc_id not in results_by_document: content_part = ContentPart(
results_by_document[doc_id] = [] id=chunk_result.originalChunk.id,
results_by_document[doc_id].append(chunk_result) parentId=chunk_result.originalChunk.parentId,
label=chunk_result.originalChunk.label,
typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup
mimeType=chunk_result.originalChunk.mimeType,
data=chunk_result.aiResult, # Use AI result as data
metadata={
**chunk_result.originalChunk.metadata,
"aiResult": True,
"chunk": True,
"chunkIndex": chunk_result.chunkIndex,
"documentId": chunk_result.documentId,
"processingTime": chunk_result.processingTime,
"success": chunk_result.metadata.get("success", False)
}
)
content_parts.append(content_part)
# Sort chunks within each document by chunk index # Use existing merging strategy from options
for doc_id in results_by_document: merge_strategy = {
results_by_document[doc_id].sort(key=lambda x: x.chunkIndex) "useIntelligentMerging": True,
"groupBy": "documentId", # Group by document
"orderBy": "chunkIndex", # Order by chunk index
"mergeType": "concatenate"
}
# Merge results for each document if options and hasattr(options, 'mergeStrategy'):
merged_documents = [] merge_strategy.update(options.mergeStrategy)
for doc_id, doc_chunks in results_by_document.items(): # Apply existing merging logic using the sophisticated merging system
# Build document header if enabled from modules.services.serviceExtraction.subPipeline import _applyMerging
doc_header = "" merged_parts = _applyMerging(content_parts, merge_strategy)
if include_document_headers:
doc_header = f"\n\n=== DOCUMENT: {doc_id} ===\n\n"
# Merge chunks for this document
doc_content = ""
for i, chunk_result in enumerate(doc_chunks):
# Add chunk separator (except for first chunk)
if i > 0:
doc_content += chunk_separator
# Add chunk content with optional metadata
chunk_metadata = chunk_result.metadata
if chunk_metadata.get("success", False):
chunk_content = chunk_result.aiResult
# Add chunk metadata if enabled
if include_chunk_metadata:
chunk_info = f"[Chunk {chunk_result.chunkIndex} - {chunk_metadata.get('typeGroup', 'unknown')} - {chunk_metadata.get('chunkSize', 0)} chars]"
chunk_content = f"{chunk_info}\n{chunk_content}"
doc_content += chunk_content
else:
# Handle error chunks
error_msg = f"[ERROR in chunk {chunk_result.chunkIndex}: {chunk_metadata.get('error', 'Unknown error')}]"
doc_content += error_msg
merged_documents.append(doc_header + doc_content)
# Join all documents # Convert merged parts back to final string
final_result = "\n\n".join(merged_documents) final_content = "\n\n".join([part.data for part in merged_parts])
logger.info(f"Merged {len(chunkResults)} chunks from {len(results_by_document)} documents") logger.info(f"Merged {len(chunkResults)} chunks using existing sophisticated merging system")
return final_result.strip() return final_content.strip()
def _mergeChunkResultsClean( def _mergeChunkResultsClean(
self, self,
chunkResults: List[ChunkResult], chunkResults: List[ChunkResult],
options: Optional[AiCallOptions] = None options: Optional[AiCallOptions] = None
) -> str: ) -> str:
"""Merge chunk results in CLEAN mode - no debug metadata or document headers.""" """Merge chunk results in CLEAN mode using existing sophisticated merging system."""
if not chunkResults: if not chunkResults:
return "" return ""
# Get merging configuration from options # Convert ChunkResults back to ContentParts for existing merger system
chunk_separator = "\n\n" from modules.datamodels.datamodelExtraction import ContentPart
include_document_headers = False # CLEAN MODE: No document headers content_parts = []
include_chunk_metadata = False # CLEAN MODE: No chunk metadata
if options:
if hasattr(options, 'chunkSeparator'):
chunk_separator = options.chunkSeparator
elif hasattr(options, 'mergeStrategy') and options.mergeStrategy:
chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n")
# Group chunk results by document
results_by_document = {}
for chunk_result in chunkResults: for chunk_result in chunkResults:
doc_id = chunk_result.documentId # Skip empty or error chunks in clean mode
if doc_id not in results_by_document: if not chunk_result.metadata.get("success", False):
results_by_document[doc_id] = [] continue
results_by_document[doc_id].append(chunk_result) if not chunk_result.aiResult or not chunk_result.aiResult.strip():
continue
# Sort chunks within each document by chunk index # Skip container/binary chunks in clean mode
for doc_id in results_by_document: if chunk_result.aiResult.startswith("[Skipped ") and "content:" in chunk_result.aiResult:
results_by_document[doc_id].sort(key=lambda x: x.chunkIndex) continue
# Merge results for each document in CLEAN mode
merged_documents = []
for doc_id, doc_chunks in results_by_document.items():
# CLEAN MODE: No document headers
doc_header = ""
# Merge chunks for this document # Create ContentPart from ChunkResult with proper typeGroup
doc_content = "" content_part = ContentPart(
for i, chunk_result in enumerate(doc_chunks): id=chunk_result.originalChunk.id,
# Add chunk separator (except for first chunk) parentId=chunk_result.originalChunk.parentId,
if i > 0: label=chunk_result.originalChunk.label,
doc_content += chunk_separator typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup
mimeType=chunk_result.originalChunk.mimeType,
# Add chunk content without metadata data=chunk_result.aiResult, # Use AI result as data
chunk_metadata = chunk_result.metadata metadata={
if chunk_metadata.get("success", False): **chunk_result.originalChunk.metadata,
chunk_content = chunk_result.aiResult "aiResult": True,
"chunk": True,
# CLEAN MODE: Skip container/binary chunks entirely "chunkIndex": chunk_result.chunkIndex,
if chunk_content.startswith("[Skipped ") and "content:" in chunk_content: "documentId": chunk_result.documentId,
continue # Skip container/binary chunks in clean mode "processingTime": chunk_result.processingTime,
"success": chunk_result.metadata.get("success", False)
# CLEAN MODE: Skip empty or whitespace-only chunks }
if not chunk_content.strip(): )
continue # Skip empty chunks in clean mode content_parts.append(content_part)
# CLEAN MODE: No chunk metadata
doc_content += chunk_content
else:
# Handle error chunks silently in clean mode
continue
merged_documents.append(doc_header + doc_content)
# Join all documents # Use existing merging strategy for clean mode
final_result = "\n\n".join(merged_documents) merge_strategy = {
"useIntelligentMerging": True,
"groupBy": "documentId", # Group by document
"orderBy": "chunkIndex", # Order by chunk index
"mergeType": "concatenate"
}
return final_result.strip() if options and hasattr(options, 'mergeStrategy'):
merge_strategy.update(options.mergeStrategy)
# Apply existing merging logic using the sophisticated merging system
from modules.services.serviceExtraction.subPipeline import _applyMerging
merged_parts = _applyMerging(content_parts, merge_strategy)
# Convert merged parts back to final string
final_content = "\n\n".join([part.data for part in merged_parts])
logger.info(f"Merged {len(content_parts)} chunks in clean mode using existing sophisticated merging system")
return final_content.strip()
def _mergeChunkResultsJson( def _mergeChunkResultsJson(
self, self,
chunkResults: List[ChunkResult], chunkResults: List[ChunkResult],
options: Optional[AiCallOptions] = None options: Optional[AiCallOptions] = None
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Merge chunk results in JSON mode - returns structured JSON document.""" """Merge chunk results in JSON mode using existing sophisticated merging system."""
if not chunkResults: if not chunkResults:
return {"metadata": {"title": "Empty Document"}, "sections": []} return {"metadata": {"title": "Empty Document"}, "sections": []}
# Group chunk results by document # Convert ChunkResults back to ContentParts for existing merger system
results_by_document = {} from modules.datamodels.datamodelExtraction import ContentPart
content_parts = []
for chunk_result in chunkResults: for chunk_result in chunkResults:
doc_id = chunk_result.documentId # Create ContentPart from ChunkResult with proper typeGroup
if doc_id not in results_by_document: content_part = ContentPart(
results_by_document[doc_id] = [] id=chunk_result.originalChunk.id,
results_by_document[doc_id].append(chunk_result) parentId=chunk_result.originalChunk.parentId,
label=chunk_result.originalChunk.label,
typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup
mimeType=chunk_result.originalChunk.mimeType,
data=chunk_result.aiResult, # Use AI result as data
metadata={
**chunk_result.originalChunk.metadata,
"aiResult": True,
"chunk": True,
"chunkIndex": chunk_result.chunkIndex,
"documentId": chunk_result.documentId,
"processingTime": chunk_result.processingTime,
"success": chunk_result.metadata.get("success", False)
}
)
content_parts.append(content_part)
# Sort chunks within each document by chunk index # Use existing merging strategy for JSON mode
for doc_id in results_by_document: merge_strategy = {
results_by_document[doc_id].sort(key=lambda x: x.chunkIndex) "useIntelligentMerging": True,
"groupBy": "documentId", # Group by document
"orderBy": "chunkIndex", # Order by chunk index
"mergeType": "concatenate"
}
# Merge JSON results for each document if options and hasattr(options, 'mergeStrategy'):
all_documents = [] merge_strategy.update(options.mergeStrategy)
# Apply existing merging logic using the sophisticated merging system
from modules.services.serviceExtraction.subPipeline import _applyMerging
merged_parts = _applyMerging(content_parts, merge_strategy)
# Convert merged parts to JSON format
all_sections = [] all_sections = []
document_titles = [] document_titles = []
combined_metadata = {"title": "Merged Document", "splitStrategy": "by_section"}
for doc_id, doc_chunks in results_by_document.items(): for part in merged_parts:
# Process each chunk's JSON result if part.metadata.get("success", False):
for chunk_result in doc_chunks: try:
chunk_metadata = chunk_result.metadata # Parse JSON from AI result
if chunk_metadata.get("success", False): chunk_json = json.loads(part.data)
try:
# Parse JSON from AI result # Check if this is a multi-file response (has "documents" key)
chunk_json = json.loads(chunk_result.aiResult) if isinstance(chunk_json, dict) and "documents" in chunk_json:
# This is a multi-file response - merge all documents
logger.debug(f"Processing multi-file response from part {part.id} with {len(chunk_json['documents'])} documents")
# Check if this is a multi-file response (has "documents" key) # Return multi-file response directly
if isinstance(chunk_json, dict) and "documents" in chunk_json: return {
# This is a multi-file response - merge all documents "metadata": chunk_json.get("metadata", {"title": "Merged Document"}),
logger.debug(f"Processing multi-file response from chunk {chunk_result.chunkIndex} with {len(chunk_json['documents'])} documents") "documents": chunk_json["documents"]
# Add all documents from this chunk
for doc in chunk_json["documents"]:
# Add chunk context to document
doc["metadata"] = doc.get("metadata", {})
doc["metadata"]["source_chunk"] = chunk_result.chunkIndex
doc["metadata"]["source_document"] = doc_id
all_documents.append(doc)
# Update combined metadata
if "metadata" in chunk_json:
combined_metadata.update(chunk_json["metadata"])
# Extract sections from single-file response (fallback)
elif isinstance(chunk_json, dict) and "sections" in chunk_json:
for section in chunk_json["sections"]:
# Add document context to section
section["metadata"] = section.get("metadata", {})
section["metadata"]["source_document"] = doc_id
section["metadata"]["chunk_index"] = chunk_result.chunkIndex
all_sections.append(section)
# Extract document title
if isinstance(chunk_json, dict) and "metadata" in chunk_json:
title = chunk_json["metadata"].get("title", "")
if title and title not in document_titles:
document_titles.append(title)
except json.JSONDecodeError as e:
logger.warning(f"Failed to parse JSON from chunk {chunk_result.chunkIndex}: {str(e)}")
# Create a fallback section for invalid JSON
fallback_section = {
"id": f"error_section_{chunk_result.chunkIndex}",
"title": "Error Section",
"content_type": "paragraph",
"elements": [{
"text": f"Error parsing chunk {chunk_result.chunkIndex}: {str(e)}"
}],
"order": chunk_result.chunkIndex,
"metadata": {
"source_document": doc_id,
"chunk_index": chunk_result.chunkIndex,
"error": str(e)
}
} }
all_sections.append(fallback_section)
else: # Extract sections from single-file response
# Handle error chunks elif isinstance(chunk_json, dict) and "sections" in chunk_json:
error_section = { for section in chunk_json["sections"]:
"id": f"error_section_{chunk_result.chunkIndex}", # Add part context to section
section["metadata"] = section.get("metadata", {})
section["metadata"]["source_part"] = part.id
section["metadata"]["source_document"] = part.metadata.get("documentId", "unknown")
section["metadata"]["chunk_index"] = part.metadata.get("chunkIndex", 0)
all_sections.append(section)
# Extract document title
if isinstance(chunk_json, dict) and "metadata" in chunk_json:
title = chunk_json["metadata"].get("title", "")
if title and title not in document_titles:
document_titles.append(title)
except json.JSONDecodeError as e:
logger.warning(f"Failed to parse JSON from part {part.id}: {str(e)}")
# Create a fallback section for invalid JSON
fallback_section = {
"id": f"error_section_{part.id}",
"title": "Error Section", "title": "Error Section",
"content_type": "paragraph", "content_type": "paragraph",
"elements": [{ "elements": [{
"text": f"Error in chunk {chunk_result.chunkIndex}: {chunk_metadata.get('error', 'Unknown error')}" "text": f"Error parsing part {part.id}: {str(e)}"
}], }],
"order": chunk_result.chunkIndex, "order": part.metadata.get("chunkIndex", 0),
"metadata": { "metadata": {
"source_document": doc_id, "source_document": part.metadata.get("documentId", "unknown"),
"chunk_index": chunk_result.chunkIndex, "part_id": part.id,
"error": chunk_metadata.get('error', 'Unknown error') "error": str(e)
} }
} }
all_sections.append(error_section) all_sections.append(fallback_section)
else:
# Handle error parts
error_section = {
"id": f"error_section_{part.id}",
"title": "Error Section",
"content_type": "paragraph",
"elements": [{
"text": f"Error in part {part.id}: {part.metadata.get('error', 'Unknown error')}"
}],
"order": part.metadata.get("chunkIndex", 0),
"metadata": {
"source_document": part.metadata.get("documentId", "unknown"),
"part_id": part.id,
"error": part.metadata.get('error', 'Unknown error')
}
}
all_sections.append(error_section)
# Sort sections by order # Sort sections by order
all_sections.sort(key=lambda x: x.get("order", 0)) all_sections.sort(key=lambda x: x.get("order", 0))
# If we have merged documents from multi-file responses, return them # Create merged document with sections
if all_documents:
logger.info(f"Merged {len(all_documents)} documents from {len(chunkResults)} chunks")
return {
"metadata": combined_metadata,
"documents": all_documents
}
# Otherwise, create merged document with sections (single-file fallback)
merged_document = { merged_document = {
"metadata": { "metadata": {
"title": document_titles[0] if document_titles else "Merged Document", "title": document_titles[0] if document_titles else "Merged Document",
"source_documents": list(results_by_document.keys()), "extraction_method": "ai_json_extraction_with_merging",
"extraction_method": "ai_json_extraction", "version": "2.0"
"version": "1.0"
}, },
"sections": all_sections, "sections": all_sections,
"summary": f"Merged document from {len(results_by_document)} source documents", "summary": f"Merged document using sophisticated merging system",
"tags": ["merged", "ai_generated"] "tags": ["merged", "ai_generated", "sophisticated_merging"]
} }
logger.info(f"Merged {len(chunkResults)} chunks from {len(results_by_document)} documents (JSON mode)") logger.info(f"Merged {len(chunkResults)} chunks using existing sophisticated merging system (JSON mode)")
return merged_document return merged_document
def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]:
# REMOVED: _getModelCapabilitiesForContent method - no longer needed with model-aware chunking
"""
Get model capabilities for content processing, including appropriate size limits for chunking.
Uses centralized model selection to determine chunking parameters.
"""
# Estimate total content size
prompt_size = len(prompt.encode('utf-8'))
document_size = 0
if documents:
# Rough estimate of document content size
for doc in documents:
document_size += doc.fileSize or 0
total_size = prompt_size + document_size
# Use centralized model selection to get the best model for chunking parameters
try:
from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.aicore.aicoreModelSelector import model_selector
# Get available models and select the best one for this operation
availableModels = modelRegistry.getAvailableModels()
selectedModel = model_selector.selectModel(prompt, "", options, availableModels)
if selectedModel:
context_length = selectedModel.contextLength
model_name = selectedModel.name
logger.debug(f"Selected model for chunking: {model_name} with context length: {context_length}")
else:
# Fallback to conservative default if no model selected
context_length = 128000 # GPT-4o default
model_name = "fallback"
logger.warning(f"No model selected for chunking, using fallback context length: {context_length}")
except Exception as e:
# Fallback to conservative default if model selection fails
context_length = 128000 # GPT-4o default
model_name = "fallback"
logger.error(f"Model selection failed for chunking: {e}, using fallback context length: {context_length}")
# Calculate appropriate sizes
# Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters)
context_length_bytes = int(context_length * 4)
max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length
text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks
image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks
logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes")
logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes")
return {
"maxContextBytes": max_context_bytes,
"textChunkSize": text_chunk_size,
"imageChunkSize": image_chunk_size
}
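# Illustrative sketch of the size arithmetic used above (now superseded by
# model-aware chunking). Assuming roughly 4 characters per token, a 128k-token
# context maps to ~512,000 bytes; 90% of that is budgeted as usable context,
# and text / image chunks take 70% / 80% of that budget respectively.
def chunk_budget(context_length_tokens: int) -> dict:
    context_bytes = context_length_tokens * 4          # ~4 chars per token
    max_context_bytes = int(context_bytes * 0.9)       # keep 10% headroom
    return {
        "maxContextBytes": max_context_bytes,
        "textChunkSize": int(max_context_bytes * 0.7),
        "imageChunkSize": int(max_context_bytes * 0.8),
    }

# For a 128,000-token model this yields 460,800 / 322,560 / 368,640 bytes.
print(chunk_budget(128000))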
View file
@@ -73,139 +73,18 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker
parts = extractor.extract(documentBytes, {"fileName": fileName, "mimeType": mimeType, "options": options})
# Apply chunking and size limiting
parts = poolAndLimit(parts, chunkerRegistry, options)
# Optional merge step - but preserve chunks
# REMOVED: poolAndLimit(parts, chunkerRegistry, options)
# REMOVED: Chunking logic - now handled in AI call phase
# Apply merging strategy if provided (preserve existing logic)
mergeStrategy = options.get("mergeStrategy", {})
if mergeStrategy:
parts = _applyMerging(parts, mergeStrategy)
# Don't merge chunks - they should stay separate for processing
non_chunk_parts = [p for p in parts if not p.metadata.get("chunk", False)]
chunk_parts = [p for p in parts if p.metadata.get("chunk", False)]
logger.debug(f"runExtraction: Preserving {len(chunk_parts)} chunks from merging")
logger.debug(f"runExtraction - non_chunk_parts: {len(non_chunk_parts)}, chunk_parts: {len(chunk_parts)}")
# Apply intelligent merging for small text parts
if non_chunk_parts:
# Count text parts
text_parts = [p for p in non_chunk_parts if p.typeGroup == "text"]
if len(text_parts) > 5: # If we have many small text parts, merge them
logger.info(f"🔧 Merging {len(text_parts)} small text parts for efficiency")
non_chunk_parts = _mergeParts(non_chunk_parts, mergeStrategy)
# Combine non-chunk parts with chunk parts (chunks stay separate)
parts = non_chunk_parts + chunk_parts
logger.debug(f"runExtraction: Final parts after merging: {len(parts)} (chunks: {len(chunk_parts)})")
logger.debug(f"runExtraction - Final parts: {len(parts)} (chunks: {len(chunk_parts)})")
# Timestamp-only extraction debug dumps removed
return ContentExtracted(id=makeId(), parts=parts)
def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, options: Dict[str, Any]) -> List[ContentPart]:
# REMOVED: poolAndLimit function - chunking now handled in AI call phase
maxSize = int(options.get("maxSize", 0) or 0)
chunkAllowed = bool(options.get("chunkAllowed", False))
mergeStrategy = options.get("mergeStrategy", {})
if maxSize <= 0:
# Still apply merging if strategy provided
if mergeStrategy:
return _applyMerging(parts, mergeStrategy)
return parts
# First, try to fit within size limit
current = 0
kept: List[ContentPart] = []
remaining: List[ContentPart] = []
logger.debug(f"Starting poolAndLimit with {len(parts)} parts, maxSize={maxSize}")
for i, p in enumerate(parts):
size = int(p.metadata.get("size", 0) or 0)
# Show first 50 characters of text content for debugging
content_preview = p.data[:50].replace('\n', '\\n') if p.data else ""
logger.debug(f"Part {i}: {p.typeGroup} - {size} bytes - '{content_preview}...' (current: {current})")
if current + size <= maxSize:
kept.append(p)
current += size
logger.debug(f"Part {i} kept (total: {current})")
else:
remaining.append(p)
logger.debug(f"Part {i} moved to remaining")
logger.debug(f"Kept: {len(kept)}, Remaining: {len(remaining)}")
# If we have remaining parts and chunking is allowed, try chunking
if remaining and chunkAllowed:
logger.debug(f"=== CHUNKING ACTIVATED ===")
logger.debug(f"Remaining parts to chunk: {len(remaining)}")
logger.debug(f"Max size limit: {maxSize} bytes")
logger.debug(f"Current size used: {current} bytes")
logger.debug(f"Chunking {len(remaining)} remaining parts")
for p in remaining:
if p.typeGroup in ("text", "table", "structure", "image", "container", "binary"):
logger.debug(f"Chunking {p.typeGroup} part: {len(p.data)} chars")
logger.debug(f"Chunking {p.typeGroup} part with {len(p.data)} chars")
chunks = chunkerRegistry.resolve(p.typeGroup).chunk(p, options)
logger.debug(f"Created {len(chunks)} chunks")
logger.debug(f"Created {len(chunks)} chunks")
chunks_added = 0
for ch in chunks:
chSize = int(ch.get("size", 0) or 0)
# Add all chunks - don't limit by maxSize since they'll be processed separately
kept.append(ContentPart(
id=makeId(),
parentId=p.id,
label=f"chunk_{ch.get('order', 0)}",
typeGroup=p.typeGroup,
mimeType=p.mimeType,
data=ch.get("data", ""),
metadata={
"size": chSize,
"chunk": True,
**ch.get("metadata", {})
}
))
chunks_added += 1
logger.debug(f"Added chunk {ch.get('order', 0)}: {chSize} bytes")
logger.debug(f"Added {chunks_added} chunks from {p.typeGroup} part")
# Apply merging strategy if provided, but preserve chunks
if mergeStrategy:
# Don't merge chunks - they should stay separate for processing
non_chunk_parts = [p for p in kept if not p.metadata.get("chunk", False)]
chunk_parts = [p for p in kept if p.metadata.get("chunk", False)]
logger.debug(f"Preserving {len(chunk_parts)} chunks from merging")
# Apply intelligent merging for small text parts
if non_chunk_parts:
# Count text parts
text_parts = [p for p in non_chunk_parts if p.typeGroup == "text"]
if len(text_parts) > 5: # If we have many small text parts, merge them
logger.info(f"🔧 Merging {len(text_parts)} small text parts for efficiency")
non_chunk_parts = _applyMerging(non_chunk_parts, mergeStrategy)
# Combine non-chunk parts with chunk parts (chunks stay separate)
kept = non_chunk_parts + chunk_parts
logger.debug(f"Final parts after merging: {len(kept)} (chunks: {len(chunk_parts)})")
logger.debug(f"Final parts after merging: {len(kept)} (chunks: {len(chunk_parts)})")
# Re-check size after merging
totalSize = sum(int(p.metadata.get("size", 0) or 0) for p in kept)
if totalSize > maxSize and mergeStrategy.get("maxSize"):
# Apply size limit to merged parts
kept = _applySizeLimit(kept, maxSize)
logger.debug(f"poolAndLimit returning {len(kept)} parts")
return kept
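# Illustrative sketch of the (now removed) pooling step: greedily keep parts
# while their cumulative size fits maxSize and defer the rest so they can be
# chunked separately. Plain integers stand in for ContentPart sizes.
def pool_parts(sizes: list[int], max_size: int) -> tuple[list[int], list[int]]:
    kept, remaining, current = [], [], 0
    for size in sizes:
        if current + size <= max_size:
            kept.append(size)
            current += size
        else:
            remaining.append(size)
    return kept, remaining

# Example: with a 10,000-byte budget, the 6,000-byte part is deferred.
print(pool_parts([4000, 3000, 6000, 2000], 10000))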
def _applyMerging(parts: List[ContentPart], strategy: Dict[str, Any]) -> List[ContentPart]:
@@ -264,37 +143,5 @@ def _applyMerging(parts: List[ContentPart], strategy: Dict[str, Any]) -> List[Co
return merged
def _applySizeLimit(parts: List[ContentPart], maxSize: int) -> List[ContentPart]:
# REMOVED: _applySizeLimit function - no longer needed after removing poolAndLimit
"""Apply size limit by prioritizing parts and truncating if necessary."""
# Sort by priority: text first, then others
priority_order = {"text": 0, "table": 1, "structure": 2, "image": 3, "binary": 4, "metadata": 5, "container": 6}
sorted_parts = sorted(parts, key=lambda p: priority_order.get(p.typeGroup, 99))
kept: List[ContentPart] = []
current_size = 0
for part in sorted_parts:
part_size = int(part.metadata.get("size", 0) or 0)
if current_size + part_size <= maxSize:
kept.append(part)
current_size += part_size
else:
# Try to truncate text parts
if part.typeGroup == "text" and part_size > 0:
remaining_size = maxSize - current_size
if remaining_size > 1000: # Only truncate if we have meaningful space
truncated_data = part.data[:remaining_size * 4] # Rough character estimate
truncated_part = ContentPart(
id=makeId(),
parentId=part.parentId,
label=f"{part.label}_truncated",
typeGroup=part.typeGroup,
mimeType=part.mimeType,
data=truncated_data,
metadata={**part.metadata, "size": len(truncated_data.encode('utf-8')), "truncated": True}
)
kept.append(truncated_part)
break
return kept
View file
@@ -138,6 +138,7 @@ class AIBehaviorTester:
self.testResults.append(result)
return result
def _extractContinuationInstruction(self, response: str) -> str:
"""Extract continuation instruction from response."""
try:
View file
@@ -25,7 +25,7 @@ from modules.datamodels.datamodelAi import (
)
from modules.datamodels.datamodelUam import User
from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.aicore.aicoreModelSelector import model_selector
from modules.aicore.aicoreModelSelector import modelSelector
class ModelSelectionTester:
@@ -45,6 +45,51 @@ class ModelSelectionTester:
self.services.ai = await AiService.create(self.services) self.services.ai = await AiService.create(self.services)
async def _printFallbackListWithContext(self, title: str, prompt: str, context: str, options: AiCallOptions) -> None:
print(f"\n{'='*80}")
print(f"{title}")
print(f"{'='*80}")
print(
f"Operation={options.operationType.name}, Priority={options.priority.name}, ProcessingMode={options.processingMode.name}"
)
# Show context and prompt sizes
promptSize = len(prompt.encode("utf-8"))
contextSize = len(context.encode("utf-8"))
totalSize = promptSize + contextSize
print(f"Prompt size: {promptSize} bytes, Context size: {contextSize} bytes, Total: {totalSize} bytes")
availableModels = modelRegistry.getAvailableModels()
failoverModelList = modelSelector.getFailoverModelList(
prompt=prompt,
context=context,
options=options,
availableModels=availableModels,
)
if not failoverModelList:
print("No suitable models found (capability filter returned empty list).")
return
print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx | score):")
for idx, m in enumerate(failoverModelList, 1):
costIn = getattr(m, "costPer1kTokensInput", 0.0)
# Calculate detailed score breakdown
promptSize = len(prompt.encode("utf-8"))
contextSize = len(context.encode("utf-8"))
totalSize = promptSize + contextSize
# Get detailed scoring
sizeRating = modelSelector._getSizeRating(m, totalSize)
processingModeRating = modelSelector._getProcessingModeRating(m.processingMode, options.processingMode)
priorityRating = modelSelector._getPriorityRating(m, options.priority)
totalScore = sizeRating + processingModeRating + priorityRating
print(
f" {idx:>2}. {m.name} | Q={getattr(m, 'qualityRating', 0)} | S={getattr(m, 'speedRating', 0)} | ${costIn:.4f} | ctx={getattr(m, 'contextLength', 0)} | score={totalScore:.3f}"
)
print(f" Size: {sizeRating:.3f}, ProcessingMode: {processingModeRating:.3f}, Priority: {priorityRating:.3f}")
async def _printFallbackList(self, title: str, prompt: str, options: AiCallOptions) -> None:
print(f"\n{'='*80}")
print(f"{title}")
@@ -53,24 +98,43 @@ class ModelSelectionTester:
f"Operation={options.operationType.name}, Priority={options.priority.name}, ProcessingMode={options.processingMode.name}"
)
# Show context and prompt sizes
context = "" # Currently using empty context
promptSize = len(prompt.encode("utf-8"))
contextSize = len(context.encode("utf-8"))
totalSize = promptSize + contextSize
print(f"Prompt size: {promptSize} bytes, Context size: {contextSize} bytes, Total: {totalSize} bytes")
availableModels = modelRegistry.getAvailableModels()
fallbackModels = model_selector.getFallbackModels(
failoverModelList = modelSelector.getFailoverModelList(
prompt=prompt,
context="",
context=context,
options=options,
availableModels=availableModels,
)
if not fallbackModels:
if not failoverModelList:
print("No suitable models found (capability filter returned empty list).")
return
print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx):")
print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx | score):")
for idx, m in enumerate(fallbackModels, 1):
for idx, m in enumerate(failoverModelList, 1):
costIn = getattr(m, "costPer1kTokensInput", 0.0)
# Calculate detailed score breakdown
promptSize = len(prompt.encode("utf-8"))
contextSize = len(context.encode("utf-8"))
totalSize = promptSize + contextSize
# Get detailed scoring
sizeRating = modelSelector._getSizeRating(m, totalSize)
processingModeRating = modelSelector._getProcessingModeRating(m.processingMode, options.processingMode)
priorityRating = modelSelector._getPriorityRating(m, options.priority)
totalScore = sizeRating + processingModeRating + priorityRating
print(
f"  {idx:>2}. {m.name} | Q={getattr(m, 'qualityRating', 0)} | S={getattr(m, 'speedRating', 0)} | ${costIn:.4f} | ctx={getattr(m, 'contextLength', 0)}"
f"  {idx:>2}. {m.name} | Q={getattr(m, 'qualityRating', 0)} | S={getattr(m, 'speedRating', 0)} | ${costIn:.4f} | ctx={getattr(m, 'contextLength', 0)} | score={totalScore:.3f}"
)
print(f" Size: {sizeRating:.3f}, ProcessingMode: {processingModeRating:.3f}, Priority: {priorityRating:.3f}")
async def run(self) -> None:
# Scenarios reflecting workflows/
@@ -146,10 +210,93 @@ class ModelSelectionTester:
)
)
# Intent analysis (user input understanding)
scenarios.append(
(
"ANALYSE - Quality, Detailed (Intent Analysis)",
"Analyze user intent and extract key requirements from the following request: 'I need to create a comprehensive marketing strategy for our new product launch including budget allocation, timeline, and target audience analysis.'",
AiCallOptions(
operationType=OperationTypeEnum.ANALYSE,
priority=PriorityEnum.QUALITY,
compressPrompt=False,
compressContext=False,
processingMode=ProcessingModeEnum.DETAILED,
maxCost=0.08,
maxProcessingTime=45,
resultFormat="json",
temperature=0.2,
),
)
)
# Review/Validation (quality assurance)
scenarios.append(
(
"ANALYSE - Quality, Detailed (Review/Validation)",
"Review and validate the following business proposal for completeness, accuracy, and compliance with industry standards. Identify any gaps or areas for improvement.",
AiCallOptions(
operationType=OperationTypeEnum.ANALYSE,
priority=PriorityEnum.QUALITY,
compressPrompt=False,
compressContext=False,
processingMode=ProcessingModeEnum.DETAILED,
maxCost=0.10,
maxProcessingTime=60,
resultFormat="json",
temperature=0.1,
),
)
)
# Large context scenario (to test size-based scoring)
scenarios.append(
(
"GENERAL - Balanced, Advanced (Large Context Test)",
"Process this large document and provide a comprehensive summary.",
AiCallOptions(
operationType=OperationTypeEnum.GENERAL,
priority=PriorityEnum.BALANCED,
compressPrompt=False,
compressContext=False,
processingMode=ProcessingModeEnum.ADVANCED,
maxCost=0.15,
maxProcessingTime=120,
),
)
)
# Iterate and print lists
for title, prompt, options in scenarios:
await self._printFallbackList(title, prompt, options)
# Test with actual context to see size-based scoring
largeContext = """
This is a comprehensive business document containing detailed information about our company's strategic initiatives,
financial performance, market analysis, competitive landscape, operational metrics, customer feedback,
product development roadmap, technology stack, human resources, legal compliance, risk management,
sustainability efforts, and future growth plans. The document spans multiple sections including executive summary,
market research, financial statements, operational reports, customer insights, product specifications,
technology architecture, HR policies, legal frameworks, risk assessments, environmental impact studies,
and strategic recommendations. This extensive content is designed to test the model selection algorithm's
ability to handle large context sizes and make intelligent decisions about which models are best suited
for processing such substantial amounts of information while maintaining efficiency and cost-effectiveness.
""" * 10 # Repeat to make it even larger
await self._printFallbackListWithContext(
"GENERAL - Balanced, Advanced (Large Context Test)",
"Analyze this comprehensive business document and provide key insights.",
largeContext,
AiCallOptions(
operationType=OperationTypeEnum.GENERAL,
priority=PriorityEnum.BALANCED,
compressPrompt=False,
compressContext=False,
processingMode=ProcessingModeEnum.ADVANCED,
maxCost=0.15,
maxProcessingTime=120,
),
)
async def main() -> None:
tester = ModelSelectionTester()