ready for test revised dynamic ai aware chunking system
This commit is contained in:
parent
6b819cc848
commit
36947b6d7e
14 changed files with 1160 additions and 1014 deletions
|
|
@ -48,7 +48,7 @@ class ModelRegistry:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Import the module
|
# Import the module
|
||||||
module = importlib.import_module(f'modules.connectors.{moduleName}')
|
module = importlib.import_module(f'modules.aicore.{moduleName}')
|
||||||
|
|
||||||
# Find connector classes (classes that inherit from BaseConnectorAi)
|
# Find connector classes (classes that inherit from BaseConnectorAi)
|
||||||
for attrName in dir(module):
|
for attrName in dir(module):
|
||||||
|
|
|
||||||
|
|
@ -1,158 +0,0 @@
|
||||||
"""
|
|
||||||
Configuration for dynamic model selection rules.
|
|
||||||
This makes model selection configurable rather than hardcoded.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import Dict, List, Any
|
|
||||||
from modules.datamodels.datamodelAi import OperationTypeEnum, ModelCapabilitiesEnum, PriorityEnum, SelectionRule
|
|
||||||
|
|
||||||
|
|
||||||
class ModelSelectionConfig:
|
|
||||||
"""Configuration for model selection rules."""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.rules = self._loadDefaultRules()
|
|
||||||
self.fallbackModels = self._loadFallbackModels()
|
|
||||||
|
|
||||||
def _loadDefaultRules(self) -> List[SelectionRule]:
|
|
||||||
"""Load default selection rules."""
|
|
||||||
return [
|
|
||||||
# High quality for planning and analysis
|
|
||||||
SelectionRule(
|
|
||||||
name="highQualityAnalysis",
|
|
||||||
condition="Planning or analysis operations requiring high quality",
|
|
||||||
weight=10.0,
|
|
||||||
operationTypes=[OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE],
|
|
||||||
priority=PriorityEnum.QUALITY,
|
|
||||||
capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.REASONING, ModelCapabilitiesEnum.ANALYSIS],
|
|
||||||
minQualityRating=8
|
|
||||||
),
|
|
||||||
|
|
||||||
# Fast processing for basic operations
|
|
||||||
SelectionRule(
|
|
||||||
name="fastBasicProcessing",
|
|
||||||
condition="Basic operations requiring speed",
|
|
||||||
weight=8.0,
|
|
||||||
operationTypes=[OperationTypeEnum.GENERAL],
|
|
||||||
priority=PriorityEnum.SPEED,
|
|
||||||
capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.CHAT],
|
|
||||||
minQualityRating=5
|
|
||||||
),
|
|
||||||
|
|
||||||
# Cost-effective for high-volume operations
|
|
||||||
SelectionRule(
|
|
||||||
name="costEffectiveProcessing",
|
|
||||||
condition="High-volume operations where cost matters",
|
|
||||||
weight=7.0,
|
|
||||||
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.GENERATE],
|
|
||||||
priority=PriorityEnum.COST,
|
|
||||||
capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION],
|
|
||||||
maxCost=0.01 # $0.01 per 1k tokens
|
|
||||||
),
|
|
||||||
|
|
||||||
# Image analysis specific
|
|
||||||
SelectionRule(
|
|
||||||
name="imageAnalyse",
|
|
||||||
condition="Image analysis operations",
|
|
||||||
weight=10.0,
|
|
||||||
operationTypes=[OperationTypeEnum.IMAGE_ANALYSE],
|
|
||||||
priority=PriorityEnum.QUALITY,
|
|
||||||
capabilities=[ModelCapabilitiesEnum.VISION, ModelCapabilitiesEnum.MULTIMODAL],
|
|
||||||
minQualityRating=8
|
|
||||||
),
|
|
||||||
|
|
||||||
# Web research specific
|
|
||||||
SelectionRule(
|
|
||||||
name="webResearch",
|
|
||||||
condition="Web research operations",
|
|
||||||
weight=9.0,
|
|
||||||
operationTypes=[OperationTypeEnum.WEB_RESEARCH],
|
|
||||||
priority=PriorityEnum.BALANCED,
|
|
||||||
capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.ANALYSIS, ModelCapabilitiesEnum.WEB_SEARCH],
|
|
||||||
minQualityRating=7
|
|
||||||
),
|
|
||||||
|
|
||||||
# Large context requirements
|
|
||||||
SelectionRule(
|
|
||||||
name="largeContext",
|
|
||||||
condition="Operations requiring large context",
|
|
||||||
weight=8.0,
|
|
||||||
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.ANALYSE],
|
|
||||||
priority=PriorityEnum.BALANCED,
|
|
||||||
capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION],
|
|
||||||
minContextLength=100000 # 100k tokens
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
def _loadFallbackModels(self) -> Dict[str, Dict[str, Any]]:
|
|
||||||
"""Load fallback model selection criteria."""
|
|
||||||
return {
|
|
||||||
OperationTypeEnum.GENERAL: {
|
|
||||||
"priorityOrder": ["speed", "quality", "cost"],
|
|
||||||
"operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.CHAT],
|
|
||||||
"minQualityRating": 5,
|
|
||||||
"maxCostPer1k": 0.01
|
|
||||||
},
|
|
||||||
OperationTypeEnum.IMAGE_ANALYSE: {
|
|
||||||
"priorityOrder": ["quality", "speed"],
|
|
||||||
"operationTypes": [ModelCapabilitiesEnum.VISION, ModelCapabilitiesEnum.MULTIMODAL],
|
|
||||||
"minQualityRating": 8,
|
|
||||||
"maxCostPer1k": 0.1
|
|
||||||
},
|
|
||||||
OperationTypeEnum.IMAGE_GENERATE: {
|
|
||||||
"priorityOrder": ["quality", "speed"],
|
|
||||||
"operationTypes": [ModelCapabilitiesEnum.IMAGE_GENERATE, ModelCapabilitiesEnum.ART, ModelCapabilitiesEnum.VISUAL_CREATION],
|
|
||||||
"minQualityRating": 8,
|
|
||||||
"maxCostPer1k": 0.1
|
|
||||||
},
|
|
||||||
OperationTypeEnum.WEB_RESEARCH: {
|
|
||||||
"priorityOrder": ["quality", "speed", "cost"],
|
|
||||||
"operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.ANALYSIS],
|
|
||||||
"preferredTags": [ModelCapabilitiesEnum.WEB_SEARCH],
|
|
||||||
"minQualityRating": 7,
|
|
||||||
"maxCostPer1k": 0.02
|
|
||||||
},
|
|
||||||
OperationTypeEnum.PLAN: {
|
|
||||||
"priorityOrder": ["quality", "speed"],
|
|
||||||
"operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.REASONING, ModelCapabilitiesEnum.ANALYSIS],
|
|
||||||
"preferredTags": [PriorityEnum.QUALITY],
|
|
||||||
"minQualityRating": 8,
|
|
||||||
"maxCostPer1k": 0.1
|
|
||||||
},
|
|
||||||
OperationTypeEnum.ANALYSE: {
|
|
||||||
"priorityOrder": ["quality", "speed"],
|
|
||||||
"operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.ANALYSIS, ModelCapabilitiesEnum.REASONING],
|
|
||||||
"preferredTags": [PriorityEnum.QUALITY],
|
|
||||||
"minQualityRating": 8,
|
|
||||||
"maxCostPer1k": 0.1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def getRulesForOperation(self, operationType: str) -> List[SelectionRule]:
|
|
||||||
"""Get rules that apply to a specific operation type."""
|
|
||||||
return [rule for rule in self.rules if operationType in rule.operationTypes]
|
|
||||||
|
|
||||||
def getFallbackCriteria(self, operationType: str) -> Dict[str, Any]:
|
|
||||||
"""Get fallback selection criteria for a specific operation type."""
|
|
||||||
return self.fallbackModels.get(operationType, self.fallbackModels[OperationTypeEnum.GENERAL])
|
|
||||||
|
|
||||||
def addRule(self, rule: SelectionRule):
|
|
||||||
"""Add a new selection rule."""
|
|
||||||
self.rules.append(rule)
|
|
||||||
|
|
||||||
def removeRule(self, ruleName: str):
|
|
||||||
"""Remove a selection rule by name."""
|
|
||||||
self.rules = [rule for rule in self.rules if rule.name != ruleName]
|
|
||||||
|
|
||||||
def updateRule(self, ruleName: str, **kwargs):
|
|
||||||
"""Update an existing rule."""
|
|
||||||
for rule in self.rules:
|
|
||||||
if rule.name == ruleName:
|
|
||||||
for key, value in kwargs.items():
|
|
||||||
if hasattr(rule, key):
|
|
||||||
setattr(rule, key, value)
|
|
||||||
break
|
|
||||||
|
|
||||||
|
|
||||||
# Global configuration instance
|
|
||||||
model_selection_config = ModelSelectionConfig()
|
|
||||||
|
|
@ -1,20 +1,20 @@
|
||||||
"""
|
"""
|
||||||
Dynamic model selector using configurable rules and scoring.
|
Simplified model selection based on model properties and priority-based sorting.
|
||||||
|
No complex rules needed - just filter by properties and sort by priority!
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import List, Optional, Dict, Any, Tuple
|
from typing import List, Dict, Any, Optional
|
||||||
from modules.datamodels.datamodelAi import AiModel, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum, ModelCapabilitiesEnum
|
from modules.datamodels.datamodelAi import AiModel, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
||||||
from modules.aicore.aicoreModelSelectionConfig import model_selection_config
|
|
||||||
|
|
||||||
|
# Configure logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class ModelSelector:
|
class ModelSelector:
|
||||||
"""Dynamic model selector using configurable rules."""
|
"""Simple model selector based on properties and priority-based sorting."""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.config = model_selection_config
|
logger.info("ModelSelector initialized with simplified approach")
|
||||||
|
|
||||||
def selectModel(self,
|
def selectModel(self,
|
||||||
prompt: str,
|
prompt: str,
|
||||||
|
|
@ -22,270 +22,7 @@ class ModelSelector:
|
||||||
options: AiCallOptions,
|
options: AiCallOptions,
|
||||||
availableModels: List[AiModel]) -> Optional[AiModel]:
|
availableModels: List[AiModel]) -> Optional[AiModel]:
|
||||||
"""
|
"""
|
||||||
Select the best model based on configurable rules and scoring.
|
Select the best model using simple filtering and priority-based sorting.
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt: User prompt
|
|
||||||
context: Context data
|
|
||||||
options: AI call options
|
|
||||||
availableModels: List of available models to choose from
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Selected model or None if no suitable model found
|
|
||||||
"""
|
|
||||||
if not availableModels:
|
|
||||||
logger.warning("No models available for selection")
|
|
||||||
return None
|
|
||||||
|
|
||||||
logger.info(f"Selecting model for operation: {options.operationType}, priority: {options.priority}")
|
|
||||||
|
|
||||||
# Calculate input size
|
|
||||||
inputSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))
|
|
||||||
|
|
||||||
# Get applicable rules
|
|
||||||
rules = self.config.getRulesForOperation(options.operationType)
|
|
||||||
logger.debug(f"Found {len(rules)} applicable rules for {options.operationType}")
|
|
||||||
|
|
||||||
# Score each model
|
|
||||||
scoredModels = []
|
|
||||||
for model in availableModels:
|
|
||||||
if not model.isAvailable:
|
|
||||||
continue
|
|
||||||
|
|
||||||
score = self._calculateModelScore(model, inputSize, options, rules)
|
|
||||||
if score > 0: # Only consider models with positive scores
|
|
||||||
scoredModels.append((model, score))
|
|
||||||
logger.debug(f"Model {model.name}: score={score:.2f}")
|
|
||||||
|
|
||||||
if not scoredModels:
|
|
||||||
logger.warning("No models passed the selection criteria, trying fallback criteria")
|
|
||||||
# Try fallback criteria
|
|
||||||
fallbackCriteria = self.getFallbackCriteria(options.operationType)
|
|
||||||
return self._selectWithFallbackCriteria(availableModels, fallbackCriteria, inputSize, options)
|
|
||||||
|
|
||||||
# Sort by score (highest first)
|
|
||||||
scoredModels.sort(key=lambda x: x[1], reverse=True)
|
|
||||||
|
|
||||||
selectedModel = scoredModels[0][0]
|
|
||||||
selectedScore = scoredModels[0][1]
|
|
||||||
|
|
||||||
logger.info(f"Selected model: {selectedModel.name} (score: {selectedScore:.2f})")
|
|
||||||
|
|
||||||
# Log selection details
|
|
||||||
self._logSelectionDetails(selectedModel, inputSize, options)
|
|
||||||
|
|
||||||
return selectedModel
|
|
||||||
|
|
||||||
def _calculateModelScore(self,
|
|
||||||
model: AiModel,
|
|
||||||
inputSize: int,
|
|
||||||
options: AiCallOptions,
|
|
||||||
rules: List) -> float:
|
|
||||||
"""Calculate score for a model based on rules and criteria."""
|
|
||||||
score = 0.0
|
|
||||||
|
|
||||||
# Check basic requirements
|
|
||||||
if not self._meetsBasicRequirements(model, inputSize, options):
|
|
||||||
return 0.0
|
|
||||||
|
|
||||||
# Apply rules
|
|
||||||
for rule in rules:
|
|
||||||
ruleScore = self._applyRule(model, inputSize, options, rule)
|
|
||||||
score += ruleScore * rule.weight
|
|
||||||
|
|
||||||
# Apply priority-based scoring
|
|
||||||
priorityScore = self._applyPriorityScoring(model, options)
|
|
||||||
score += priorityScore
|
|
||||||
|
|
||||||
# Apply processing mode scoring
|
|
||||||
modeScore = self._applyProcessingModeScoring(model, options)
|
|
||||||
score += modeScore
|
|
||||||
|
|
||||||
# Apply cost constraints
|
|
||||||
if not self._meetsCostConstraints(model, inputSize, options):
|
|
||||||
score *= 0.1 # Heavily penalize but don't eliminate
|
|
||||||
|
|
||||||
return max(0.0, score)
|
|
||||||
|
|
||||||
def _meetsBasicRequirements(self, model: AiModel, inputSize: int, options: AiCallOptions) -> bool:
|
|
||||||
"""Check if model meets basic requirements."""
|
|
||||||
# Context length check
|
|
||||||
if model.contextLength > 0 and inputSize > model.contextLength * 0.8:
|
|
||||||
logger.debug(f"Model {model.name} rejected: input too large ({inputSize} > {model.contextLength * 0.8})")
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Required operation types check
|
|
||||||
if options.operationTypes:
|
|
||||||
if not all(opType in model.operationTypes for opType in options.operationTypes):
|
|
||||||
logger.debug(f"Model {model.name} rejected: missing required operation types")
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Capabilities check
|
|
||||||
if options.capabilities:
|
|
||||||
if not all(cap in model.capabilities for cap in options.capabilities):
|
|
||||||
logger.debug(f"Model {model.name} rejected: missing required capabilities")
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Avoid operation types check
|
|
||||||
for rule in self.config.getRulesForOperation(options.operationType):
|
|
||||||
if any(opType in model.operationTypes for opType in rule.avoidOperationTypes):
|
|
||||||
logger.debug(f"Model {model.name} rejected: has avoid operation types")
|
|
||||||
return False
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
def _applyRule(self, model: AiModel, inputSize: int, options: AiCallOptions, rule) -> float:
|
|
||||||
"""Apply a specific rule to calculate score contribution."""
|
|
||||||
score = 0.0
|
|
||||||
|
|
||||||
# Required operation types match
|
|
||||||
if all(opType in model.operationTypes for opType in rule.operationTypes):
|
|
||||||
score += 1.0
|
|
||||||
|
|
||||||
# Preferred capabilities match
|
|
||||||
preferredMatches = sum(1 for cap in rule.preferredCapabilities if cap in model.capabilities)
|
|
||||||
if rule.preferredCapabilities:
|
|
||||||
score += (preferredMatches / len(rule.preferredCapabilities)) * 0.5
|
|
||||||
|
|
||||||
# Quality rating check
|
|
||||||
if rule.minQualityRating and model.qualityRating >= rule.minQualityRating:
|
|
||||||
score += 0.3
|
|
||||||
|
|
||||||
# Context length check
|
|
||||||
if rule.minContextLength and model.contextLength >= rule.minContextLength:
|
|
||||||
score += 0.2
|
|
||||||
|
|
||||||
return score
|
|
||||||
|
|
||||||
def _applyPriorityScoring(self, model: AiModel, options: AiCallOptions) -> float:
|
|
||||||
"""Apply priority-based scoring."""
|
|
||||||
if options.priority == PriorityEnum.SPEED:
|
|
||||||
return model.speedRating * 0.1
|
|
||||||
elif options.priority == PriorityEnum.QUALITY:
|
|
||||||
return model.qualityRating * 0.1
|
|
||||||
elif options.priority == PriorityEnum.COST:
|
|
||||||
# Lower cost = higher score
|
|
||||||
costScore = max(0, 1.0 - (model.costPer1kTokensInput * 1000))
|
|
||||||
return costScore * 0.1
|
|
||||||
else: # BALANCED
|
|
||||||
return (model.qualityRating + model.speedRating) * 0.05
|
|
||||||
|
|
||||||
def _applyProcessingModeScoring(self, model: AiModel, options: AiCallOptions) -> float:
|
|
||||||
"""Apply processing mode scoring."""
|
|
||||||
if options.processingMode == ProcessingModeEnum.DETAILED:
|
|
||||||
if model.priority == PriorityEnum.QUALITY:
|
|
||||||
return 0.2
|
|
||||||
elif options.processingMode == ProcessingModeEnum.BASIC:
|
|
||||||
if model.priority == PriorityEnum.SPEED:
|
|
||||||
return 0.2
|
|
||||||
|
|
||||||
return 0.0
|
|
||||||
|
|
||||||
def _meetsCostConstraints(self, model: AiModel, inputSize: int, options: AiCallOptions) -> bool:
|
|
||||||
"""Check if model meets cost constraints."""
|
|
||||||
if options.maxCost is None:
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Estimate cost
|
|
||||||
estimatedTokens = inputSize / 4
|
|
||||||
estimatedCost = (estimatedTokens / 1000) * model.costPer1kTokensInput
|
|
||||||
|
|
||||||
return estimatedCost <= options.maxCost
|
|
||||||
|
|
||||||
def _logSelectionDetails(self, model: AiModel, inputSize: int, options: AiCallOptions):
|
|
||||||
"""Log detailed selection information."""
|
|
||||||
logger.info(f"Model Selection Details:")
|
|
||||||
logger.info(f" Selected: {model.displayName} ({model.name})")
|
|
||||||
logger.info(f" Connector: {model.connectorType}")
|
|
||||||
logger.info(f" Operation: {options.operationType}")
|
|
||||||
logger.info(f" Priority: {options.priority}")
|
|
||||||
logger.info(f" Processing Mode: {options.processingMode}")
|
|
||||||
logger.info(f" Input Size: {inputSize} bytes")
|
|
||||||
logger.info(f" Context Length: {model.contextLength}")
|
|
||||||
logger.info(f" Max Tokens: {model.maxTokens}")
|
|
||||||
logger.info(f" Quality Rating: {model.qualityRating}/10")
|
|
||||||
logger.info(f" Speed Rating: {model.speedRating}/10")
|
|
||||||
logger.info(f" Cost: ${model.costPer1kTokensInput:.4f}/1k tokens")
|
|
||||||
logger.info(f" Capabilities: {', '.join(model.capabilities)}")
|
|
||||||
logger.info(f" Priority: {model.priority}")
|
|
||||||
|
|
||||||
def getFallbackCriteria(self, operationType: str) -> Dict[str, Any]:
|
|
||||||
"""Get fallback selection criteria for an operation type."""
|
|
||||||
return self.config.getFallbackCriteria(operationType)
|
|
||||||
|
|
||||||
def _selectWithFallbackCriteria(self,
|
|
||||||
availableModels: List[AiModel],
|
|
||||||
fallbackCriteria: Dict[str, Any],
|
|
||||||
inputSize: int,
|
|
||||||
options: AiCallOptions) -> Optional[AiModel]:
|
|
||||||
"""Select model using fallback criteria when normal selection fails."""
|
|
||||||
logger.info("Using fallback criteria for model selection")
|
|
||||||
|
|
||||||
# Filter models by fallback criteria
|
|
||||||
candidates = []
|
|
||||||
for model in availableModels:
|
|
||||||
if not model.isAvailable:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check required operation types
|
|
||||||
if fallbackCriteria.get("operationTypes"):
|
|
||||||
if not all(opType in model.operationTypes for opType in fallbackCriteria["operationTypes"]):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check quality rating
|
|
||||||
if fallbackCriteria.get("minQualityRating"):
|
|
||||||
if model.qualityRating < fallbackCriteria["minQualityRating"]:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check cost
|
|
||||||
if fallbackCriteria.get("maxCostPer1k"):
|
|
||||||
if model.costPer1kTokensInput > fallbackCriteria["maxCostPer1k"]:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check context length
|
|
||||||
if model.contextLength > 0 and inputSize > model.contextLength * 0.8:
|
|
||||||
continue
|
|
||||||
|
|
||||||
candidates.append(model)
|
|
||||||
|
|
||||||
if not candidates:
|
|
||||||
logger.error("No models available even with fallback criteria")
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Sort by priority order from fallback criteria
|
|
||||||
priorityOrder = fallbackCriteria.get("priorityOrder", ["quality", "speed", "cost"])
|
|
||||||
|
|
||||||
def _getPriorityScore(model: AiModel) -> float:
|
|
||||||
score = 0.0
|
|
||||||
for i, priority in enumerate(priorityOrder):
|
|
||||||
weight = len(priorityOrder) - i # Higher weight for earlier priorities
|
|
||||||
if priority == "quality":
|
|
||||||
score += model.qualityRating * weight
|
|
||||||
elif priority == "speed":
|
|
||||||
score += model.speedRating * weight
|
|
||||||
elif priority == "cost":
|
|
||||||
# Lower cost = higher score
|
|
||||||
score += (1.0 - model.costPer1kTokensInput * 1000) * weight
|
|
||||||
return score
|
|
||||||
|
|
||||||
candidates.sort(key=_getPriorityScore, reverse=True)
|
|
||||||
selectedModel = candidates[0]
|
|
||||||
|
|
||||||
logger.info(f"Fallback selection: {selectedModel.name} (score: {_getPriorityScore(selectedModel):.2f})")
|
|
||||||
return selectedModel
|
|
||||||
|
|
||||||
def getFallbackModels(self,
|
|
||||||
prompt: str,
|
|
||||||
context: str,
|
|
||||||
options: AiCallOptions,
|
|
||||||
availableModels: List[AiModel]) -> List[AiModel]:
|
|
||||||
"""
|
|
||||||
Get prioritized list of models for fallback sequence.
|
|
||||||
|
|
||||||
Steps:
|
|
||||||
1. Filter models by capability requirements
|
|
||||||
2. Rate models by business requirements (priority, processing mode)
|
|
||||||
3. Sort by rating (descending), then by cost (ascending)
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
prompt: User prompt
|
prompt: User prompt
|
||||||
|
|
@ -294,93 +31,195 @@ class ModelSelector:
|
||||||
availableModels: List of available models
|
availableModels: List of available models
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Prioritized list of models for fallback sequence
|
Best model for the request, or None if no suitable model found
|
||||||
"""
|
"""
|
||||||
if not availableModels:
|
try:
|
||||||
logger.warning("No models available for fallback selection")
|
# Get failover models (which includes all filtering and sorting)
|
||||||
|
failoverModelList = self.getFailoverModelList(prompt, context, options, availableModels)
|
||||||
|
|
||||||
|
if not failoverModelList:
|
||||||
|
logger.warning("No suitable models found for the request")
|
||||||
|
return None
|
||||||
|
|
||||||
|
selectedModel = failoverModelList[0] # First model is the best one
|
||||||
|
logger.info(f"Selected model: {selectedModel.name} (quality: {selectedModel.qualityRating}, cost: ${selectedModel.costPer1kTokensInput:.4f})")
|
||||||
|
return selectedModel
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error selecting model: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def getFailoverModelList(self,
|
||||||
|
prompt: str,
|
||||||
|
context: str,
|
||||||
|
options: AiCallOptions,
|
||||||
|
availableModels: List[AiModel]) -> List[AiModel]:
|
||||||
|
"""
|
||||||
|
Get prioritized list of models using scoring-based ranking.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt: User prompt
|
||||||
|
context: Context data
|
||||||
|
options: AI call options
|
||||||
|
availableModels: List of available models
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of models sorted by score (descending)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
promptSize = len(prompt.encode("utf-8"))
|
||||||
|
contextSize = len(context.encode("utf-8"))
|
||||||
|
totalSize = promptSize + contextSize
|
||||||
|
|
||||||
|
# Step 1: Filter by operation type (MUST match)
|
||||||
|
operationFiltered = [m for m in availableModels if options.operationType in m.operationTypes]
|
||||||
|
logger.debug(f"After operation type filtering: {len(operationFiltered)} models")
|
||||||
|
|
||||||
|
# Step 2: Filter by prompt size (MUST be <= 80% of context size)
|
||||||
|
promptFiltered = [m for m in operationFiltered if m.contextLength == 0 or promptSize <= m.contextLength * 0.8]
|
||||||
|
logger.debug(f"After prompt size filtering: {len(promptFiltered)} models")
|
||||||
|
|
||||||
|
# Step 3: Calculate scores for each model
|
||||||
|
scoredModels = []
|
||||||
|
for model in promptFiltered:
|
||||||
|
score = self._calculateModelScore(model, promptSize, contextSize, totalSize, options)
|
||||||
|
scoredModels.append((model, score))
|
||||||
|
logger.debug(f"Model {model.name}: score={score:.3f}")
|
||||||
|
|
||||||
|
# Step 4: Sort by score (descending)
|
||||||
|
scoredModels.sort(key=lambda x: x[1], reverse=True)
|
||||||
|
sortedModels = [model for model, score in scoredModels]
|
||||||
|
|
||||||
|
logger.debug(f"Final sorted models: {len(sortedModels)} models")
|
||||||
|
return sortedModels
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting failover models: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
logger.info(f"Building fallback sequence for operation: {options.operationType}, priority: {options.priority}")
|
def _calculateModelScore(self, model: AiModel, promptSize: int, contextSize: int, totalSize: int, options: AiCallOptions) -> float:
|
||||||
|
"""
|
||||||
|
Calculate a score for a model based on how well it fulfills the criteria.
|
||||||
|
|
||||||
# Step 1: Filter by capability requirements
|
Args:
|
||||||
capableModels = self._filterByCapabilities(availableModels, options)
|
model: The model to score
|
||||||
logger.info(f"Step 1 - Capable models: {[m.name for m in capableModels]}")
|
promptSize: Size of the prompt in bytes
|
||||||
|
contextSize: Size of the context in bytes
|
||||||
if not capableModels:
|
totalSize: Total size (prompt + context) in bytes
|
||||||
logger.warning("No models meet capability requirements")
|
options: AI call options
|
||||||
return []
|
|
||||||
|
|
||||||
# Step 2: Rate models by business requirements
|
|
||||||
ratedModels = self._rateModelsByBusinessRequirements(capableModels, prompt, context, options)
|
|
||||||
logger.info(f"Step 2 - Rated models: {[(m.name, rating) for m, rating in ratedModels]}")
|
|
||||||
|
|
||||||
# Step 3: Sort by rating (descending), then by cost (ascending)
|
|
||||||
sortedModels = self._sortModelsByRatingAndCost(ratedModels)
|
|
||||||
logger.info(f"Step 3 - Sorted fallback sequence: {[m.name for m in sortedModels]}")
|
|
||||||
|
|
||||||
return sortedModels
|
|
||||||
|
|
||||||
def _filterByCapabilities(self, models: List[AiModel], options: AiCallOptions) -> List[AiModel]:
|
|
||||||
"""Filter models by required capabilities."""
|
|
||||||
capableModels = []
|
|
||||||
|
|
||||||
for model in models:
|
|
||||||
if not model.isAvailable:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check if model supports required capabilities
|
Returns:
|
||||||
if options.capabilities:
|
Score for the model (higher is better)
|
||||||
if not all(cap in model.capabilities for cap in options.capabilities):
|
"""
|
||||||
logger.debug(f"Model {model.name} missing required capabilities: {options.capabilities}")
|
score = 0.0
|
||||||
continue
|
|
||||||
|
|
||||||
# Check operation type compatibility
|
|
||||||
if not self._meetsBasicRequirements(model, options):
|
|
||||||
logger.debug(f"Model {model.name} doesn't meet basic requirements")
|
|
||||||
continue
|
|
||||||
|
|
||||||
capableModels.append(model)
|
|
||||||
|
|
||||||
return capableModels
|
# 1. Prompt + Context size rating
|
||||||
|
if model.contextLength > 0:
|
||||||
def _rateModelsByBusinessRequirements(self,
|
modelMaxSize = model.contextLength * 0.8 # 80% of model context length
|
||||||
models: List[AiModel],
|
if totalSize <= modelMaxSize:
|
||||||
prompt: str,
|
# Within limits: rating = (prompt+contextsize) / (80% modelsize)
|
||||||
context: str,
|
score += totalSize / modelMaxSize
|
||||||
options: AiCallOptions) -> List[Tuple[AiModel, float]]:
|
else:
|
||||||
"""Rate models based on business requirements (priority, processing mode)."""
|
# Exceeds limits: rating = modelsize / (prompt+contextsize) (ensures minimum chunks)
|
||||||
ratedModels = []
|
score += modelMaxSize / totalSize
|
||||||
inputSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))
|
else:
|
||||||
|
# No context length limit
|
||||||
|
score += 1.0
|
||||||
|
|
||||||
for model in models:
|
# 2. Processing Mode rating
|
||||||
# Base score from model selection logic
|
if hasattr(options, 'processingMode') and options.processingMode:
|
||||||
baseScore = self._calculateModelScore(model, inputSize, options, [])
|
score += self._getProcessingModeRating(model.processingMode, options.processingMode)
|
||||||
|
else:
|
||||||
# Apply priority-based scoring
|
score += 1.0 # No preference
|
||||||
priorityScore = self._applyPriorityScoring(model, options)
|
|
||||||
|
|
||||||
# Apply processing mode scoring
|
|
||||||
processingScore = self._applyProcessingModeScoring(model, options)
|
|
||||||
|
|
||||||
# Combine scores
|
|
||||||
totalScore = baseScore + priorityScore + processingScore
|
|
||||||
|
|
||||||
ratedModels.append((model, totalScore))
|
|
||||||
logger.debug(f"Model {model.name}: base={baseScore:.2f}, priority={priorityScore:.2f}, processing={processingScore:.2f}, total={totalScore:.2f}")
|
|
||||||
|
|
||||||
return ratedModels
|
# 3. Priority rating
|
||||||
|
if hasattr(options, 'priority') and options.priority:
|
||||||
def _sortModelsByRatingAndCost(self, ratedModels: List[Tuple[AiModel, float]]) -> List[AiModel]:
|
score += self._getPriorityRating(model, options.priority)
|
||||||
"""Sort models by rating (descending), then by cost (ascending)."""
|
else:
|
||||||
def sortKey(item):
|
score += 1.0 # No preference
|
||||||
model, rating = item
|
|
||||||
# Primary sort: rating (descending)
|
|
||||||
# Secondary sort: cost (ascending)
|
|
||||||
return (-rating, model.costPer1kTokensInput)
|
|
||||||
|
|
||||||
sortedItems = sorted(ratedModels, key=sortKey)
|
return score
|
||||||
return [model for model, rating in sortedItems]
|
|
||||||
|
def _getProcessingModeRating(self, modelMode: ProcessingModeEnum, requestedMode: ProcessingModeEnum) -> float:
|
||||||
|
"""Get processing mode rating based on compatibility."""
|
||||||
|
if modelMode == requestedMode:
|
||||||
|
return 1.0
|
||||||
|
|
||||||
|
# Compatibility matrix
|
||||||
|
if requestedMode == ProcessingModeEnum.BASIC:
|
||||||
|
if modelMode == ProcessingModeEnum.ADVANCED:
|
||||||
|
return 0.5
|
||||||
|
elif modelMode == ProcessingModeEnum.DETAILED:
|
||||||
|
return 0.2
|
||||||
|
|
||||||
|
elif requestedMode == ProcessingModeEnum.ADVANCED:
|
||||||
|
if modelMode == ProcessingModeEnum.BASIC:
|
||||||
|
return 0.2
|
||||||
|
elif modelMode == ProcessingModeEnum.DETAILED:
|
||||||
|
return 0.5
|
||||||
|
|
||||||
|
elif requestedMode == ProcessingModeEnum.DETAILED:
|
||||||
|
if modelMode == ProcessingModeEnum.BASIC:
|
||||||
|
return 0.2
|
||||||
|
elif modelMode == ProcessingModeEnum.ADVANCED:
|
||||||
|
return 0.5
|
||||||
|
|
||||||
|
return 0.0 # No compatibility
|
||||||
|
|
||||||
|
def _getPriorityRating(self, model: AiModel, requestedPriority: PriorityEnum) -> float:
|
||||||
|
"""Get priority rating based on model capabilities."""
|
||||||
|
if requestedPriority == PriorityEnum.BALANCED:
|
||||||
|
return 1.0
|
||||||
|
|
||||||
|
elif requestedPriority == PriorityEnum.SPEED:
|
||||||
|
return model.speedRating / 10.0
|
||||||
|
|
||||||
|
elif requestedPriority == PriorityEnum.QUALITY:
|
||||||
|
return model.qualityRating / 10.0
|
||||||
|
|
||||||
|
elif requestedPriority == PriorityEnum.COST:
|
||||||
|
# Cost priority: cost gives 1, speed gives 0.5, quality gives 0.2
|
||||||
|
# Lower cost is better, so we invert the cost rating
|
||||||
|
costRating = 1.0 - (model.costPer1kTokensInput / 0.1) # Normalize to 0-1
|
||||||
|
costRating = max(0, costRating) # Ensure non-negative
|
||||||
|
|
||||||
|
speedRating = model.speedRating / 10.0 * 0.5
|
||||||
|
qualityRating = model.qualityRating / 10.0 * 0.2
|
||||||
|
|
||||||
|
return costRating + speedRating + qualityRating
|
||||||
|
|
||||||
|
return 1.0 # Default
|
||||||
|
|
||||||
|
def _getSizeRating(self, model: AiModel, totalSize: int) -> float:
|
||||||
|
"""Get size rating for a model based on total input size."""
|
||||||
|
if model.contextLength > 0:
|
||||||
|
modelMaxSize = model.contextLength * 0.8 # 80% of model context length
|
||||||
|
if totalSize <= modelMaxSize:
|
||||||
|
# Within limits: rating = (prompt+contextsize) / (80% modelsize)
|
||||||
|
return totalSize / modelMaxSize
|
||||||
|
else:
|
||||||
|
# Exceeds limits: rating = modelsize / (prompt+contextsize) (ensures minimum chunks)
|
||||||
|
return modelMaxSize / totalSize
|
||||||
|
else:
|
||||||
|
# No context length limit
|
||||||
|
return 1.0
|
||||||
|
|
||||||
|
|
||||||
|
def _logModelDetails(self, model: AiModel):
|
||||||
|
"""Log detailed information about a model."""
|
||||||
|
logger.info(f"Model: {model.name}")
|
||||||
|
logger.info(f" Display Name: {model.displayName}")
|
||||||
|
logger.info(f" Connector: {model.connectorType}")
|
||||||
|
logger.info(f" Context Length: {model.contextLength}")
|
||||||
|
logger.info(f" Max Tokens: {model.maxTokens}")
|
||||||
|
logger.info(f" Quality Rating: {model.qualityRating}/10")
|
||||||
|
logger.info(f" Speed Rating: {model.speedRating}/10")
|
||||||
|
logger.info(f" Cost: ${model.costPer1kTokensInput:.4f}/1k tokens")
|
||||||
|
logger.info(f" Capabilities: {', '.join(model.capabilities)}")
|
||||||
|
logger.info(f" Priority: {model.priority}")
|
||||||
|
logger.info(f" Processing Mode: {model.processingMode}")
|
||||||
|
logger.info(f" Operation Types: {', '.join(model.operationTypes)}")
|
||||||
|
|
||||||
|
|
||||||
# Global selector instance
|
# Global model selector instance
|
||||||
model_selector = ModelSelector()
|
modelSelector = ModelSelector()
|
||||||
|
|
@ -63,7 +63,7 @@ class AiAnthropic(BaseConnectorAi):
|
||||||
functionCall=self.callAiBasic,
|
functionCall=self.callAiBasic,
|
||||||
priority=PriorityEnum.QUALITY,
|
priority=PriorityEnum.QUALITY,
|
||||||
processingMode=ProcessingModeEnum.DETAILED,
|
processingMode=ProcessingModeEnum.DETAILED,
|
||||||
operationTypes=[OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE],
|
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE, OperationTypeEnum.GENERATE],
|
||||||
version="claude-3-5-sonnet-20241022",
|
version="claude-3-5-sonnet-20241022",
|
||||||
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.015 + (bytesReceived / 4 / 1000) * 0.075
|
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.015 + (bytesReceived / 4 / 1000) * 0.075
|
||||||
),
|
),
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,7 @@ class AiInternal(BaseConnectorAi):
|
||||||
functionCall=self.extractDocument,
|
functionCall=self.extractDocument,
|
||||||
priority=PriorityEnum.COST,
|
priority=PriorityEnum.COST,
|
||||||
processingMode=ProcessingModeEnum.BASIC,
|
processingMode=ProcessingModeEnum.BASIC,
|
||||||
operationTypes=[OperationTypeEnum.GENERAL],
|
operationTypes=[OperationTypeEnum.EXTRACT],
|
||||||
version="internal-extractor-v1",
|
version="internal-extractor-v1",
|
||||||
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: 0.001 + (bytesSent + bytesReceived) / (1024 * 1024) * 0.01
|
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: 0.001 + (bytesSent + bytesReceived) / (1024 * 1024) * 0.01
|
||||||
),
|
),
|
||||||
|
|
|
||||||
|
|
@ -65,7 +65,7 @@ class AiOpenai(BaseConnectorAi):
|
||||||
functionCall=self.callAiBasic,
|
functionCall=self.callAiBasic,
|
||||||
priority=PriorityEnum.BALANCED,
|
priority=PriorityEnum.BALANCED,
|
||||||
processingMode=ProcessingModeEnum.ADVANCED,
|
processingMode=ProcessingModeEnum.ADVANCED,
|
||||||
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.ANALYSE],
|
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE, OperationTypeEnum.GENERATE],
|
||||||
version="gpt-4o",
|
version="gpt-4o",
|
||||||
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.03 + (bytesReceived / 4 / 1000) * 0.06
|
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.03 + (bytesReceived / 4 / 1000) * 0.06
|
||||||
),
|
),
|
||||||
|
|
@ -83,7 +83,7 @@ class AiOpenai(BaseConnectorAi):
|
||||||
functionCall=self.callAiBasic,
|
functionCall=self.callAiBasic,
|
||||||
priority=PriorityEnum.SPEED,
|
priority=PriorityEnum.SPEED,
|
||||||
processingMode=ProcessingModeEnum.BASIC,
|
processingMode=ProcessingModeEnum.BASIC,
|
||||||
operationTypes=[OperationTypeEnum.GENERAL],
|
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.GENERATE],
|
||||||
version="gpt-3.5-turbo",
|
version="gpt-3.5-turbo",
|
||||||
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0015 + (bytesReceived / 4 / 1000) * 0.002
|
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0015 + (bytesReceived / 4 / 1000) * 0.002
|
||||||
),
|
),
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,7 @@ class AiPerplexity(BaseConnectorAi):
|
||||||
functionCall=self.callAiBasic,
|
functionCall=self.callAiBasic,
|
||||||
priority=PriorityEnum.BALANCED,
|
priority=PriorityEnum.BALANCED,
|
||||||
processingMode=ProcessingModeEnum.ADVANCED,
|
processingMode=ProcessingModeEnum.ADVANCED,
|
||||||
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.WEB_RESEARCH],
|
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE, OperationTypeEnum.GENERATE, OperationTypeEnum.WEB_RESEARCH],
|
||||||
version="llama-3.1-sonar-large-128k-online",
|
version="llama-3.1-sonar-large-128k-online",
|
||||||
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.005 + (bytesReceived / 4 / 1000) * 0.005
|
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.005 + (bytesReceived / 4 / 1000) * 0.005
|
||||||
),
|
),
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,17 @@
|
||||||
from typing import Optional, List, Dict, Any, Literal, Callable
|
from typing import Optional, List, Dict, Any, Literal, Callable, TYPE_CHECKING
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
|
||||||
# Operation Types
|
# Operation Types
|
||||||
class OperationTypeEnum(str, Enum):
|
class OperationTypeEnum(str, Enum):
|
||||||
GENERAL = "general"
|
GENERAL = "general"
|
||||||
PLAN = "plan"
|
PLAN = "plan"
|
||||||
ANALYSE = "analyse"
|
ANALYSE = "analyse"
|
||||||
GENERATE = "generate"
|
GENERATE = "generate"
|
||||||
|
EXTRACT = "extract"
|
||||||
WEB_RESEARCH = "webResearch"
|
WEB_RESEARCH = "webResearch"
|
||||||
IMAGE_ANALYSE = "imageAnalyse"
|
IMAGE_ANALYSE = "imageAnalyse"
|
||||||
IMAGE_GENERATE = "imageGenerate"
|
IMAGE_GENERATE = "imageGenerate"
|
||||||
|
|
@ -141,6 +145,7 @@ class AiCallRequest(BaseModel):
|
||||||
prompt: str = Field(description="The user prompt")
|
prompt: str = Field(description="The user prompt")
|
||||||
context: Optional[str] = Field(default=None, description="Optional external context (e.g., extracted docs)")
|
context: Optional[str] = Field(default=None, description="Optional external context (e.g., extracted docs)")
|
||||||
options: AiCallOptions = Field(default_factory=AiCallOptions)
|
options: AiCallOptions = Field(default_factory=AiCallOptions)
|
||||||
|
contentParts: Optional[List['ContentPart']] = None # NEW: Content parts for model-aware chunking
|
||||||
|
|
||||||
|
|
||||||
class AiCallResponse(BaseModel):
|
class AiCallResponse(BaseModel):
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,16 @@ class ChunkResult(BaseModel):
|
||||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
class PartResult(BaseModel):
|
||||||
|
"""Preserves the relationship between a content part and its AI result."""
|
||||||
|
originalPart: ContentPart
|
||||||
|
aiResult: str
|
||||||
|
partIndex: int
|
||||||
|
documentId: str
|
||||||
|
processingTime: float = 0.0
|
||||||
|
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
class MergeStrategy(BaseModel):
|
class MergeStrategy(BaseModel):
|
||||||
"""Strategy configuration for merging content parts and AI results."""
|
"""Strategy configuration for merging content parts and AI results."""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ import time
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
from modules.aicore.aicoreModelRegistry import modelRegistry
|
from modules.aicore.aicoreModelRegistry import modelRegistry
|
||||||
from modules.aicore.aicoreModelSelector import model_selector
|
from modules.aicore.aicoreModelSelector import modelSelector
|
||||||
from modules.datamodels.datamodelAi import (
|
from modules.datamodels.datamodelAi import (
|
||||||
AiModel,
|
AiModel,
|
||||||
AiCallOptions,
|
AiCallOptions,
|
||||||
|
|
@ -70,7 +70,7 @@ class AiObjects:
|
||||||
raise ValueError("No AI models available")
|
raise ValueError("No AI models available")
|
||||||
|
|
||||||
# Use the dynamic model selector
|
# Use the dynamic model selector
|
||||||
selectedModel = model_selector.selectModel(prompt, context, options, availableModels)
|
selectedModel = modelSelector.selectModel(prompt, context, options, availableModels)
|
||||||
|
|
||||||
if not selectedModel:
|
if not selectedModel:
|
||||||
logger.error("No suitable model found for the given criteria")
|
logger.error("No suitable model found for the given criteria")
|
||||||
|
|
@ -81,8 +81,15 @@ class AiObjects:
|
||||||
|
|
||||||
|
|
||||||
async def call(self, request: AiCallRequest) -> AiCallResponse:
|
async def call(self, request: AiCallRequest) -> AiCallResponse:
|
||||||
"""Call AI model for text generation with fallback mechanism."""
|
"""Call AI model for text generation with model-aware chunking."""
|
||||||
|
# Handle content parts (unified path)
|
||||||
|
if hasattr(request, 'contentParts') and request.contentParts:
|
||||||
|
return await self._callWithContentParts(request)
|
||||||
|
# Handle traditional text/context calls
|
||||||
|
return await self._callWithTextContext(request)
|
||||||
|
|
||||||
|
async def _callWithTextContext(self, request: AiCallRequest) -> AiCallResponse:
|
||||||
|
"""Call AI model for traditional text/context calls with fallback mechanism."""
|
||||||
prompt = request.prompt
|
prompt = request.prompt
|
||||||
context = request.context or ""
|
context = request.context or ""
|
||||||
options = request.options
|
options = request.options
|
||||||
|
|
@ -108,11 +115,11 @@ class AiObjects:
|
||||||
temperature = 0.2
|
temperature = 0.2
|
||||||
maxTokens = getattr(options, "maxTokens", None)
|
maxTokens = getattr(options, "maxTokens", None)
|
||||||
|
|
||||||
# Get fallback models for this operation type
|
# Get failover models for this operation type
|
||||||
availableModels = modelRegistry.getAvailableModels()
|
availableModels = modelRegistry.getAvailableModels()
|
||||||
fallbackModels = model_selector.getFallbackModels(prompt, context, options, availableModels)
|
failoverModelList = modelSelector.getFailoverModelList(prompt, context, options, availableModels)
|
||||||
|
|
||||||
if not fallbackModels:
|
if not failoverModelList:
|
||||||
errorMsg = f"No suitable models found for operation {options.operationType}"
|
errorMsg = f"No suitable models found for operation {options.operationType}"
|
||||||
logger.error(errorMsg)
|
logger.error(errorMsg)
|
||||||
return AiCallResponse(
|
return AiCallResponse(
|
||||||
|
|
@ -125,11 +132,11 @@ class AiObjects:
|
||||||
errorCount=1
|
errorCount=1
|
||||||
)
|
)
|
||||||
|
|
||||||
# Try each model in fallback sequence
|
# Try each model in failover sequence
|
||||||
lastError = None
|
lastError = None
|
||||||
for attempt, model in enumerate(fallbackModels):
|
for attempt, model in enumerate(failoverModelList):
|
||||||
try:
|
try:
|
||||||
logger.info(f"Attempting AI call with model: {model.name} (attempt {attempt + 1}/{len(fallbackModels)})")
|
logger.info(f"Attempting AI call with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
|
||||||
|
|
||||||
# Call the model
|
# Call the model
|
||||||
response = await self._callWithModel(model, prompt, context, temperature, maxTokens, inputBytes)
|
response = await self._callWithModel(model, prompt, context, temperature, maxTokens, inputBytes)
|
||||||
|
|
@ -142,15 +149,15 @@ class AiObjects:
|
||||||
logger.warning(f"❌ AI call failed with model {model.name}: {str(e)}")
|
logger.warning(f"❌ AI call failed with model {model.name}: {str(e)}")
|
||||||
|
|
||||||
# If this is not the last model, try the next one
|
# If this is not the last model, try the next one
|
||||||
if attempt < len(fallbackModels) - 1:
|
if attempt < len(failoverModelList) - 1:
|
||||||
logger.info(f"🔄 Trying next fallback model...")
|
logger.info(f"🔄 Trying next failover model...")
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
# All models failed
|
# All models failed
|
||||||
logger.error(f"💥 All {len(fallbackModels)} models failed for operation {options.operationType}")
|
logger.error(f"💥 All {len(failoverModelList)} models failed for operation {options.operationType}")
|
||||||
break
|
break
|
||||||
|
|
||||||
# All fallback attempts failed - return error response
|
# All failover attempts failed - return error response
|
||||||
errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {str(lastError)}"
|
errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {str(lastError)}"
|
||||||
logger.error(errorMsg)
|
logger.error(errorMsg)
|
||||||
return AiCallResponse(
|
return AiCallResponse(
|
||||||
|
|
@ -163,6 +170,241 @@ class AiObjects:
|
||||||
errorCount=1
|
errorCount=1
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def _callWithContentParts(self, request: AiCallRequest) -> AiCallResponse:
|
||||||
|
"""Process content parts with model-aware chunking (unified for single and multiple parts)."""
|
||||||
|
prompt = request.prompt
|
||||||
|
options = request.options
|
||||||
|
contentParts = request.contentParts
|
||||||
|
|
||||||
|
# Get failover models
|
||||||
|
availableModels = modelRegistry.getAvailableModels()
|
||||||
|
failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels)
|
||||||
|
|
||||||
|
if not failoverModelList:
|
||||||
|
return self._createErrorResponse("No suitable models found", 0, 0)
|
||||||
|
|
||||||
|
# Process each content part
|
||||||
|
allResults = []
|
||||||
|
for contentPart in contentParts:
|
||||||
|
partResult = await self._processContentPartWithFallback(contentPart, prompt, options, failoverModelList)
|
||||||
|
allResults.append(partResult)
|
||||||
|
|
||||||
|
# Merge all results
|
||||||
|
mergedContent = self._mergePartResults(allResults)
|
||||||
|
|
||||||
|
return AiCallResponse(
|
||||||
|
content=mergedContent,
|
||||||
|
modelName="multiple",
|
||||||
|
priceUsd=sum(r.priceUsd for r in allResults),
|
||||||
|
processingTime=sum(r.processingTime for r in allResults),
|
||||||
|
bytesSent=sum(r.bytesSent for r in allResults),
|
||||||
|
bytesReceived=sum(r.bytesReceived for r in allResults),
|
||||||
|
errorCount=sum(r.errorCount for r in allResults)
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _processContentPartWithFallback(self, contentPart, prompt: str, options, failoverModelList) -> AiCallResponse:
|
||||||
|
"""Process a single content part with model-aware chunking and fallback."""
|
||||||
|
lastError = None
|
||||||
|
|
||||||
|
for attempt, model in enumerate(failoverModelList):
|
||||||
|
try:
|
||||||
|
logger.info(f"Processing content part with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
|
||||||
|
|
||||||
|
# Check if part fits in model context
|
||||||
|
partSize = len(contentPart.data.encode('utf-8')) if contentPart.data else 0
|
||||||
|
modelContextBytes = model.contextLength * 4 # Convert tokens to bytes
|
||||||
|
|
||||||
|
if partSize <= modelContextBytes:
|
||||||
|
# Part fits - call AI directly
|
||||||
|
response = await self._callWithModel(model, prompt, contentPart.data, 0.2, None, partSize)
|
||||||
|
logger.info(f"✅ Content part processed successfully with model: {model.name}")
|
||||||
|
return response
|
||||||
|
else:
|
||||||
|
# Part too large - chunk it
|
||||||
|
chunks = await self._chunkContentPart(contentPart, model, options)
|
||||||
|
if not chunks:
|
||||||
|
raise ValueError(f"Failed to chunk content part for model {model.name}")
|
||||||
|
|
||||||
|
# Process each chunk
|
||||||
|
chunkResults = []
|
||||||
|
for chunk in chunks:
|
||||||
|
chunkResponse = await self._callWithModel(model, prompt, chunk['data'], 0.2, None, chunk['size'])
|
||||||
|
chunkResults.append(chunkResponse)
|
||||||
|
|
||||||
|
# Merge chunk results
|
||||||
|
mergedContent = self._mergeChunkResults(chunkResults)
|
||||||
|
totalPrice = sum(r.priceUsd for r in chunkResults)
|
||||||
|
totalTime = sum(r.processingTime for r in chunkResults)
|
||||||
|
totalBytesSent = sum(r.bytesSent for r in chunkResults)
|
||||||
|
totalBytesReceived = sum(r.bytesReceived for r in chunkResults)
|
||||||
|
totalErrors = sum(r.errorCount for r in chunkResults)
|
||||||
|
|
||||||
|
logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)")
|
||||||
|
return AiCallResponse(
|
||||||
|
content=mergedContent,
|
||||||
|
modelName=model.name,
|
||||||
|
priceUsd=totalPrice,
|
||||||
|
processingTime=totalTime,
|
||||||
|
bytesSent=totalBytesSent,
|
||||||
|
bytesReceived=totalBytesReceived,
|
||||||
|
errorCount=totalErrors
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
lastError = e
|
||||||
|
logger.warning(f"❌ Model {model.name} failed for content part: {str(e)}")
|
||||||
|
|
||||||
|
if attempt < len(failoverModelList) - 1:
|
||||||
|
logger.info(f"🔄 Trying next failover model...")
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
logger.error(f"💥 All {len(failoverModelList)} models failed for content part")
|
||||||
|
break
|
||||||
|
|
||||||
|
# All models failed
|
||||||
|
return self._createErrorResponse(f"All models failed: {str(lastError)}", 0, 0)
|
||||||
|
|
||||||
|
async def _chunkContentPart(self, contentPart, model, options) -> List[Dict[str, Any]]:
|
||||||
|
"""Chunk a content part based on model capabilities."""
|
||||||
|
# Calculate model-specific chunk sizes
|
||||||
|
modelContextBytes = model.contextLength * 4 # Convert tokens to bytes
|
||||||
|
maxContextBytes = int(modelContextBytes * 0.9) # 90% of context length
|
||||||
|
textChunkSize = int(maxContextBytes * 0.7) # 70% of max context for text chunks
|
||||||
|
imageChunkSize = int(maxContextBytes * 0.8) # 80% of max context for image chunks
|
||||||
|
|
||||||
|
# Build chunking options
|
||||||
|
chunkingOptions = {
|
||||||
|
"textChunkSize": textChunkSize,
|
||||||
|
"imageChunkSize": imageChunkSize,
|
||||||
|
"maxSize": maxContextBytes,
|
||||||
|
"chunkAllowed": True
|
||||||
|
}
|
||||||
|
|
||||||
|
# Get appropriate chunker
|
||||||
|
from modules.services.serviceExtraction.subRegistry import ChunkerRegistry
|
||||||
|
chunkerRegistry = ChunkerRegistry()
|
||||||
|
chunker = chunkerRegistry.resolve(contentPart.typeGroup)
|
||||||
|
|
||||||
|
if not chunker:
|
||||||
|
logger.warning(f"No chunker found for typeGroup: {contentPart.typeGroup}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Chunk the content part
|
||||||
|
try:
|
||||||
|
chunks = chunker.chunk(contentPart, chunkingOptions)
|
||||||
|
logger.debug(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part")
|
||||||
|
return chunks
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Chunking failed for {contentPart.typeGroup}: {str(e)}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
def _mergePartResults(self, partResults: List[AiCallResponse]) -> str:
|
||||||
|
"""Merge part results using the existing sophisticated merging system."""
|
||||||
|
if not partResults:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# Convert AiCallResponse results to ContentParts for merging
|
||||||
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
from modules.services.serviceExtraction.subUtils import makeId
|
||||||
|
|
||||||
|
content_parts = []
|
||||||
|
for i, result in enumerate(partResults):
|
||||||
|
if result.content:
|
||||||
|
content_part = ContentPart(
|
||||||
|
id=makeId(),
|
||||||
|
parentId=None,
|
||||||
|
label=f"ai_result_{i}",
|
||||||
|
typeGroup="text", # Default to text for AI results
|
||||||
|
mimeType="text/plain",
|
||||||
|
data=result.content,
|
||||||
|
metadata={
|
||||||
|
"aiResult": True,
|
||||||
|
"modelName": result.modelName,
|
||||||
|
"priceUsd": result.priceUsd,
|
||||||
|
"processingTime": result.processingTime,
|
||||||
|
"bytesSent": result.bytesSent,
|
||||||
|
"bytesReceived": result.bytesReceived
|
||||||
|
}
|
||||||
|
)
|
||||||
|
content_parts.append(content_part)
|
||||||
|
|
||||||
|
# Use existing merging system
|
||||||
|
merge_strategy = {
|
||||||
|
"useIntelligentMerging": True,
|
||||||
|
"groupBy": "typeGroup",
|
||||||
|
"orderBy": "id",
|
||||||
|
"mergeType": "concatenate"
|
||||||
|
}
|
||||||
|
|
||||||
|
from modules.services.serviceExtraction.subPipeline import _applyMerging
|
||||||
|
merged_parts = _applyMerging(content_parts, merge_strategy)
|
||||||
|
|
||||||
|
# Convert merged parts back to final string
|
||||||
|
final_content = "\n\n".join([part.data for part in merged_parts])
|
||||||
|
|
||||||
|
logger.info(f"Merged {len(partResults)} AI results using existing merging system")
|
||||||
|
return final_content.strip()
|
||||||
|
|
||||||
|
def _mergeChunkResults(self, chunkResults: List[AiCallResponse]) -> str:
|
||||||
|
"""Merge chunk results using the existing sophisticated merging system."""
|
||||||
|
if not chunkResults:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# Convert AiCallResponse results to ContentParts for merging
|
||||||
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
from modules.services.serviceExtraction.subUtils import makeId
|
||||||
|
|
||||||
|
content_parts = []
|
||||||
|
for i, result in enumerate(chunkResults):
|
||||||
|
if result.content:
|
||||||
|
content_part = ContentPart(
|
||||||
|
id=makeId(),
|
||||||
|
parentId=None,
|
||||||
|
label=f"chunk_result_{i}",
|
||||||
|
typeGroup="text", # Default to text for AI results
|
||||||
|
mimeType="text/plain",
|
||||||
|
data=result.content,
|
||||||
|
metadata={
|
||||||
|
"aiResult": True,
|
||||||
|
"chunk": True,
|
||||||
|
"modelName": result.modelName,
|
||||||
|
"priceUsd": result.priceUsd,
|
||||||
|
"processingTime": result.processingTime,
|
||||||
|
"bytesSent": result.bytesSent,
|
||||||
|
"bytesReceived": result.bytesReceived
|
||||||
|
}
|
||||||
|
)
|
||||||
|
content_parts.append(content_part)
|
||||||
|
|
||||||
|
# Use existing merging system
|
||||||
|
merge_strategy = {
|
||||||
|
"useIntelligentMerging": True,
|
||||||
|
"groupBy": "typeGroup",
|
||||||
|
"orderBy": "id",
|
||||||
|
"mergeType": "concatenate"
|
||||||
|
}
|
||||||
|
|
||||||
|
from modules.services.serviceExtraction.subPipeline import _applyMerging
|
||||||
|
merged_parts = _applyMerging(content_parts, merge_strategy)
|
||||||
|
|
||||||
|
# Convert merged parts back to final string
|
||||||
|
final_content = "\n\n".join([part.data for part in merged_parts])
|
||||||
|
|
||||||
|
logger.info(f"Merged {len(chunkResults)} chunk results using existing merging system")
|
||||||
|
return final_content.strip()
|
||||||
|
|
||||||
|
def _createErrorResponse(self, errorMsg: str, inputBytes: int, outputBytes: int) -> AiCallResponse:
|
||||||
|
"""Create an error response."""
|
||||||
|
return AiCallResponse(
|
||||||
|
content=errorMsg,
|
||||||
|
modelName="error",
|
||||||
|
priceUsd=0.0,
|
||||||
|
processingTime=0.0,
|
||||||
|
bytesSent=inputBytes,
|
||||||
|
bytesReceived=outputBytes,
|
||||||
|
errorCount=1
|
||||||
|
)
|
||||||
|
|
||||||
async def _callWithModel(self, model: AiModel, prompt: str, context: str, temperature: float, maxTokens: int, inputBytes: int) -> AiCallResponse:
|
async def _callWithModel(self, model: AiModel, prompt: str, context: str, temperature: float, maxTokens: int, inputBytes: int) -> AiCallResponse:
|
||||||
"""Call a specific model and return the response."""
|
"""Call a specific model and return the response."""
|
||||||
# Replace <TOKEN_LIMIT> placeholder in prompt for this specific model
|
# Replace <TOKEN_LIMIT> placeholder in prompt for this specific model
|
||||||
|
|
@ -245,9 +487,9 @@ class AiObjects:
|
||||||
|
|
||||||
# Get fallback models for image analysis
|
# Get fallback models for image analysis
|
||||||
availableModels = modelRegistry.getAvailableModels()
|
availableModels = modelRegistry.getAvailableModels()
|
||||||
fallbackModels = model_selector.getFallbackModels(prompt, "", options, availableModels)
|
failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels)
|
||||||
|
|
||||||
if not fallbackModels:
|
if not failoverModelList:
|
||||||
errorMsg = f"No suitable models found for image analysis"
|
errorMsg = f"No suitable models found for image analysis"
|
||||||
logger.error(errorMsg)
|
logger.error(errorMsg)
|
||||||
return AiCallResponse(
|
return AiCallResponse(
|
||||||
|
|
@ -262,9 +504,9 @@ class AiObjects:
|
||||||
|
|
||||||
# Try each model in fallback sequence
|
# Try each model in fallback sequence
|
||||||
lastError = None
|
lastError = None
|
||||||
for attempt, model in enumerate(fallbackModels):
|
for attempt, model in enumerate(failoverModelList):
|
||||||
try:
|
try:
|
||||||
logger.info(f"Attempting image analysis with model: {model.name} (attempt {attempt + 1}/{len(fallbackModels)})")
|
logger.info(f"Attempting image analysis with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
|
||||||
|
|
||||||
# Call the model
|
# Call the model
|
||||||
response = await self._callImageWithModel(model, prompt, imageData, mimeType, inputBytes)
|
response = await self._callImageWithModel(model, prompt, imageData, mimeType, inputBytes)
|
||||||
|
|
@ -277,12 +519,12 @@ class AiObjects:
|
||||||
logger.warning(f"❌ Image analysis failed with model {model.name}: {str(e)}")
|
logger.warning(f"❌ Image analysis failed with model {model.name}: {str(e)}")
|
||||||
|
|
||||||
# If this is not the last model, try the next one
|
# If this is not the last model, try the next one
|
||||||
if attempt < len(fallbackModels) - 1:
|
if attempt < len(failoverModelList) - 1:
|
||||||
logger.info(f"🔄 Trying next fallback model for image analysis...")
|
logger.info(f"🔄 Trying next fallback model for image analysis...")
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
# All models failed
|
# All models failed
|
||||||
logger.error(f"💥 All {len(fallbackModels)} models failed for image analysis")
|
logger.error(f"💥 All {len(failoverModelList)} models failed for image analysis")
|
||||||
break
|
break
|
||||||
|
|
||||||
# All fallback attempts failed - return error response
|
# All fallback attempts failed - return error response
|
||||||
|
|
|
||||||
|
|
@ -54,8 +54,8 @@ class SubDocumentProcessing:
|
||||||
options: Optional[AiCallOptions] = None
|
options: Optional[AiCallOptions] = None
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Process documents with per-chunk AI calls and merge results.
|
Process documents with model-aware chunking and merge results.
|
||||||
FIXED: Now preserves chunk relationships and document structure.
|
NEW: Uses model-aware chunking in AI call phase instead of extraction phase.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
documents: List of ChatDocument objects to process
|
documents: List of ChatDocument objects to process
|
||||||
|
|
@ -68,23 +68,14 @@ class SubDocumentProcessing:
|
||||||
if not documents:
|
if not documents:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
# Get model capabilities for size calculation
|
# Build extraction options WITHOUT chunking parameters
|
||||||
model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options)
|
|
||||||
|
|
||||||
# Build extraction options for chunking with intelligent merging
|
|
||||||
extractionOptions: Dict[str, Any] = {
|
extractionOptions: Dict[str, Any] = {
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
"operationType": options.operationType if options else "general",
|
"operationType": options.operationType if options else "general",
|
||||||
"processDocumentsIndividually": True, # Process each document separately
|
"processDocumentsIndividually": True,
|
||||||
"maxSize": model_capabilities["maxContextBytes"],
|
# REMOVED: maxSize, textChunkSize, imageChunkSize
|
||||||
"chunkAllowed": True,
|
|
||||||
"textChunkSize": model_capabilities["textChunkSize"],
|
|
||||||
"imageChunkSize": model_capabilities["imageChunkSize"],
|
|
||||||
"imageMaxPixels": 1024 * 1024,
|
|
||||||
"imageQuality": 85,
|
|
||||||
"mergeStrategy": {
|
"mergeStrategy": {
|
||||||
"useIntelligentMerging": True, # Enable intelligent token-aware merging
|
"useIntelligentMerging": True,
|
||||||
"capabilities": model_capabilities,
|
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
"groupBy": "typeGroup",
|
"groupBy": "typeGroup",
|
||||||
"orderBy": "id",
|
"orderBy": "id",
|
||||||
|
|
@ -95,17 +86,17 @@ class SubDocumentProcessing:
|
||||||
logger.debug(f"Per-chunk extraction options: prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")
|
logger.debug(f"Per-chunk extraction options: prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Extract content with chunking
|
# Extract content WITHOUT chunking
|
||||||
extractionResult = self.extractionService.extractContent(documents, extractionOptions)
|
extractionResult = self.extractionService.extractContent(documents, extractionOptions)
|
||||||
|
|
||||||
if not isinstance(extractionResult, list):
|
if not isinstance(extractionResult, list):
|
||||||
return "[Error: No extraction results]"
|
return "[Error: No extraction results]"
|
||||||
|
|
||||||
# FIXED: Process chunks with proper mapping
|
# Process parts (not chunks) with model-aware AI calls
|
||||||
chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options)
|
partResults = await self._processPartsWithMapping(extractionResult, prompt, options)
|
||||||
|
|
||||||
# FIXED: Merge with preserved chunk relationships
|
# Merge results using existing merging system
|
||||||
mergedContent = self._mergeChunkResults(chunkResults, options)
|
mergedContent = self._mergePartResults(partResults, options)
|
||||||
|
|
||||||
# Save merged extraction content to debug
|
# Save merged extraction content to debug
|
||||||
self.services.utils.writeDebugFile(mergedContent or '', "extractionMergedText")
|
self.services.utils.writeDebugFile(mergedContent or '', "extractionMergedText")
|
||||||
|
|
@ -123,29 +114,19 @@ class SubDocumentProcessing:
|
||||||
options: Optional[AiCallOptions] = None
|
options: Optional[AiCallOptions] = None
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Process documents with per-chunk AI calls and merge results in JSON mode.
|
Process documents with model-aware chunking and merge results in JSON mode.
|
||||||
Returns structured JSON document instead of text.
|
Returns structured JSON document instead of text.
|
||||||
"""
|
"""
|
||||||
if not documents:
|
if not documents:
|
||||||
return {"metadata": {"title": "Empty Document"}, "sections": []}
|
return {"metadata": {"title": "Empty Document"}, "sections": []}
|
||||||
|
|
||||||
# Get model capabilities for size calculation
|
# Build extraction options WITHOUT chunking parameters
|
||||||
model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options)
|
|
||||||
|
|
||||||
# Build extraction options for chunking with intelligent merging
|
|
||||||
extractionOptions: Dict[str, Any] = {
|
extractionOptions: Dict[str, Any] = {
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
"operationType": options.operationType if options else "general",
|
"operationType": options.operationType if options else "general",
|
||||||
"processDocumentsIndividually": True, # Process each document separately
|
"processDocumentsIndividually": True,
|
||||||
"maxSize": model_capabilities["maxContextBytes"],
|
|
||||||
"chunkAllowed": True,
|
|
||||||
"textChunkSize": model_capabilities["textChunkSize"],
|
|
||||||
"imageChunkSize": model_capabilities["imageChunkSize"],
|
|
||||||
"imageMaxPixels": 1024 * 1024,
|
|
||||||
"imageQuality": 85,
|
|
||||||
"mergeStrategy": {
|
"mergeStrategy": {
|
||||||
"useIntelligentMerging": True, # Enable intelligent token-aware merging
|
"useIntelligentMerging": True,
|
||||||
"capabilities": model_capabilities,
|
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
"groupBy": "typeGroup",
|
"groupBy": "typeGroup",
|
||||||
"orderBy": "id",
|
"orderBy": "id",
|
||||||
|
|
@ -156,17 +137,17 @@ class SubDocumentProcessing:
|
||||||
logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")
|
logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Extract content with chunking
|
# Extract content WITHOUT chunking
|
||||||
extractionResult = self.extractionService.extractContent(documents, extractionOptions)
|
extractionResult = self.extractionService.extractContent(documents, extractionOptions)
|
||||||
|
|
||||||
if not isinstance(extractionResult, list):
|
if not isinstance(extractionResult, list):
|
||||||
return {"metadata": {"title": "Error Document"}, "sections": []}
|
return {"metadata": {"title": "Error Document"}, "sections": []}
|
||||||
|
|
||||||
# Process chunks with proper mapping
|
# Process parts with model-aware chunking
|
||||||
chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options, generate_json=True)
|
partResults = await self._processPartsWithMapping(extractionResult, prompt, options)
|
||||||
|
|
||||||
# Merge with JSON mode
|
# Convert to JSON format (simplified for now)
|
||||||
mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options)
|
mergedJsonDocument = self._convertPartResultsToJson(partResults, options)
|
||||||
|
|
||||||
# Normalize merged JSON into a single canonical table (only if table content exists)
|
# Normalize merged JSON into a single canonical table (only if table content exists)
|
||||||
try:
|
try:
|
||||||
|
|
@ -505,6 +486,127 @@ CONTINUATION INSTRUCTIONS:
|
||||||
"""
|
"""
|
||||||
return await self.processDocumentsPerChunk(documents, prompt, options)
|
return await self.processDocumentsPerChunk(documents, prompt, options)
|
||||||
|
|
||||||
|
async def _processPartsWithMapping(
|
||||||
|
self,
|
||||||
|
extractionResult: List[ContentExtracted],
|
||||||
|
prompt: str,
|
||||||
|
options: Optional[AiCallOptions] = None
|
||||||
|
) -> List['PartResult']:
|
||||||
|
"""Process content parts with model-aware chunking and proper mapping."""
|
||||||
|
from modules.datamodels.datamodelExtraction import PartResult
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
# Collect all parts that need processing
|
||||||
|
parts_to_process = []
|
||||||
|
part_index = 0
|
||||||
|
|
||||||
|
for ec in extractionResult:
|
||||||
|
for part in ec.parts:
|
||||||
|
if part.typeGroup in ("text", "table", "structure", "image", "container", "binary"):
|
||||||
|
# Skip empty container parts
|
||||||
|
if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0):
|
||||||
|
logger.debug(f"Skipping empty container part: mimeType={part.mimeType}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
parts_to_process.append({
|
||||||
|
'part': part,
|
||||||
|
'part_index': part_index,
|
||||||
|
'document_id': ec.id
|
||||||
|
})
|
||||||
|
part_index += 1
|
||||||
|
|
||||||
|
logger.info(f"Processing {len(parts_to_process)} parts with model-aware chunking")
|
||||||
|
|
||||||
|
# Process parts in parallel
|
||||||
|
async def process_single_part(part_info: Dict) -> PartResult:
|
||||||
|
part = part_info['part']
|
||||||
|
part_index = part_info['part_index']
|
||||||
|
document_id = part_info['document_id']
|
||||||
|
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create AI call request with content part
|
||||||
|
from modules.datamodels.datamodelAi import AiCallRequest
|
||||||
|
request = AiCallRequest(
|
||||||
|
prompt=prompt,
|
||||||
|
context="", # Context is in the content part
|
||||||
|
options=options,
|
||||||
|
contentParts=[part] # Pass as list for unified processing
|
||||||
|
)
|
||||||
|
|
||||||
|
# Call AI with model-aware chunking
|
||||||
|
response = await self.aiObjects.call(request)
|
||||||
|
|
||||||
|
processing_time = time.time() - start_time
|
||||||
|
|
||||||
|
return PartResult(
|
||||||
|
originalPart=part,
|
||||||
|
aiResult=response.content,
|
||||||
|
partIndex=part_index,
|
||||||
|
documentId=document_id,
|
||||||
|
processingTime=processing_time,
|
||||||
|
metadata={
|
||||||
|
"success": True,
|
||||||
|
"partSize": len(part.data) if part.data else 0,
|
||||||
|
"resultSize": len(response.content),
|
||||||
|
"typeGroup": part.typeGroup,
|
||||||
|
"modelName": response.modelName,
|
||||||
|
"priceUsd": response.priceUsd
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
processing_time = time.time() - start_time
|
||||||
|
logger.warning(f"Error processing part {part_index}: {str(e)}")
|
||||||
|
|
||||||
|
return PartResult(
|
||||||
|
originalPart=part,
|
||||||
|
aiResult=f"[Error processing part: {str(e)}]",
|
||||||
|
partIndex=part_index,
|
||||||
|
documentId=document_id,
|
||||||
|
processingTime=processing_time,
|
||||||
|
metadata={
|
||||||
|
"success": False,
|
||||||
|
"error": str(e),
|
||||||
|
"partSize": len(part.data) if part.data else 0,
|
||||||
|
"typeGroup": part.typeGroup
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Process parts with concurrency control
|
||||||
|
max_concurrent = 5
|
||||||
|
if options and hasattr(options, 'maxConcurrentParts'):
|
||||||
|
max_concurrent = options.maxConcurrentParts
|
||||||
|
|
||||||
|
semaphore = asyncio.Semaphore(max_concurrent)
|
||||||
|
|
||||||
|
async def process_with_semaphore(part_info):
|
||||||
|
async with semaphore:
|
||||||
|
return await process_single_part(part_info)
|
||||||
|
|
||||||
|
tasks = [process_with_semaphore(part_info) for part_info in parts_to_process]
|
||||||
|
part_results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
|
|
||||||
|
# Handle exceptions
|
||||||
|
processed_results = []
|
||||||
|
for i, result in enumerate(part_results):
|
||||||
|
if isinstance(result, Exception):
|
||||||
|
part_info = parts_to_process[i]
|
||||||
|
processed_results.append(PartResult(
|
||||||
|
originalPart=part_info['part'],
|
||||||
|
aiResult=f"[Error in parallel processing: {str(result)}]",
|
||||||
|
partIndex=part_info['part_index'],
|
||||||
|
documentId=part_info['document_id'],
|
||||||
|
processingTime=0.0,
|
||||||
|
metadata={"success": False, "error": str(result)}
|
||||||
|
))
|
||||||
|
elif result is not None:
|
||||||
|
processed_results.append(result)
|
||||||
|
|
||||||
|
logger.info(f"Completed processing {len(processed_results)} parts")
|
||||||
|
return processed_results
|
||||||
|
|
||||||
async def _processChunksWithMapping(
|
async def _processChunksWithMapping(
|
||||||
self,
|
self,
|
||||||
extractionResult: List[ContentExtracted],
|
extractionResult: List[ContentExtracted],
|
||||||
|
|
@ -907,340 +1009,451 @@ CONTINUATION INSTRUCTIONS:
|
||||||
logger.info(f"Completed processing {len(processed_results)} chunks")
|
logger.info(f"Completed processing {len(processed_results)} chunks")
|
||||||
return processed_results
|
return processed_results
|
||||||
|
|
||||||
|
def _mergePartResults(
|
||||||
|
self,
|
||||||
|
partResults: List['PartResult'],
|
||||||
|
options: Optional[AiCallOptions] = None
|
||||||
|
) -> str:
|
||||||
|
"""Merge part results using existing sophisticated merging system."""
|
||||||
|
if not partResults:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# Convert PartResults back to ContentParts for existing merger system
|
||||||
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
content_parts = []
|
||||||
|
for part_result in partResults:
|
||||||
|
# Create ContentPart from PartResult with proper typeGroup
|
||||||
|
content_part = ContentPart(
|
||||||
|
id=part_result.originalPart.id,
|
||||||
|
parentId=part_result.originalPart.parentId,
|
||||||
|
label=part_result.originalPart.label,
|
||||||
|
typeGroup=part_result.originalPart.typeGroup, # Use original typeGroup
|
||||||
|
mimeType=part_result.originalPart.mimeType,
|
||||||
|
data=part_result.aiResult, # Use AI result as data
|
||||||
|
metadata={
|
||||||
|
**part_result.originalPart.metadata,
|
||||||
|
"aiResult": True,
|
||||||
|
"partIndex": part_result.partIndex,
|
||||||
|
"documentId": part_result.documentId,
|
||||||
|
"processingTime": part_result.processingTime,
|
||||||
|
"success": part_result.metadata.get("success", False)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
content_parts.append(content_part)
|
||||||
|
|
||||||
|
# Use existing merging strategy from options
|
||||||
|
merge_strategy = {
|
||||||
|
"useIntelligentMerging": True,
|
||||||
|
"groupBy": "documentId", # Group by document
|
||||||
|
"orderBy": "partIndex", # Order by part index
|
||||||
|
"mergeType": "concatenate"
|
||||||
|
}
|
||||||
|
|
||||||
|
if options and hasattr(options, 'mergeStrategy'):
|
||||||
|
merge_strategy.update(options.mergeStrategy)
|
||||||
|
|
||||||
|
# Apply existing merging logic using the sophisticated merging system
|
||||||
|
from modules.services.serviceExtraction.subPipeline import _applyMerging
|
||||||
|
merged_parts = _applyMerging(content_parts, merge_strategy)
|
||||||
|
|
||||||
|
# Convert merged parts back to final string
|
||||||
|
final_content = "\n\n".join([part.data for part in merged_parts])
|
||||||
|
|
||||||
|
logger.info(f"Merged {len(partResults)} parts using existing sophisticated merging system")
|
||||||
|
return final_content.strip()
|
||||||
|
|
||||||
|
def _convertPartResultsToJson(
|
||||||
|
self,
|
||||||
|
partResults: List['PartResult'],
|
||||||
|
options: Optional[AiCallOptions] = None
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Convert part results to JSON format using existing sophisticated merging system."""
|
||||||
|
if not partResults:
|
||||||
|
return {"metadata": {"title": "Empty Document"}, "sections": []}
|
||||||
|
|
||||||
|
# Convert PartResults back to ContentParts for existing merger system
|
||||||
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
content_parts = []
|
||||||
|
for part_result in partResults:
|
||||||
|
# Create ContentPart from PartResult with proper typeGroup
|
||||||
|
content_part = ContentPart(
|
||||||
|
id=part_result.originalPart.id,
|
||||||
|
parentId=part_result.originalPart.parentId,
|
||||||
|
label=part_result.originalPart.label,
|
||||||
|
typeGroup=part_result.originalPart.typeGroup, # Use original typeGroup
|
||||||
|
mimeType=part_result.originalPart.mimeType,
|
||||||
|
data=part_result.aiResult, # Use AI result as data
|
||||||
|
metadata={
|
||||||
|
**part_result.originalPart.metadata,
|
||||||
|
"aiResult": True,
|
||||||
|
"partIndex": part_result.partIndex,
|
||||||
|
"documentId": part_result.documentId,
|
||||||
|
"processingTime": part_result.processingTime,
|
||||||
|
"success": part_result.metadata.get("success", False)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
content_parts.append(content_part)
|
||||||
|
|
||||||
|
# Use existing merging strategy for JSON mode
|
||||||
|
merge_strategy = {
|
||||||
|
"useIntelligentMerging": True,
|
||||||
|
"groupBy": "documentId", # Group by document
|
||||||
|
"orderBy": "partIndex", # Order by part index
|
||||||
|
"mergeType": "concatenate"
|
||||||
|
}
|
||||||
|
|
||||||
|
if options and hasattr(options, 'mergeStrategy'):
|
||||||
|
merge_strategy.update(options.mergeStrategy)
|
||||||
|
|
||||||
|
# Apply existing merging logic using the sophisticated merging system
|
||||||
|
from modules.services.serviceExtraction.subPipeline import _applyMerging
|
||||||
|
merged_parts = _applyMerging(content_parts, merge_strategy)
|
||||||
|
|
||||||
|
# Convert merged parts to JSON format
|
||||||
|
all_sections = []
|
||||||
|
document_titles = []
|
||||||
|
|
||||||
|
for part in merged_parts:
|
||||||
|
if part.metadata.get("success", False):
|
||||||
|
try:
|
||||||
|
# Parse JSON from AI result
|
||||||
|
part_json = json.loads(part.data)
|
||||||
|
|
||||||
|
# Check if this is a multi-file response (has "documents" key)
|
||||||
|
if isinstance(part_json, dict) and "documents" in part_json:
|
||||||
|
# This is a multi-file response - merge all documents
|
||||||
|
logger.debug(f"Processing multi-file response from part {part.id} with {len(part_json['documents'])} documents")
|
||||||
|
|
||||||
|
# Return multi-file response directly
|
||||||
|
return {
|
||||||
|
"metadata": part_json.get("metadata", {"title": "Merged Document"}),
|
||||||
|
"documents": part_json["documents"]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extract sections from single-file response
|
||||||
|
elif isinstance(part_json, dict) and "sections" in part_json:
|
||||||
|
for section in part_json["sections"]:
|
||||||
|
# Add part context to section
|
||||||
|
section["metadata"] = section.get("metadata", {})
|
||||||
|
section["metadata"]["source_part"] = part.id
|
||||||
|
section["metadata"]["source_document"] = part.metadata.get("documentId", "unknown")
|
||||||
|
section["metadata"]["part_index"] = part.metadata.get("partIndex", 0)
|
||||||
|
all_sections.append(section)
|
||||||
|
|
||||||
|
# Extract document title
|
||||||
|
if isinstance(part_json, dict) and "metadata" in part_json:
|
||||||
|
title = part_json["metadata"].get("title", "")
|
||||||
|
if title and title not in document_titles:
|
||||||
|
document_titles.append(title)
|
||||||
|
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.warning(f"Failed to parse JSON from part {part.id}: {str(e)}")
|
||||||
|
# Create a fallback section for invalid JSON
|
||||||
|
fallback_section = {
|
||||||
|
"id": f"error_section_{part.id}",
|
||||||
|
"title": "Error Section",
|
||||||
|
"content_type": "paragraph",
|
||||||
|
"elements": [{
|
||||||
|
"text": f"Error parsing part {part.id}: {str(e)}"
|
||||||
|
}],
|
||||||
|
"order": part.metadata.get("partIndex", 0),
|
||||||
|
"metadata": {
|
||||||
|
"source_document": part.metadata.get("documentId", "unknown"),
|
||||||
|
"part_id": part.id,
|
||||||
|
"error": str(e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
all_sections.append(fallback_section)
|
||||||
|
else:
|
||||||
|
# Handle error parts
|
||||||
|
error_section = {
|
||||||
|
"id": f"error_section_{part.id}",
|
||||||
|
"title": "Error Section",
|
||||||
|
"content_type": "paragraph",
|
||||||
|
"elements": [{
|
||||||
|
"text": f"Error in part {part.id}: {part.metadata.get('error', 'Unknown error')}"
|
||||||
|
}],
|
||||||
|
"order": part.metadata.get("partIndex", 0),
|
||||||
|
"metadata": {
|
||||||
|
"source_document": part.metadata.get("documentId", "unknown"),
|
||||||
|
"part_id": part.id,
|
||||||
|
"error": part.metadata.get('error', 'Unknown error')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
all_sections.append(error_section)
|
||||||
|
|
||||||
|
# Sort sections by order
|
||||||
|
all_sections.sort(key=lambda x: x.get("order", 0))
|
||||||
|
|
||||||
|
# Create merged document with sections
|
||||||
|
merged_document = {
|
||||||
|
"metadata": {
|
||||||
|
"title": document_titles[0] if document_titles else "Merged Document",
|
||||||
|
"extraction_method": "model_aware_chunking_with_merging",
|
||||||
|
"version": "2.0"
|
||||||
|
},
|
||||||
|
"sections": all_sections,
|
||||||
|
"summary": f"Merged document using sophisticated merging system",
|
||||||
|
"tags": ["merged", "ai_generated", "model_aware", "sophisticated_merging"]
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(f"Converted {len(partResults)} parts to JSON format using existing sophisticated merging system")
|
||||||
|
return merged_document
|
||||||
|
|
||||||
def _mergeChunkResults(
|
def _mergeChunkResults(
|
||||||
self,
|
self,
|
||||||
chunkResults: List[ChunkResult],
|
chunkResults: List[ChunkResult],
|
||||||
options: Optional[AiCallOptions] = None
|
options: Optional[AiCallOptions] = None
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Merge chunk results while preserving document structure and chunk order."""
|
"""Merge chunk results using existing sophisticated merging system."""
|
||||||
|
|
||||||
if not chunkResults:
|
if not chunkResults:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
# Get merging configuration from options
|
# Convert ChunkResults back to ContentParts for existing merger system
|
||||||
chunk_separator = "\n\n---\n\n"
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
include_document_headers = True
|
content_parts = []
|
||||||
include_chunk_metadata = False
|
|
||||||
|
|
||||||
if options:
|
|
||||||
if hasattr(options, 'chunkSeparator'):
|
|
||||||
chunk_separator = options.chunkSeparator
|
|
||||||
elif hasattr(options, 'mergeStrategy') and options.mergeStrategy:
|
|
||||||
chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n---\n\n")
|
|
||||||
|
|
||||||
# Check for enhanced options
|
|
||||||
if hasattr(options, 'preserveChunkMetadata'):
|
|
||||||
include_chunk_metadata = options.preserveChunkMetadata
|
|
||||||
|
|
||||||
# Group chunk results by document
|
|
||||||
results_by_document = {}
|
|
||||||
for chunk_result in chunkResults:
|
for chunk_result in chunkResults:
|
||||||
doc_id = chunk_result.documentId
|
# Create ContentPart from ChunkResult with proper typeGroup
|
||||||
if doc_id not in results_by_document:
|
content_part = ContentPart(
|
||||||
results_by_document[doc_id] = []
|
id=chunk_result.originalChunk.id,
|
||||||
results_by_document[doc_id].append(chunk_result)
|
parentId=chunk_result.originalChunk.parentId,
|
||||||
|
label=chunk_result.originalChunk.label,
|
||||||
|
typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup
|
||||||
|
mimeType=chunk_result.originalChunk.mimeType,
|
||||||
|
data=chunk_result.aiResult, # Use AI result as data
|
||||||
|
metadata={
|
||||||
|
**chunk_result.originalChunk.metadata,
|
||||||
|
"aiResult": True,
|
||||||
|
"chunk": True,
|
||||||
|
"chunkIndex": chunk_result.chunkIndex,
|
||||||
|
"documentId": chunk_result.documentId,
|
||||||
|
"processingTime": chunk_result.processingTime,
|
||||||
|
"success": chunk_result.metadata.get("success", False)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
content_parts.append(content_part)
|
||||||
|
|
||||||
# Sort chunks within each document by chunk index
|
# Use existing merging strategy from options
|
||||||
for doc_id in results_by_document:
|
merge_strategy = {
|
||||||
results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
|
"useIntelligentMerging": True,
|
||||||
|
"groupBy": "documentId", # Group by document
|
||||||
|
"orderBy": "chunkIndex", # Order by chunk index
|
||||||
|
"mergeType": "concatenate"
|
||||||
|
}
|
||||||
|
|
||||||
# Merge results for each document
|
if options and hasattr(options, 'mergeStrategy'):
|
||||||
merged_documents = []
|
merge_strategy.update(options.mergeStrategy)
|
||||||
|
|
||||||
for doc_id, doc_chunks in results_by_document.items():
|
# Apply existing merging logic using the sophisticated merging system
|
||||||
# Build document header if enabled
|
from modules.services.serviceExtraction.subPipeline import _applyMerging
|
||||||
doc_header = ""
|
merged_parts = _applyMerging(content_parts, merge_strategy)
|
||||||
if include_document_headers:
|
|
||||||
doc_header = f"\n\n=== DOCUMENT: {doc_id} ===\n\n"
|
|
||||||
|
|
||||||
# Merge chunks for this document
|
|
||||||
doc_content = ""
|
|
||||||
for i, chunk_result in enumerate(doc_chunks):
|
|
||||||
# Add chunk separator (except for first chunk)
|
|
||||||
if i > 0:
|
|
||||||
doc_content += chunk_separator
|
|
||||||
|
|
||||||
# Add chunk content with optional metadata
|
|
||||||
chunk_metadata = chunk_result.metadata
|
|
||||||
if chunk_metadata.get("success", False):
|
|
||||||
chunk_content = chunk_result.aiResult
|
|
||||||
|
|
||||||
# Add chunk metadata if enabled
|
|
||||||
if include_chunk_metadata:
|
|
||||||
chunk_info = f"[Chunk {chunk_result.chunkIndex} - {chunk_metadata.get('typeGroup', 'unknown')} - {chunk_metadata.get('chunkSize', 0)} chars]"
|
|
||||||
chunk_content = f"{chunk_info}\n{chunk_content}"
|
|
||||||
|
|
||||||
doc_content += chunk_content
|
|
||||||
else:
|
|
||||||
# Handle error chunks
|
|
||||||
error_msg = f"[ERROR in chunk {chunk_result.chunkIndex}: {chunk_metadata.get('error', 'Unknown error')}]"
|
|
||||||
doc_content += error_msg
|
|
||||||
|
|
||||||
merged_documents.append(doc_header + doc_content)
|
|
||||||
|
|
||||||
# Join all documents
|
# Convert merged parts back to final string
|
||||||
final_result = "\n\n".join(merged_documents)
|
final_content = "\n\n".join([part.data for part in merged_parts])
|
||||||
|
|
||||||
logger.info(f"Merged {len(chunkResults)} chunks from {len(results_by_document)} documents")
|
logger.info(f"Merged {len(chunkResults)} chunks using existing sophisticated merging system")
|
||||||
return final_result.strip()
|
return final_content.strip()
|
||||||
|
|
||||||
def _mergeChunkResultsClean(
|
def _mergeChunkResultsClean(
|
||||||
self,
|
self,
|
||||||
chunkResults: List[ChunkResult],
|
chunkResults: List[ChunkResult],
|
||||||
options: Optional[AiCallOptions] = None
|
options: Optional[AiCallOptions] = None
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Merge chunk results in CLEAN mode - no debug metadata or document headers."""
|
"""Merge chunk results in CLEAN mode using existing sophisticated merging system."""
|
||||||
|
|
||||||
if not chunkResults:
|
if not chunkResults:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
# Get merging configuration from options
|
# Convert ChunkResults back to ContentParts for existing merger system
|
||||||
chunk_separator = "\n\n"
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
include_document_headers = False # CLEAN MODE: No document headers
|
content_parts = []
|
||||||
include_chunk_metadata = False # CLEAN MODE: No chunk metadata
|
|
||||||
|
|
||||||
if options:
|
|
||||||
if hasattr(options, 'chunkSeparator'):
|
|
||||||
chunk_separator = options.chunkSeparator
|
|
||||||
elif hasattr(options, 'mergeStrategy') and options.mergeStrategy:
|
|
||||||
chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n")
|
|
||||||
|
|
||||||
# Group chunk results by document
|
|
||||||
results_by_document = {}
|
|
||||||
for chunk_result in chunkResults:
|
for chunk_result in chunkResults:
|
||||||
doc_id = chunk_result.documentId
|
# Skip empty or error chunks in clean mode
|
||||||
if doc_id not in results_by_document:
|
if not chunk_result.metadata.get("success", False):
|
||||||
results_by_document[doc_id] = []
|
continue
|
||||||
results_by_document[doc_id].append(chunk_result)
|
if not chunk_result.aiResult or not chunk_result.aiResult.strip():
|
||||||
|
continue
|
||||||
# Sort chunks within each document by chunk index
|
# Skip container/binary chunks in clean mode
|
||||||
for doc_id in results_by_document:
|
if chunk_result.aiResult.startswith("[Skipped ") and "content:" in chunk_result.aiResult:
|
||||||
results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
|
continue
|
||||||
|
|
||||||
# Merge results for each document in CLEAN mode
|
|
||||||
merged_documents = []
|
|
||||||
|
|
||||||
for doc_id, doc_chunks in results_by_document.items():
|
|
||||||
# CLEAN MODE: No document headers
|
|
||||||
doc_header = ""
|
|
||||||
|
|
||||||
# Merge chunks for this document
|
# Create ContentPart from ChunkResult with proper typeGroup
|
||||||
doc_content = ""
|
content_part = ContentPart(
|
||||||
for i, chunk_result in enumerate(doc_chunks):
|
id=chunk_result.originalChunk.id,
|
||||||
# Add chunk separator (except for first chunk)
|
parentId=chunk_result.originalChunk.parentId,
|
||||||
if i > 0:
|
label=chunk_result.originalChunk.label,
|
||||||
doc_content += chunk_separator
|
typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup
|
||||||
|
mimeType=chunk_result.originalChunk.mimeType,
|
||||||
# Add chunk content without metadata
|
data=chunk_result.aiResult, # Use AI result as data
|
||||||
chunk_metadata = chunk_result.metadata
|
metadata={
|
||||||
if chunk_metadata.get("success", False):
|
**chunk_result.originalChunk.metadata,
|
||||||
chunk_content = chunk_result.aiResult
|
"aiResult": True,
|
||||||
|
"chunk": True,
|
||||||
# CLEAN MODE: Skip container/binary chunks entirely
|
"chunkIndex": chunk_result.chunkIndex,
|
||||||
if chunk_content.startswith("[Skipped ") and "content:" in chunk_content:
|
"documentId": chunk_result.documentId,
|
||||||
continue # Skip container/binary chunks in clean mode
|
"processingTime": chunk_result.processingTime,
|
||||||
|
"success": chunk_result.metadata.get("success", False)
|
||||||
# CLEAN MODE: Skip empty or whitespace-only chunks
|
}
|
||||||
if not chunk_content.strip():
|
)
|
||||||
continue # Skip empty chunks in clean mode
|
content_parts.append(content_part)
|
||||||
|
|
||||||
# CLEAN MODE: No chunk metadata
|
|
||||||
doc_content += chunk_content
|
|
||||||
else:
|
|
||||||
# Handle error chunks silently in clean mode
|
|
||||||
continue
|
|
||||||
|
|
||||||
merged_documents.append(doc_header + doc_content)
|
|
||||||
|
|
||||||
# Join all documents
|
# Use existing merging strategy for clean mode
|
||||||
final_result = "\n\n".join(merged_documents)
|
merge_strategy = {
|
||||||
|
"useIntelligentMerging": True,
|
||||||
|
"groupBy": "documentId", # Group by document
|
||||||
|
"orderBy": "chunkIndex", # Order by chunk index
|
||||||
|
"mergeType": "concatenate"
|
||||||
|
}
|
||||||
|
|
||||||
return final_result.strip()
|
if options and hasattr(options, 'mergeStrategy'):
|
||||||
|
merge_strategy.update(options.mergeStrategy)
|
||||||
|
|
||||||
|
# Apply existing merging logic using the sophisticated merging system
|
||||||
|
from modules.services.serviceExtraction.subPipeline import _applyMerging
|
||||||
|
merged_parts = _applyMerging(content_parts, merge_strategy)
|
||||||
|
|
||||||
|
# Convert merged parts back to final string
|
||||||
|
final_content = "\n\n".join([part.data for part in merged_parts])
|
||||||
|
|
||||||
|
logger.info(f"Merged {len(content_parts)} chunks in clean mode using existing sophisticated merging system")
|
||||||
|
return final_content.strip()
|
||||||
|
|
||||||
def _mergeChunkResultsJson(
|
def _mergeChunkResultsJson(
|
||||||
self,
|
self,
|
||||||
chunkResults: List[ChunkResult],
|
chunkResults: List[ChunkResult],
|
||||||
options: Optional[AiCallOptions] = None
|
options: Optional[AiCallOptions] = None
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Merge chunk results in JSON mode - returns structured JSON document."""
|
"""Merge chunk results in JSON mode using existing sophisticated merging system."""
|
||||||
|
|
||||||
if not chunkResults:
|
if not chunkResults:
|
||||||
return {"metadata": {"title": "Empty Document"}, "sections": []}
|
return {"metadata": {"title": "Empty Document"}, "sections": []}
|
||||||
|
|
||||||
# Group chunk results by document
|
# Convert ChunkResults back to ContentParts for existing merger system
|
||||||
results_by_document = {}
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
content_parts = []
|
||||||
for chunk_result in chunkResults:
|
for chunk_result in chunkResults:
|
||||||
doc_id = chunk_result.documentId
|
# Create ContentPart from ChunkResult with proper typeGroup
|
||||||
if doc_id not in results_by_document:
|
content_part = ContentPart(
|
||||||
results_by_document[doc_id] = []
|
id=chunk_result.originalChunk.id,
|
||||||
results_by_document[doc_id].append(chunk_result)
|
parentId=chunk_result.originalChunk.parentId,
|
||||||
|
label=chunk_result.originalChunk.label,
|
||||||
|
typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup
|
||||||
|
mimeType=chunk_result.originalChunk.mimeType,
|
||||||
|
data=chunk_result.aiResult, # Use AI result as data
|
||||||
|
metadata={
|
||||||
|
**chunk_result.originalChunk.metadata,
|
||||||
|
"aiResult": True,
|
||||||
|
"chunk": True,
|
||||||
|
"chunkIndex": chunk_result.chunkIndex,
|
||||||
|
"documentId": chunk_result.documentId,
|
||||||
|
"processingTime": chunk_result.processingTime,
|
||||||
|
"success": chunk_result.metadata.get("success", False)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
content_parts.append(content_part)
|
||||||
|
|
||||||
# Sort chunks within each document by chunk index
|
# Use existing merging strategy for JSON mode
|
||||||
for doc_id in results_by_document:
|
merge_strategy = {
|
||||||
results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
|
"useIntelligentMerging": True,
|
||||||
|
"groupBy": "documentId", # Group by document
|
||||||
|
"orderBy": "chunkIndex", # Order by chunk index
|
||||||
|
"mergeType": "concatenate"
|
||||||
|
}
|
||||||
|
|
||||||
# Merge JSON results for each document
|
if options and hasattr(options, 'mergeStrategy'):
|
||||||
all_documents = []
|
merge_strategy.update(options.mergeStrategy)
|
||||||
|
|
||||||
|
# Apply existing merging logic using the sophisticated merging system
|
||||||
|
from modules.services.serviceExtraction.subPipeline import _applyMerging
|
||||||
|
merged_parts = _applyMerging(content_parts, merge_strategy)
|
||||||
|
|
||||||
|
# Convert merged parts to JSON format
|
||||||
all_sections = []
|
all_sections = []
|
||||||
document_titles = []
|
document_titles = []
|
||||||
combined_metadata = {"title": "Merged Document", "splitStrategy": "by_section"}
|
|
||||||
|
|
||||||
for doc_id, doc_chunks in results_by_document.items():
|
for part in merged_parts:
|
||||||
# Process each chunk's JSON result
|
if part.metadata.get("success", False):
|
||||||
for chunk_result in doc_chunks:
|
try:
|
||||||
chunk_metadata = chunk_result.metadata
|
# Parse JSON from AI result
|
||||||
if chunk_metadata.get("success", False):
|
chunk_json = json.loads(part.data)
|
||||||
try:
|
|
||||||
# Parse JSON from AI result
|
# Check if this is a multi-file response (has "documents" key)
|
||||||
chunk_json = json.loads(chunk_result.aiResult)
|
if isinstance(chunk_json, dict) and "documents" in chunk_json:
|
||||||
|
# This is a multi-file response - merge all documents
|
||||||
|
logger.debug(f"Processing multi-file response from part {part.id} with {len(chunk_json['documents'])} documents")
|
||||||
|
|
||||||
# Check if this is a multi-file response (has "documents" key)
|
# Return multi-file response directly
|
||||||
if isinstance(chunk_json, dict) and "documents" in chunk_json:
|
return {
|
||||||
# This is a multi-file response - merge all documents
|
"metadata": chunk_json.get("metadata", {"title": "Merged Document"}),
|
||||||
logger.debug(f"Processing multi-file response from chunk {chunk_result.chunkIndex} with {len(chunk_json['documents'])} documents")
|
"documents": chunk_json["documents"]
|
||||||
|
|
||||||
# Add all documents from this chunk
|
|
||||||
for doc in chunk_json["documents"]:
|
|
||||||
# Add chunk context to document
|
|
||||||
doc["metadata"] = doc.get("metadata", {})
|
|
||||||
doc["metadata"]["source_chunk"] = chunk_result.chunkIndex
|
|
||||||
doc["metadata"]["source_document"] = doc_id
|
|
||||||
all_documents.append(doc)
|
|
||||||
|
|
||||||
# Update combined metadata
|
|
||||||
if "metadata" in chunk_json:
|
|
||||||
combined_metadata.update(chunk_json["metadata"])
|
|
||||||
|
|
||||||
# Extract sections from single-file response (fallback)
|
|
||||||
elif isinstance(chunk_json, dict) and "sections" in chunk_json:
|
|
||||||
for section in chunk_json["sections"]:
|
|
||||||
# Add document context to section
|
|
||||||
section["metadata"] = section.get("metadata", {})
|
|
||||||
section["metadata"]["source_document"] = doc_id
|
|
||||||
section["metadata"]["chunk_index"] = chunk_result.chunkIndex
|
|
||||||
all_sections.append(section)
|
|
||||||
|
|
||||||
# Extract document title
|
|
||||||
if isinstance(chunk_json, dict) and "metadata" in chunk_json:
|
|
||||||
title = chunk_json["metadata"].get("title", "")
|
|
||||||
if title and title not in document_titles:
|
|
||||||
document_titles.append(title)
|
|
||||||
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
logger.warning(f"Failed to parse JSON from chunk {chunk_result.chunkIndex}: {str(e)}")
|
|
||||||
# Create a fallback section for invalid JSON
|
|
||||||
fallback_section = {
|
|
||||||
"id": f"error_section_{chunk_result.chunkIndex}",
|
|
||||||
"title": "Error Section",
|
|
||||||
"content_type": "paragraph",
|
|
||||||
"elements": [{
|
|
||||||
"text": f"Error parsing chunk {chunk_result.chunkIndex}: {str(e)}"
|
|
||||||
}],
|
|
||||||
"order": chunk_result.chunkIndex,
|
|
||||||
"metadata": {
|
|
||||||
"source_document": doc_id,
|
|
||||||
"chunk_index": chunk_result.chunkIndex,
|
|
||||||
"error": str(e)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
all_sections.append(fallback_section)
|
|
||||||
else:
|
# Extract sections from single-file response
|
||||||
# Handle error chunks
|
elif isinstance(chunk_json, dict) and "sections" in chunk_json:
|
||||||
error_section = {
|
for section in chunk_json["sections"]:
|
||||||
"id": f"error_section_{chunk_result.chunkIndex}",
|
# Add part context to section
|
||||||
|
section["metadata"] = section.get("metadata", {})
|
||||||
|
section["metadata"]["source_part"] = part.id
|
||||||
|
section["metadata"]["source_document"] = part.metadata.get("documentId", "unknown")
|
||||||
|
section["metadata"]["chunk_index"] = part.metadata.get("chunkIndex", 0)
|
||||||
|
all_sections.append(section)
|
||||||
|
|
||||||
|
# Extract document title
|
||||||
|
if isinstance(chunk_json, dict) and "metadata" in chunk_json:
|
||||||
|
title = chunk_json["metadata"].get("title", "")
|
||||||
|
if title and title not in document_titles:
|
||||||
|
document_titles.append(title)
|
||||||
|
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.warning(f"Failed to parse JSON from part {part.id}: {str(e)}")
|
||||||
|
# Create a fallback section for invalid JSON
|
||||||
|
fallback_section = {
|
||||||
|
"id": f"error_section_{part.id}",
|
||||||
"title": "Error Section",
|
"title": "Error Section",
|
||||||
"content_type": "paragraph",
|
"content_type": "paragraph",
|
||||||
"elements": [{
|
"elements": [{
|
||||||
"text": f"Error in chunk {chunk_result.chunkIndex}: {chunk_metadata.get('error', 'Unknown error')}"
|
"text": f"Error parsing part {part.id}: {str(e)}"
|
||||||
}],
|
}],
|
||||||
"order": chunk_result.chunkIndex,
|
"order": part.metadata.get("chunkIndex", 0),
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"source_document": doc_id,
|
"source_document": part.metadata.get("documentId", "unknown"),
|
||||||
"chunk_index": chunk_result.chunkIndex,
|
"part_id": part.id,
|
||||||
"error": chunk_metadata.get('error', 'Unknown error')
|
"error": str(e)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
all_sections.append(error_section)
|
all_sections.append(fallback_section)
|
||||||
|
else:
|
||||||
|
# Handle error parts
|
||||||
|
error_section = {
|
||||||
|
"id": f"error_section_{part.id}",
|
||||||
|
"title": "Error Section",
|
||||||
|
"content_type": "paragraph",
|
||||||
|
"elements": [{
|
||||||
|
"text": f"Error in part {part.id}: {part.metadata.get('error', 'Unknown error')}"
|
||||||
|
}],
|
||||||
|
"order": part.metadata.get("chunkIndex", 0),
|
||||||
|
"metadata": {
|
||||||
|
"source_document": part.metadata.get("documentId", "unknown"),
|
||||||
|
"part_id": part.id,
|
||||||
|
"error": part.metadata.get('error', 'Unknown error')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
all_sections.append(error_section)
|
||||||
|
|
||||||
# Sort sections by order
|
# Sort sections by order
|
||||||
all_sections.sort(key=lambda x: x.get("order", 0))
|
all_sections.sort(key=lambda x: x.get("order", 0))
|
||||||
|
|
||||||
# If we have merged documents from multi-file responses, return them
|
# Create merged document with sections
|
||||||
if all_documents:
|
|
||||||
logger.info(f"Merged {len(all_documents)} documents from {len(chunkResults)} chunks")
|
|
||||||
return {
|
|
||||||
"metadata": combined_metadata,
|
|
||||||
"documents": all_documents
|
|
||||||
}
|
|
||||||
|
|
||||||
# Otherwise, create merged document with sections (single-file fallback)
|
|
||||||
merged_document = {
|
merged_document = {
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"title": document_titles[0] if document_titles else "Merged Document",
|
"title": document_titles[0] if document_titles else "Merged Document",
|
||||||
"source_documents": list(results_by_document.keys()),
|
"extraction_method": "ai_json_extraction_with_merging",
|
||||||
"extraction_method": "ai_json_extraction",
|
"version": "2.0"
|
||||||
"version": "1.0"
|
|
||||||
},
|
},
|
||||||
"sections": all_sections,
|
"sections": all_sections,
|
||||||
"summary": f"Merged document from {len(results_by_document)} source documents",
|
"summary": f"Merged document using sophisticated merging system",
|
||||||
"tags": ["merged", "ai_generated"]
|
"tags": ["merged", "ai_generated", "sophisticated_merging"]
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info(f"Merged {len(chunkResults)} chunks from {len(results_by_document)} documents (JSON mode)")
|
logger.info(f"Merged {len(chunkResults)} chunks using existing sophisticated merging system (JSON mode)")
|
||||||
return merged_document
|
return merged_document
|
||||||
|
|
||||||
def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]:
|
# REMOVED: _getModelCapabilitiesForContent method - no longer needed with model-aware chunking
|
||||||
"""
|
|
||||||
Get model capabilities for content processing, including appropriate size limits for chunking.
|
|
||||||
Uses centralized model selection to determine chunking parameters.
|
|
||||||
"""
|
|
||||||
# Estimate total content size
|
|
||||||
prompt_size = len(prompt.encode('utf-8'))
|
|
||||||
document_size = 0
|
|
||||||
if documents:
|
|
||||||
# Rough estimate of document content size
|
|
||||||
for doc in documents:
|
|
||||||
document_size += doc.fileSize or 0
|
|
||||||
|
|
||||||
total_size = prompt_size + document_size
|
|
||||||
|
|
||||||
# Use centralized model selection to get the best model for chunking parameters
|
|
||||||
try:
|
|
||||||
from modules.aicore.aicoreModelRegistry import modelRegistry
|
|
||||||
from modules.aicore.aicoreModelSelector import model_selector
|
|
||||||
|
|
||||||
# Get available models and select the best one for this operation
|
|
||||||
availableModels = modelRegistry.getAvailableModels()
|
|
||||||
selectedModel = model_selector.selectModel(prompt, "", options, availableModels)
|
|
||||||
|
|
||||||
if selectedModel:
|
|
||||||
context_length = selectedModel.contextLength
|
|
||||||
model_name = selectedModel.name
|
|
||||||
logger.debug(f"Selected model for chunking: {model_name} with context length: {context_length}")
|
|
||||||
else:
|
|
||||||
# Fallback to conservative default if no model selected
|
|
||||||
context_length = 128000 # GPT-4o default
|
|
||||||
model_name = "fallback"
|
|
||||||
logger.warning(f"No model selected for chunking, using fallback context length: {context_length}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
# Fallback to conservative default if model selection fails
|
|
||||||
context_length = 128000 # GPT-4o default
|
|
||||||
model_name = "fallback"
|
|
||||||
logger.error(f"Model selection failed for chunking: {e}, using fallback context length: {context_length}")
|
|
||||||
|
|
||||||
# Calculate appropriate sizes
|
|
||||||
# Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters)
|
|
||||||
context_length_bytes = int(context_length * 4)
|
|
||||||
max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length
|
|
||||||
text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks
|
|
||||||
image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks
|
|
||||||
|
|
||||||
logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes")
|
|
||||||
logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes")
|
|
||||||
|
|
||||||
return {
|
|
||||||
"maxContextBytes": max_context_bytes,
|
|
||||||
"textChunkSize": text_chunk_size,
|
|
||||||
"imageChunkSize": image_chunk_size
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -73,139 +73,18 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker
|
||||||
|
|
||||||
parts = extractor.extract(documentBytes, {"fileName": fileName, "mimeType": mimeType, "options": options})
|
parts = extractor.extract(documentBytes, {"fileName": fileName, "mimeType": mimeType, "options": options})
|
||||||
|
|
||||||
# Apply chunking and size limiting
|
# REMOVED: poolAndLimit(parts, chunkerRegistry, options)
|
||||||
parts = poolAndLimit(parts, chunkerRegistry, options)
|
# REMOVED: Chunking logic - now handled in AI call phase
|
||||||
|
|
||||||
# Optional merge step - but preserve chunks
|
# Apply merging strategy if provided (preserve existing logic)
|
||||||
mergeStrategy = options.get("mergeStrategy", {})
|
mergeStrategy = options.get("mergeStrategy", {})
|
||||||
if mergeStrategy:
|
if mergeStrategy:
|
||||||
|
parts = _applyMerging(parts, mergeStrategy)
|
||||||
# Don't merge chunks - they should stay separate for processing
|
|
||||||
non_chunk_parts = [p for p in parts if not p.metadata.get("chunk", False)]
|
|
||||||
chunk_parts = [p for p in parts if p.metadata.get("chunk", False)]
|
|
||||||
|
|
||||||
logger.debug(f"runExtraction: Preserving {len(chunk_parts)} chunks from merging")
|
|
||||||
logger.debug(f"runExtraction - non_chunk_parts: {len(non_chunk_parts)}, chunk_parts: {len(chunk_parts)}")
|
|
||||||
|
|
||||||
# Apply intelligent merging for small text parts
|
|
||||||
if non_chunk_parts:
|
|
||||||
# Count text parts
|
|
||||||
text_parts = [p for p in non_chunk_parts if p.typeGroup == "text"]
|
|
||||||
if len(text_parts) > 5: # If we have many small text parts, merge them
|
|
||||||
logger.info(f"🔧 Merging {len(text_parts)} small text parts for efficiency")
|
|
||||||
non_chunk_parts = _mergeParts(non_chunk_parts, mergeStrategy)
|
|
||||||
|
|
||||||
# Combine non-chunk parts with chunk parts (chunks stay separate)
|
|
||||||
parts = non_chunk_parts + chunk_parts
|
|
||||||
|
|
||||||
logger.debug(f"runExtraction: Final parts after merging: {len(parts)} (chunks: {len(chunk_parts)})")
|
|
||||||
logger.debug(f"runExtraction - Final parts: {len(parts)} (chunks: {len(chunk_parts)})")
|
|
||||||
# Timestamp-only extraction debug dumps removed
|
|
||||||
|
|
||||||
return ContentExtracted(id=makeId(), parts=parts)
|
return ContentExtracted(id=makeId(), parts=parts)
|
||||||
|
|
||||||
|
|
||||||
def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, options: Dict[str, Any]) -> List[ContentPart]:
|
# REMOVED: poolAndLimit function - chunking now handled in AI call phase
|
||||||
maxSize = int(options.get("maxSize", 0) or 0)
|
|
||||||
chunkAllowed = bool(options.get("chunkAllowed", False))
|
|
||||||
mergeStrategy = options.get("mergeStrategy", {})
|
|
||||||
|
|
||||||
if maxSize <= 0:
|
|
||||||
# Still apply merging if strategy provided
|
|
||||||
if mergeStrategy:
|
|
||||||
return _applyMerging(parts, mergeStrategy)
|
|
||||||
return parts
|
|
||||||
|
|
||||||
# First, try to fit within size limit
|
|
||||||
current = 0
|
|
||||||
kept: List[ContentPart] = []
|
|
||||||
remaining: List[ContentPart] = []
|
|
||||||
|
|
||||||
logger.debug(f"Starting poolAndLimit with {len(parts)} parts, maxSize={maxSize}")
|
|
||||||
|
|
||||||
for i, p in enumerate(parts):
|
|
||||||
size = int(p.metadata.get("size", 0) or 0)
|
|
||||||
# Show first 50 characters of text content for debugging
|
|
||||||
content_preview = p.data[:50].replace('\n', '\\n') if p.data else ""
|
|
||||||
logger.debug(f"Part {i}: {p.typeGroup} - {size} bytes - '{content_preview}...' (current: {current})")
|
|
||||||
if current + size <= maxSize:
|
|
||||||
kept.append(p)
|
|
||||||
current += size
|
|
||||||
logger.debug(f"Part {i} kept (total: {current})")
|
|
||||||
else:
|
|
||||||
remaining.append(p)
|
|
||||||
logger.debug(f"Part {i} moved to remaining")
|
|
||||||
|
|
||||||
logger.debug(f"Kept: {len(kept)}, Remaining: {len(remaining)}")
|
|
||||||
|
|
||||||
# If we have remaining parts and chunking is allowed, try chunking
|
|
||||||
if remaining and chunkAllowed:
|
|
||||||
logger.debug(f"=== CHUNKING ACTIVATED ===")
|
|
||||||
logger.debug(f"Remaining parts to chunk: {len(remaining)}")
|
|
||||||
logger.debug(f"Max size limit: {maxSize} bytes")
|
|
||||||
logger.debug(f"Current size used: {current} bytes")
|
|
||||||
logger.debug(f"Chunking {len(remaining)} remaining parts")
|
|
||||||
|
|
||||||
for p in remaining:
|
|
||||||
if p.typeGroup in ("text", "table", "structure", "image", "container", "binary"):
|
|
||||||
logger.debug(f"Chunking {p.typeGroup} part: {len(p.data)} chars")
|
|
||||||
logger.debug(f"Chunking {p.typeGroup} part with {len(p.data)} chars")
|
|
||||||
chunks = chunkerRegistry.resolve(p.typeGroup).chunk(p, options)
|
|
||||||
logger.debug(f"Created {len(chunks)} chunks")
|
|
||||||
logger.debug(f"Created {len(chunks)} chunks")
|
|
||||||
|
|
||||||
chunks_added = 0
|
|
||||||
for ch in chunks:
|
|
||||||
chSize = int(ch.get("size", 0) or 0)
|
|
||||||
# Add all chunks - don't limit by maxSize since they'll be processed separately
|
|
||||||
kept.append(ContentPart(
|
|
||||||
id=makeId(),
|
|
||||||
parentId=p.id,
|
|
||||||
label=f"chunk_{ch.get('order', 0)}",
|
|
||||||
typeGroup=p.typeGroup,
|
|
||||||
mimeType=p.mimeType,
|
|
||||||
data=ch.get("data", ""),
|
|
||||||
metadata={
|
|
||||||
"size": chSize,
|
|
||||||
"chunk": True,
|
|
||||||
**ch.get("metadata", {})
|
|
||||||
}
|
|
||||||
))
|
|
||||||
chunks_added += 1
|
|
||||||
logger.debug(f"Added chunk {ch.get('order', 0)}: {chSize} bytes")
|
|
||||||
|
|
||||||
logger.debug(f"Added {chunks_added} chunks from {p.typeGroup} part")
|
|
||||||
|
|
||||||
# Apply merging strategy if provided, but preserve chunks
|
|
||||||
if mergeStrategy:
|
|
||||||
# Don't merge chunks - they should stay separate for processing
|
|
||||||
non_chunk_parts = [p for p in kept if not p.metadata.get("chunk", False)]
|
|
||||||
chunk_parts = [p for p in kept if p.metadata.get("chunk", False)]
|
|
||||||
|
|
||||||
logger.debug(f"Preserving {len(chunk_parts)} chunks from merging")
|
|
||||||
|
|
||||||
# Apply intelligent merging for small text parts
|
|
||||||
if non_chunk_parts:
|
|
||||||
# Count text parts
|
|
||||||
text_parts = [p for p in non_chunk_parts if p.typeGroup == "text"]
|
|
||||||
if len(text_parts) > 5: # If we have many small text parts, merge them
|
|
||||||
logger.info(f"🔧 Merging {len(text_parts)} small text parts for efficiency")
|
|
||||||
non_chunk_parts = _applyMerging(non_chunk_parts, mergeStrategy)
|
|
||||||
|
|
||||||
# Combine non-chunk parts with chunk parts (chunks stay separate)
|
|
||||||
kept = non_chunk_parts + chunk_parts
|
|
||||||
|
|
||||||
logger.debug(f"Final parts after merging: {len(kept)} (chunks: {len(chunk_parts)})")
|
|
||||||
logger.debug(f"Final parts after merging: {len(kept)} (chunks: {len(chunk_parts)})")
|
|
||||||
|
|
||||||
# Re-check size after merging
|
|
||||||
totalSize = sum(int(p.metadata.get("size", 0) or 0) for p in kept)
|
|
||||||
if totalSize > maxSize and mergeStrategy.get("maxSize"):
|
|
||||||
# Apply size limit to merged parts
|
|
||||||
kept = _applySizeLimit(kept, maxSize)
|
|
||||||
|
|
||||||
logger.debug(f"poolAndLimit returning {len(kept)} parts")
|
|
||||||
return kept
|
|
||||||
|
|
||||||
|
|
||||||
def _applyMerging(parts: List[ContentPart], strategy: Dict[str, Any]) -> List[ContentPart]:
|
def _applyMerging(parts: List[ContentPart], strategy: Dict[str, Any]) -> List[ContentPart]:
|
||||||
|
|
@ -264,37 +143,5 @@ def _applyMerging(parts: List[ContentPart], strategy: Dict[str, Any]) -> List[Co
|
||||||
return merged
|
return merged
|
||||||
|
|
||||||
|
|
||||||
def _applySizeLimit(parts: List[ContentPart], maxSize: int) -> List[ContentPart]:
|
# REMOVED: _applySizeLimit function - no longer needed after removing poolAndLimit
|
||||||
"""Apply size limit by prioritizing parts and truncating if necessary."""
|
|
||||||
# Sort by priority: text first, then others
|
|
||||||
priority_order = {"text": 0, "table": 1, "structure": 2, "image": 3, "binary": 4, "metadata": 5, "container": 6}
|
|
||||||
sorted_parts = sorted(parts, key=lambda p: priority_order.get(p.typeGroup, 99))
|
|
||||||
|
|
||||||
kept: List[ContentPart] = []
|
|
||||||
current_size = 0
|
|
||||||
|
|
||||||
for part in sorted_parts:
|
|
||||||
part_size = int(part.metadata.get("size", 0) or 0)
|
|
||||||
if current_size + part_size <= maxSize:
|
|
||||||
kept.append(part)
|
|
||||||
current_size += part_size
|
|
||||||
else:
|
|
||||||
# Try to truncate text parts
|
|
||||||
if part.typeGroup == "text" and part_size > 0:
|
|
||||||
remaining_size = maxSize - current_size
|
|
||||||
if remaining_size > 1000: # Only truncate if we have meaningful space
|
|
||||||
truncated_data = part.data[:remaining_size * 4] # Rough character estimate
|
|
||||||
truncated_part = ContentPart(
|
|
||||||
id=makeId(),
|
|
||||||
parentId=part.parentId,
|
|
||||||
label=f"{part.label}_truncated",
|
|
||||||
typeGroup=part.typeGroup,
|
|
||||||
mimeType=part.mimeType,
|
|
||||||
data=truncated_data,
|
|
||||||
metadata={**part.metadata, "size": len(truncated_data.encode('utf-8')), "truncated": True}
|
|
||||||
)
|
|
||||||
kept.append(truncated_part)
|
|
||||||
break
|
|
||||||
|
|
||||||
return kept
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -138,6 +138,7 @@ class AIBehaviorTester:
|
||||||
self.testResults.append(result)
|
self.testResults.append(result)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def _extractContinuationInstruction(self, response: str) -> str:
|
def _extractContinuationInstruction(self, response: str) -> str:
|
||||||
"""Extract continuation instruction from response."""
|
"""Extract continuation instruction from response."""
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ from modules.datamodels.datamodelAi import (
|
||||||
)
|
)
|
||||||
from modules.datamodels.datamodelUam import User
|
from modules.datamodels.datamodelUam import User
|
||||||
from modules.aicore.aicoreModelRegistry import modelRegistry
|
from modules.aicore.aicoreModelRegistry import modelRegistry
|
||||||
from modules.aicore.aicoreModelSelector import model_selector
|
from modules.aicore.aicoreModelSelector import modelSelector
|
||||||
|
|
||||||
|
|
||||||
class ModelSelectionTester:
|
class ModelSelectionTester:
|
||||||
|
|
@ -45,6 +45,51 @@ class ModelSelectionTester:
|
||||||
|
|
||||||
self.services.ai = await AiService.create(self.services)
|
self.services.ai = await AiService.create(self.services)
|
||||||
|
|
||||||
|
async def _printFallbackListWithContext(self, title: str, prompt: str, context: str, options: AiCallOptions) -> None:
|
||||||
|
print(f"\n{'='*80}")
|
||||||
|
print(f"{title}")
|
||||||
|
print(f"{'='*80}")
|
||||||
|
print(
|
||||||
|
f"Operation={options.operationType.name}, Priority={options.priority.name}, ProcessingMode={options.processingMode.name}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Show context and prompt sizes
|
||||||
|
promptSize = len(prompt.encode("utf-8"))
|
||||||
|
contextSize = len(context.encode("utf-8"))
|
||||||
|
totalSize = promptSize + contextSize
|
||||||
|
print(f"Prompt size: {promptSize} bytes, Context size: {contextSize} bytes, Total: {totalSize} bytes")
|
||||||
|
|
||||||
|
availableModels = modelRegistry.getAvailableModels()
|
||||||
|
failoverModelList = modelSelector.getFailoverModelList(
|
||||||
|
prompt=prompt,
|
||||||
|
context=context,
|
||||||
|
options=options,
|
||||||
|
availableModels=availableModels,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not failoverModelList:
|
||||||
|
print("No suitable models found (capability filter returned empty list).")
|
||||||
|
return
|
||||||
|
|
||||||
|
print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx | score):")
|
||||||
|
for idx, m in enumerate(failoverModelList, 1):
|
||||||
|
costIn = getattr(m, "costPer1kTokensInput", 0.0)
|
||||||
|
# Calculate detailed score breakdown
|
||||||
|
promptSize = len(prompt.encode("utf-8"))
|
||||||
|
contextSize = len(context.encode("utf-8"))
|
||||||
|
totalSize = promptSize + contextSize
|
||||||
|
|
||||||
|
# Get detailed scoring
|
||||||
|
sizeRating = modelSelector._getSizeRating(m, totalSize)
|
||||||
|
processingModeRating = modelSelector._getProcessingModeRating(m.processingMode, options.processingMode)
|
||||||
|
priorityRating = modelSelector._getPriorityRating(m, options.priority)
|
||||||
|
totalScore = sizeRating + processingModeRating + priorityRating
|
||||||
|
|
||||||
|
print(
|
||||||
|
f" {idx:>2}. {m.name} | Q={getattr(m, 'qualityRating', 0)} | S={getattr(m, 'speedRating', 0)} | ${costIn:.4f} | ctx={getattr(m, 'contextLength', 0)} | score={totalScore:.3f}"
|
||||||
|
)
|
||||||
|
print(f" Size: {sizeRating:.3f}, ProcessingMode: {processingModeRating:.3f}, Priority: {priorityRating:.3f}")
|
||||||
|
|
||||||
async def _printFallbackList(self, title: str, prompt: str, options: AiCallOptions) -> None:
|
async def _printFallbackList(self, title: str, prompt: str, options: AiCallOptions) -> None:
|
||||||
print(f"\n{'='*80}")
|
print(f"\n{'='*80}")
|
||||||
print(f"{title}")
|
print(f"{title}")
|
||||||
|
|
@ -53,24 +98,43 @@ class ModelSelectionTester:
|
||||||
f"Operation={options.operationType.name}, Priority={options.priority.name}, ProcessingMode={options.processingMode.name}"
|
f"Operation={options.operationType.name}, Priority={options.priority.name}, ProcessingMode={options.processingMode.name}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Show context and prompt sizes
|
||||||
|
context = "" # Currently using empty context
|
||||||
|
promptSize = len(prompt.encode("utf-8"))
|
||||||
|
contextSize = len(context.encode("utf-8"))
|
||||||
|
totalSize = promptSize + contextSize
|
||||||
|
print(f"Prompt size: {promptSize} bytes, Context size: {contextSize} bytes, Total: {totalSize} bytes")
|
||||||
|
|
||||||
availableModels = modelRegistry.getAvailableModels()
|
availableModels = modelRegistry.getAvailableModels()
|
||||||
fallbackModels = model_selector.getFallbackModels(
|
failoverModelList = modelSelector.getFailoverModelList(
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
context="",
|
context=context,
|
||||||
options=options,
|
options=options,
|
||||||
availableModels=availableModels,
|
availableModels=availableModels,
|
||||||
)
|
)
|
||||||
|
|
||||||
if not fallbackModels:
|
if not failoverModelList:
|
||||||
print("No suitable models found (capability filter returned empty list).")
|
print("No suitable models found (capability filter returned empty list).")
|
||||||
return
|
return
|
||||||
|
|
||||||
print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx):")
|
print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx | score):")
|
||||||
for idx, m in enumerate(fallbackModels, 1):
|
for idx, m in enumerate(failoverModelList, 1):
|
||||||
costIn = getattr(m, "costPer1kTokensInput", 0.0)
|
costIn = getattr(m, "costPer1kTokensInput", 0.0)
|
||||||
|
# Calculate detailed score breakdown
|
||||||
|
promptSize = len(prompt.encode("utf-8"))
|
||||||
|
contextSize = len(context.encode("utf-8"))
|
||||||
|
totalSize = promptSize + contextSize
|
||||||
|
|
||||||
|
# Get detailed scoring
|
||||||
|
sizeRating = modelSelector._getSizeRating(m, totalSize)
|
||||||
|
processingModeRating = modelSelector._getProcessingModeRating(m.processingMode, options.processingMode)
|
||||||
|
priorityRating = modelSelector._getPriorityRating(m, options.priority)
|
||||||
|
totalScore = sizeRating + processingModeRating + priorityRating
|
||||||
|
|
||||||
print(
|
print(
|
||||||
f" {idx:>2}. {m.name} | Q={getattr(m, 'qualityRating', 0)} | S={getattr(m, 'speedRating', 0)} | ${costIn:.4f} | ctx={getattr(m, 'contextLength', 0)}"
|
f" {idx:>2}. {m.name} | Q={getattr(m, 'qualityRating', 0)} | S={getattr(m, 'speedRating', 0)} | ${costIn:.4f} | ctx={getattr(m, 'contextLength', 0)} | score={totalScore:.3f}"
|
||||||
)
|
)
|
||||||
|
print(f" Size: {sizeRating:.3f}, ProcessingMode: {processingModeRating:.3f}, Priority: {priorityRating:.3f}")
|
||||||
|
|
||||||
async def run(self) -> None:
|
async def run(self) -> None:
|
||||||
# Scenarios reflecting workflows/
|
# Scenarios reflecting workflows/
|
||||||
|
|
@ -146,10 +210,93 @@ class ModelSelectionTester:
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Intent analysis (user input understanding)
|
||||||
|
scenarios.append(
|
||||||
|
(
|
||||||
|
"ANALYSE - Quality, Detailed (Intent Analysis)",
|
||||||
|
"Analyze user intent and extract key requirements from the following request: 'I need to create a comprehensive marketing strategy for our new product launch including budget allocation, timeline, and target audience analysis.'",
|
||||||
|
AiCallOptions(
|
||||||
|
operationType=OperationTypeEnum.ANALYSE,
|
||||||
|
priority=PriorityEnum.QUALITY,
|
||||||
|
compressPrompt=False,
|
||||||
|
compressContext=False,
|
||||||
|
processingMode=ProcessingModeEnum.DETAILED,
|
||||||
|
maxCost=0.08,
|
||||||
|
maxProcessingTime=45,
|
||||||
|
resultFormat="json",
|
||||||
|
temperature=0.2,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Review/Validation (quality assurance)
|
||||||
|
scenarios.append(
|
||||||
|
(
|
||||||
|
"ANALYSE - Quality, Detailed (Review/Validation)",
|
||||||
|
"Review and validate the following business proposal for completeness, accuracy, and compliance with industry standards. Identify any gaps or areas for improvement.",
|
||||||
|
AiCallOptions(
|
||||||
|
operationType=OperationTypeEnum.ANALYSE,
|
||||||
|
priority=PriorityEnum.QUALITY,
|
||||||
|
compressPrompt=False,
|
||||||
|
compressContext=False,
|
||||||
|
processingMode=ProcessingModeEnum.DETAILED,
|
||||||
|
maxCost=0.10,
|
||||||
|
maxProcessingTime=60,
|
||||||
|
resultFormat="json",
|
||||||
|
temperature=0.1,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Large context scenario (to test size-based scoring)
|
||||||
|
scenarios.append(
|
||||||
|
(
|
||||||
|
"GENERAL - Balanced, Advanced (Large Context Test)",
|
||||||
|
"Process this large document and provide a comprehensive summary.",
|
||||||
|
AiCallOptions(
|
||||||
|
operationType=OperationTypeEnum.GENERAL,
|
||||||
|
priority=PriorityEnum.BALANCED,
|
||||||
|
compressPrompt=False,
|
||||||
|
compressContext=False,
|
||||||
|
processingMode=ProcessingModeEnum.ADVANCED,
|
||||||
|
maxCost=0.15,
|
||||||
|
maxProcessingTime=120,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# Iterate and print lists
|
# Iterate and print lists
|
||||||
for title, prompt, options in scenarios:
|
for title, prompt, options in scenarios:
|
||||||
await self._printFallbackList(title, prompt, options)
|
await self._printFallbackList(title, prompt, options)
|
||||||
|
|
||||||
|
# Test with actual context to see size-based scoring
|
||||||
|
largeContext = """
|
||||||
|
This is a comprehensive business document containing detailed information about our company's strategic initiatives,
|
||||||
|
financial performance, market analysis, competitive landscape, operational metrics, customer feedback,
|
||||||
|
product development roadmap, technology stack, human resources, legal compliance, risk management,
|
||||||
|
sustainability efforts, and future growth plans. The document spans multiple sections including executive summary,
|
||||||
|
market research, financial statements, operational reports, customer insights, product specifications,
|
||||||
|
technology architecture, HR policies, legal frameworks, risk assessments, environmental impact studies,
|
||||||
|
and strategic recommendations. This extensive content is designed to test the model selection algorithm's
|
||||||
|
ability to handle large context sizes and make intelligent decisions about which models are best suited
|
||||||
|
for processing such substantial amounts of information while maintaining efficiency and cost-effectiveness.
|
||||||
|
""" * 10 # Repeat to make it even larger
|
||||||
|
|
||||||
|
await self._printFallbackListWithContext(
|
||||||
|
"GENERAL - Balanced, Advanced (Large Context Test)",
|
||||||
|
"Analyze this comprehensive business document and provide key insights.",
|
||||||
|
largeContext,
|
||||||
|
AiCallOptions(
|
||||||
|
operationType=OperationTypeEnum.GENERAL,
|
||||||
|
priority=PriorityEnum.BALANCED,
|
||||||
|
compressPrompt=False,
|
||||||
|
compressContext=False,
|
||||||
|
processingMode=ProcessingModeEnum.ADVANCED,
|
||||||
|
maxCost=0.15,
|
||||||
|
maxProcessingTime=120,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def main() -> None:
|
async def main() -> None:
|
||||||
tester = ModelSelectionTester()
|
tester = ModelSelectionTester()
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue