Ready for test: revised dynamic AI-aware chunking system

This commit is contained in:
ValueOn AG 2025-10-23 00:35:44 +02:00
parent 6b819cc848
commit 36947b6d7e
14 changed files with 1160 additions and 1014 deletions

View file

@ -48,7 +48,7 @@ class ModelRegistry:
try:
# Import the module
module = importlib.import_module(f'modules.connectors.{moduleName}')
module = importlib.import_module(f'modules.aicore.{moduleName}')
# Find connector classes (classes that inherit from BaseConnectorAi)
for attrName in dir(module):

View file

@ -1,158 +0,0 @@
"""
Configuration for dynamic model selection rules.
This makes model selection configurable rather than hardcoded.
"""
from typing import Dict, List, Any
from modules.datamodels.datamodelAi import OperationTypeEnum, ModelCapabilitiesEnum, PriorityEnum, SelectionRule
class ModelSelectionConfig:
    """Configurable rule store for dynamic model selection.

    Holds a mutable list of weighted SelectionRule objects plus a
    per-operation fallback-criteria table that is consulted when no
    rule produces a usable candidate.
    """

    def __init__(self):
        # Start from the built-in defaults; the mutator methods below let
        # callers adjust the rule set at runtime.
        self.rules = self._loadDefaultRules()
        self.fallbackModels = self._loadFallbackModels()

    def _loadDefaultRules(self) -> List[SelectionRule]:
        """Build and return the built-in selection rules."""
        return [
            # Planning and analysis: favour high-quality reasoning models.
            SelectionRule(
                name="highQualityAnalysis",
                condition="Planning or analysis operations requiring high quality",
                weight=10.0,
                operationTypes=[OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE],
                priority=PriorityEnum.QUALITY,
                capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.REASONING, ModelCapabilitiesEnum.ANALYSIS],
                minQualityRating=8
            ),
            # Everyday traffic: favour fast models.
            SelectionRule(
                name="fastBasicProcessing",
                condition="Basic operations requiring speed",
                weight=8.0,
                operationTypes=[OperationTypeEnum.GENERAL],
                priority=PriorityEnum.SPEED,
                capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.CHAT],
                minQualityRating=5
            ),
            # High-volume traffic: keep per-token price down.
            SelectionRule(
                name="costEffectiveProcessing",
                condition="High-volume operations where cost matters",
                weight=7.0,
                operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.GENERATE],
                priority=PriorityEnum.COST,
                capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION],
                maxCost=0.01  # $0.01 per 1k tokens
            ),
            # Vision workloads require multimodal models of high quality.
            SelectionRule(
                name="imageAnalyse",
                condition="Image analysis operations",
                weight=10.0,
                operationTypes=[OperationTypeEnum.IMAGE_ANALYSE],
                priority=PriorityEnum.QUALITY,
                capabilities=[ModelCapabilitiesEnum.VISION, ModelCapabilitiesEnum.MULTIMODAL],
                minQualityRating=8
            ),
            # Web research: balanced models that can search the web.
            SelectionRule(
                name="webResearch",
                condition="Web research operations",
                weight=9.0,
                operationTypes=[OperationTypeEnum.WEB_RESEARCH],
                priority=PriorityEnum.BALANCED,
                capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.ANALYSIS, ModelCapabilitiesEnum.WEB_SEARCH],
                minQualityRating=7
            ),
            # Very large inputs: require a big context window.
            SelectionRule(
                name="largeContext",
                condition="Operations requiring large context",
                weight=8.0,
                operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.ANALYSE],
                priority=PriorityEnum.BALANCED,
                capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION],
                minContextLength=100000  # 100k tokens
            )
        ]

    def _loadFallbackModels(self) -> Dict[str, Dict[str, Any]]:
        """Build the per-operation fallback-criteria table.

        NOTE(review): the "operationTypes" entries hold capability enum
        members, not operation types — confirm the key name is intended.
        """
        return {
            OperationTypeEnum.GENERAL: {
                "priorityOrder": ["speed", "quality", "cost"],
                "operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.CHAT],
                "minQualityRating": 5,
                "maxCostPer1k": 0.01
            },
            OperationTypeEnum.IMAGE_ANALYSE: {
                "priorityOrder": ["quality", "speed"],
                "operationTypes": [ModelCapabilitiesEnum.VISION, ModelCapabilitiesEnum.MULTIMODAL],
                "minQualityRating": 8,
                "maxCostPer1k": 0.1
            },
            OperationTypeEnum.IMAGE_GENERATE: {
                "priorityOrder": ["quality", "speed"],
                "operationTypes": [ModelCapabilitiesEnum.IMAGE_GENERATE, ModelCapabilitiesEnum.ART, ModelCapabilitiesEnum.VISUAL_CREATION],
                "minQualityRating": 8,
                "maxCostPer1k": 0.1
            },
            OperationTypeEnum.WEB_RESEARCH: {
                "priorityOrder": ["quality", "speed", "cost"],
                "operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.ANALYSIS],
                "preferredTags": [ModelCapabilitiesEnum.WEB_SEARCH],
                "minQualityRating": 7,
                "maxCostPer1k": 0.02
            },
            OperationTypeEnum.PLAN: {
                "priorityOrder": ["quality", "speed"],
                "operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.REASONING, ModelCapabilitiesEnum.ANALYSIS],
                "preferredTags": [PriorityEnum.QUALITY],
                "minQualityRating": 8,
                "maxCostPer1k": 0.1
            },
            OperationTypeEnum.ANALYSE: {
                "priorityOrder": ["quality", "speed"],
                "operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.ANALYSIS, ModelCapabilitiesEnum.REASONING],
                "preferredTags": [PriorityEnum.QUALITY],
                "minQualityRating": 8,
                "maxCostPer1k": 0.1
            }
        }

    def getRulesForOperation(self, operationType: str) -> List[SelectionRule]:
        """Return every rule whose operationTypes include *operationType*."""
        return [candidate for candidate in self.rules if operationType in candidate.operationTypes]

    def getFallbackCriteria(self, operationType: str) -> Dict[str, Any]:
        """Return fallback criteria for *operationType*, defaulting to GENERAL."""
        return self.fallbackModels.get(operationType, self.fallbackModels[OperationTypeEnum.GENERAL])

    def addRule(self, rule: SelectionRule):
        """Append *rule* to the active rule set."""
        self.rules.append(rule)

    def removeRule(self, ruleName: str):
        """Drop every rule whose name equals *ruleName*."""
        self.rules = [kept for kept in self.rules if kept.name != ruleName]

    def updateRule(self, ruleName: str, **kwargs):
        """Overwrite attributes on the first rule named *ruleName*.

        Unknown attribute names are silently ignored; only existing
        attributes on the rule are updated.
        """
        target = next((candidate for candidate in self.rules if candidate.name == ruleName), None)
        if target is not None:
            for attrName, attrValue in kwargs.items():
                if hasattr(target, attrName):
                    setattr(target, attrName, attrValue)
# Global configuration instance
# Module-level singleton; importers share (and may mutate) this object.
model_selection_config = ModelSelectionConfig()

View file

@ -1,20 +1,20 @@
"""
Dynamic model selector using configurable rules and scoring.
Simplified model selection based on model properties and priority-based sorting.
No complex rules needed - just filter by properties and sort by priority!
"""
import logging
from typing import List, Optional, Dict, Any, Tuple
from modules.datamodels.datamodelAi import AiModel, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum, ModelCapabilitiesEnum
from modules.aicore.aicoreModelSelectionConfig import model_selection_config
from typing import List, Dict, Any, Optional
from modules.datamodels.datamodelAi import AiModel, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
# Configure logger
logger = logging.getLogger(__name__)
class ModelSelector:
"""Dynamic model selector using configurable rules."""
"""Simple model selector based on properties and priority-based sorting."""
def __init__(self):
self.config = model_selection_config
logger.info("ModelSelector initialized with simplified approach")
def selectModel(self,
prompt: str,
@ -22,270 +22,7 @@ class ModelSelector:
options: AiCallOptions,
availableModels: List[AiModel]) -> Optional[AiModel]:
"""
Select the best model based on configurable rules and scoring.
Args:
prompt: User prompt
context: Context data
options: AI call options
availableModels: List of available models to choose from
Returns:
Selected model or None if no suitable model found
"""
if not availableModels:
logger.warning("No models available for selection")
return None
logger.info(f"Selecting model for operation: {options.operationType}, priority: {options.priority}")
# Calculate input size
inputSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))
# Get applicable rules
rules = self.config.getRulesForOperation(options.operationType)
logger.debug(f"Found {len(rules)} applicable rules for {options.operationType}")
# Score each model
scoredModels = []
for model in availableModels:
if not model.isAvailable:
continue
score = self._calculateModelScore(model, inputSize, options, rules)
if score > 0: # Only consider models with positive scores
scoredModels.append((model, score))
logger.debug(f"Model {model.name}: score={score:.2f}")
if not scoredModels:
logger.warning("No models passed the selection criteria, trying fallback criteria")
# Try fallback criteria
fallbackCriteria = self.getFallbackCriteria(options.operationType)
return self._selectWithFallbackCriteria(availableModels, fallbackCriteria, inputSize, options)
# Sort by score (highest first)
scoredModels.sort(key=lambda x: x[1], reverse=True)
selectedModel = scoredModels[0][0]
selectedScore = scoredModels[0][1]
logger.info(f"Selected model: {selectedModel.name} (score: {selectedScore:.2f})")
# Log selection details
self._logSelectionDetails(selectedModel, inputSize, options)
return selectedModel
def _calculateModelScore(self,
                         model: AiModel,
                         inputSize: int,
                         options: AiCallOptions,
                         rules: List) -> float:
    """Calculate score for a model based on rules and criteria.

    Args:
        model: Candidate model to score.
        inputSize: Combined prompt+context size in bytes.
        options: Caller's AI call options (priority, processing mode, ...).
        rules: Selection rules already filtered for this operation type.

    Returns:
        Non-negative score; 0.0 means the model is disqualified.
    """
    score = 0.0
    # Check basic requirements — hard gate on context length,
    # required operation types and capabilities.
    if not self._meetsBasicRequirements(model, inputSize, options):
        return 0.0
    # Apply rules: each rule contributes a sub-score scaled by its weight.
    for rule in rules:
        ruleScore = self._applyRule(model, inputSize, options, rule)
        score += ruleScore * rule.weight
    # Apply priority-based scoring
    priorityScore = self._applyPriorityScoring(model, options)
    score += priorityScore
    # Apply processing mode scoring
    modeScore = self._applyProcessingModeScoring(model, options)
    score += modeScore
    # Apply cost constraints
    if not self._meetsCostConstraints(model, inputSize, options):
        score *= 0.1  # Heavily penalize but don't eliminate
    return max(0.0, score)
def _meetsBasicRequirements(self, model: AiModel, inputSize: int, options: AiCallOptions) -> bool:
    """Check if model meets basic requirements.

    Hard disqualifiers: input larger than 80% of the model's context
    window, missing required operation types or capabilities, or the
    model advertising an operation type that a rule says to avoid.
    """
    # Context length check — keep 20% headroom below the model's window.
    # contextLength <= 0 is treated as "no limit".
    if model.contextLength > 0 and inputSize > model.contextLength * 0.8:
        logger.debug(f"Model {model.name} rejected: input too large ({inputSize} > {model.contextLength * 0.8})")
        return False
    # Required operation types check — the model must cover all of them.
    if options.operationTypes:
        if not all(opType in model.operationTypes for opType in options.operationTypes):
            logger.debug(f"Model {model.name} rejected: missing required operation types")
            return False
    # Capabilities check — the model must cover all requested capabilities.
    if options.capabilities:
        if not all(cap in model.capabilities for cap in options.capabilities):
            logger.debug(f"Model {model.name} rejected: missing required capabilities")
            return False
    # Avoid operation types check — any rule for this operation can veto a
    # model that advertises an operation type on its avoid list.
    for rule in self.config.getRulesForOperation(options.operationType):
        if any(opType in model.operationTypes for opType in rule.avoidOperationTypes):
            logger.debug(f"Model {model.name} rejected: has avoid operation types")
            return False
    return True
def _applyRule(self, model: AiModel, inputSize: int, options: AiCallOptions, rule) -> float:
    """Apply a specific rule to calculate score contribution.

    Additive sub-scores: +1.0 when the model covers all of the rule's
    operation types, up to +0.5 for preferred-capability coverage,
    +0.3 for meeting minQualityRating, +0.2 for meeting
    minContextLength (max 2.0 before the caller applies rule.weight).

    NOTE(review): inputSize and options are currently unused here.
    """
    score = 0.0
    # Required operation types match
    if all(opType in model.operationTypes for opType in rule.operationTypes):
        score += 1.0
    # Preferred capabilities match (fractional: matched / preferred)
    preferredMatches = sum(1 for cap in rule.preferredCapabilities if cap in model.capabilities)
    if rule.preferredCapabilities:
        score += (preferredMatches / len(rule.preferredCapabilities)) * 0.5
    # Quality rating check
    if rule.minQualityRating and model.qualityRating >= rule.minQualityRating:
        score += 0.3
    # Context length check
    if rule.minContextLength and model.contextLength >= rule.minContextLength:
        score += 0.2
    return score
def _applyPriorityScoring(self, model: AiModel, options: AiCallOptions) -> float:
    """Score bonus derived from the caller's priority preference.

    SPEED and QUALITY scale the corresponding 0-10 rating by 0.1;
    COST rewards cheaper models on the same 0-1 scale; anything else
    (BALANCED) averages quality and speed.
    """
    requested = options.priority
    if requested == PriorityEnum.SPEED:
        return model.speedRating * 0.1
    if requested == PriorityEnum.QUALITY:
        return model.qualityRating * 0.1
    if requested == PriorityEnum.COST:
        # Cheaper models score higher; clamped so the bonus never
        # goes negative for expensive models.
        cheapness = max(0, 1.0 - (model.costPer1kTokensInput * 1000))
        return cheapness * 0.1
    # BALANCED (or any other value): average quality and speed, same scale.
    return (model.qualityRating + model.speedRating) * 0.05
def _applyProcessingModeScoring(self, model: AiModel, options: AiCallOptions) -> float:
    """Apply processing mode scoring.

    Small bonus (+0.2) when the model's own priority matches the
    requested processing mode (DETAILED pairs with QUALITY models,
    BASIC pairs with SPEED models); 0.0 in every other combination.
    """
    if options.processingMode == ProcessingModeEnum.DETAILED:
        if model.priority == PriorityEnum.QUALITY:
            return 0.2
    elif options.processingMode == ProcessingModeEnum.BASIC:
        if model.priority == PriorityEnum.SPEED:
            return 0.2
    return 0.0
def _meetsCostConstraints(self, model: AiModel, inputSize: int, options: AiCallOptions) -> bool:
"""Check if model meets cost constraints."""
if options.maxCost is None:
return True
# Estimate cost
estimatedTokens = inputSize / 4
estimatedCost = (estimatedTokens / 1000) * model.costPer1kTokensInput
return estimatedCost <= options.maxCost
def _logSelectionDetails(self, model: AiModel, inputSize: int, options: AiCallOptions):
    """Log detailed selection information for the chosen model.

    Purely informational — no return value, no state changes.
    Ratings are logged on a 0-10 scale; cost is USD per 1k input tokens.
    """
    logger.info(f"Model Selection Details:")
    logger.info(f" Selected: {model.displayName} ({model.name})")
    logger.info(f" Connector: {model.connectorType}")
    logger.info(f" Operation: {options.operationType}")
    logger.info(f" Priority: {options.priority}")
    logger.info(f" Processing Mode: {options.processingMode}")
    logger.info(f" Input Size: {inputSize} bytes")
    logger.info(f" Context Length: {model.contextLength}")
    logger.info(f" Max Tokens: {model.maxTokens}")
    logger.info(f" Quality Rating: {model.qualityRating}/10")
    logger.info(f" Speed Rating: {model.speedRating}/10")
    logger.info(f" Cost: ${model.costPer1kTokensInput:.4f}/1k tokens")
    logger.info(f" Capabilities: {', '.join(model.capabilities)}")
    # NOTE(review): "Priority" is logged twice (options above, model here).
    logger.info(f" Priority: {model.priority}")
def getFallbackCriteria(self, operationType: str) -> Dict[str, Any]:
    """Get fallback selection criteria for an operation type.

    Thin delegation to the shared selection-config object.
    """
    return self.config.getFallbackCriteria(operationType)
def _selectWithFallbackCriteria(self,
                                availableModels: List[AiModel],
                                fallbackCriteria: Dict[str, Any],
                                inputSize: int,
                                options: AiCallOptions) -> Optional[AiModel]:
    """Select model using fallback criteria when normal selection fails.

    Args:
        availableModels: Candidate models.
        fallbackCriteria: Criteria dict (operationTypes, minQualityRating,
            maxCostPer1k, priorityOrder) from the selection config.
        inputSize: Combined prompt+context size in bytes.
        options: AI call options (currently unused in this method).

    Returns:
        Best fallback model, or None when nothing qualifies.
    """
    logger.info("Using fallback criteria for model selection")
    # Filter models by fallback criteria; each check skips (continues past)
    # models that fail it.
    candidates = []
    for model in availableModels:
        if not model.isAvailable:
            continue
        # Check required operation types
        if fallbackCriteria.get("operationTypes"):
            if not all(opType in model.operationTypes for opType in fallbackCriteria["operationTypes"]):
                continue
        # Check quality rating
        if fallbackCriteria.get("minQualityRating"):
            if model.qualityRating < fallbackCriteria["minQualityRating"]:
                continue
        # Check cost
        if fallbackCriteria.get("maxCostPer1k"):
            if model.costPer1kTokensInput > fallbackCriteria["maxCostPer1k"]:
                continue
        # Check context length (20% headroom; contextLength 0 = unlimited)
        if model.contextLength > 0 and inputSize > model.contextLength * 0.8:
            continue
        candidates.append(model)
    if not candidates:
        logger.error("No models available even with fallback criteria")
        return None
    # Sort by priority order from fallback criteria
    priorityOrder = fallbackCriteria.get("priorityOrder", ["quality", "speed", "cost"])
    def _getPriorityScore(model: AiModel) -> float:
        # Earlier entries in priorityOrder receive a larger integer weight.
        score = 0.0
        for i, priority in enumerate(priorityOrder):
            weight = len(priorityOrder) - i  # Higher weight for earlier priorities
            if priority == "quality":
                score += model.qualityRating * weight
            elif priority == "speed":
                score += model.speedRating * weight
            elif priority == "cost":
                # Lower cost = higher score
                # NOTE(review): for costs above $0.001/1k this term goes
                # negative — confirm that is intended.
                score += (1.0 - model.costPer1kTokensInput * 1000) * weight
        return score
    candidates.sort(key=_getPriorityScore, reverse=True)
    selectedModel = candidates[0]
    logger.info(f"Fallback selection: {selectedModel.name} (score: {_getPriorityScore(selectedModel):.2f})")
    return selectedModel
def getFallbackModels(self,
prompt: str,
context: str,
options: AiCallOptions,
availableModels: List[AiModel]) -> List[AiModel]:
"""
Get prioritized list of models for fallback sequence.
Steps:
1. Filter models by capability requirements
2. Rate models by business requirements (priority, processing mode)
3. Sort by rating (descending), then by cost (ascending)
Select the best model using simple filtering and priority-based sorting.
Args:
prompt: User prompt
@ -294,93 +31,195 @@ class ModelSelector:
availableModels: List of available models
Returns:
Prioritized list of models for fallback sequence
Best model for the request, or None if no suitable model found
"""
if not availableModels:
logger.warning("No models available for fallback selection")
try:
# Get failover models (which includes all filtering and sorting)
failoverModelList = self.getFailoverModelList(prompt, context, options, availableModels)
if not failoverModelList:
logger.warning("No suitable models found for the request")
return None
selectedModel = failoverModelList[0] # First model is the best one
logger.info(f"Selected model: {selectedModel.name} (quality: {selectedModel.qualityRating}, cost: ${selectedModel.costPer1kTokensInput:.4f})")
return selectedModel
except Exception as e:
logger.error(f"Error selecting model: {str(e)}")
return None
def getFailoverModelList(self,
                         prompt: str,
                         context: str,
                         options: AiCallOptions,
                         availableModels: List[AiModel]) -> List[AiModel]:
    """
    Get prioritized list of models using scoring-based ranking.
    Args:
        prompt: User prompt
        context: Context data
        options: AI call options
        availableModels: List of available models
    Returns:
        List of models sorted by score (descending); empty list on error
        or when no model supports the operation type.
    """
    try:
        promptSize = len(prompt.encode("utf-8"))
        contextSize = len(context.encode("utf-8"))
        totalSize = promptSize + contextSize
        # Step 1: Filter by operation type (MUST match)
        operationFiltered = [m for m in availableModels if options.operationType in m.operationTypes]
        logger.debug(f"After operation type filtering: {len(operationFiltered)} models")
        # Step 2: Filter by prompt size (MUST be <= 80% of context size)
        # contextLength == 0 means "no limit" and always passes.
        # NOTE(review): only promptSize is checked here while scoring uses
        # prompt+context (totalSize) — confirm the asymmetry is intended.
        promptFiltered = [m for m in operationFiltered if m.contextLength == 0 or promptSize <= m.contextLength * 0.8]
        logger.debug(f"After prompt size filtering: {len(promptFiltered)} models")
        # Step 3: Calculate scores for each model
        # NOTE(review): model.isAvailable is not consulted here — verify
        # availability filtering happens upstream (e.g. in the registry).
        scoredModels = []
        for model in promptFiltered:
            score = self._calculateModelScore(model, promptSize, contextSize, totalSize, options)
            scoredModels.append((model, score))
            logger.debug(f"Model {model.name}: score={score:.3f}")
        # Step 4: Sort by score (descending)
        scoredModels.sort(key=lambda x: x[1], reverse=True)
        sortedModels = [model for model, score in scoredModels]
        logger.debug(f"Final sorted models: {len(sortedModels)} models")
        return sortedModels
    except Exception as e:
        # Fail closed: any unexpected error yields an empty failover list.
        logger.error(f"Error getting failover models: {str(e)}")
        return []
logger.info(f"Building fallback sequence for operation: {options.operationType}, priority: {options.priority}")
def _calculateModelScore(self, model: AiModel, promptSize: int, contextSize: int, totalSize: int, options: AiCallOptions) -> float:
"""
Calculate a score for a model based on how well it fulfills the criteria.
# Step 1: Filter by capability requirements
capableModels = self._filterByCapabilities(availableModels, options)
logger.info(f"Step 1 - Capable models: {[m.name for m in capableModels]}")
if not capableModels:
logger.warning("No models meet capability requirements")
return []
# Step 2: Rate models by business requirements
ratedModels = self._rateModelsByBusinessRequirements(capableModels, prompt, context, options)
logger.info(f"Step 2 - Rated models: {[(m.name, rating) for m, rating in ratedModels]}")
# Step 3: Sort by rating (descending), then by cost (ascending)
sortedModels = self._sortModelsByRatingAndCost(ratedModels)
logger.info(f"Step 3 - Sorted fallback sequence: {[m.name for m in sortedModels]}")
return sortedModels
def _filterByCapabilities(self, models: List[AiModel], options: AiCallOptions) -> List[AiModel]:
"""Filter models by required capabilities."""
capableModels = []
for model in models:
if not model.isAvailable:
continue
Args:
model: The model to score
promptSize: Size of the prompt in bytes
contextSize: Size of the context in bytes
totalSize: Total size (prompt + context) in bytes
options: AI call options
# Check if model supports required capabilities
if options.capabilities:
if not all(cap in model.capabilities for cap in options.capabilities):
logger.debug(f"Model {model.name} missing required capabilities: {options.capabilities}")
continue
# Check operation type compatibility
if not self._meetsBasicRequirements(model, options):
logger.debug(f"Model {model.name} doesn't meet basic requirements")
continue
capableModels.append(model)
Returns:
Score for the model (higher is better)
"""
score = 0.0
return capableModels
def _rateModelsByBusinessRequirements(self,
models: List[AiModel],
prompt: str,
context: str,
options: AiCallOptions) -> List[Tuple[AiModel, float]]:
"""Rate models based on business requirements (priority, processing mode)."""
ratedModels = []
inputSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))
# 1. Prompt + Context size rating
if model.contextLength > 0:
modelMaxSize = model.contextLength * 0.8 # 80% of model context length
if totalSize <= modelMaxSize:
# Within limits: rating = (prompt+contextsize) / (80% modelsize)
score += totalSize / modelMaxSize
else:
# Exceeds limits: rating = modelsize / (prompt+contextsize) (ensures minimum chunks)
score += modelMaxSize / totalSize
else:
# No context length limit
score += 1.0
for model in models:
# Base score from model selection logic
baseScore = self._calculateModelScore(model, inputSize, options, [])
# Apply priority-based scoring
priorityScore = self._applyPriorityScoring(model, options)
# Apply processing mode scoring
processingScore = self._applyProcessingModeScoring(model, options)
# Combine scores
totalScore = baseScore + priorityScore + processingScore
ratedModels.append((model, totalScore))
logger.debug(f"Model {model.name}: base={baseScore:.2f}, priority={priorityScore:.2f}, processing={processingScore:.2f}, total={totalScore:.2f}")
# 2. Processing Mode rating
if hasattr(options, 'processingMode') and options.processingMode:
score += self._getProcessingModeRating(model.processingMode, options.processingMode)
else:
score += 1.0 # No preference
return ratedModels
def _sortModelsByRatingAndCost(self, ratedModels: List[Tuple[AiModel, float]]) -> List[AiModel]:
"""Sort models by rating (descending), then by cost (ascending)."""
def sortKey(item):
model, rating = item
# Primary sort: rating (descending)
# Secondary sort: cost (ascending)
return (-rating, model.costPer1kTokensInput)
# 3. Priority rating
if hasattr(options, 'priority') and options.priority:
score += self._getPriorityRating(model, options.priority)
else:
score += 1.0 # No preference
sortedItems = sorted(ratedModels, key=sortKey)
return [model for model, rating in sortedItems]
return score
def _getProcessingModeRating(self, modelMode: ProcessingModeEnum, requestedMode: ProcessingModeEnum) -> float:
"""Get processing mode rating based on compatibility."""
if modelMode == requestedMode:
return 1.0
# Compatibility matrix
if requestedMode == ProcessingModeEnum.BASIC:
if modelMode == ProcessingModeEnum.ADVANCED:
return 0.5
elif modelMode == ProcessingModeEnum.DETAILED:
return 0.2
elif requestedMode == ProcessingModeEnum.ADVANCED:
if modelMode == ProcessingModeEnum.BASIC:
return 0.2
elif modelMode == ProcessingModeEnum.DETAILED:
return 0.5
elif requestedMode == ProcessingModeEnum.DETAILED:
if modelMode == ProcessingModeEnum.BASIC:
return 0.2
elif modelMode == ProcessingModeEnum.ADVANCED:
return 0.5
return 0.0 # No compatibility
def _getPriorityRating(self, model: AiModel, requestedPriority: PriorityEnum) -> float:
    """Get priority rating based on model capabilities.

    Returns roughly a 0-1 rating for BALANCED/SPEED/QUALITY.  For COST
    the three weighted terms can sum to more than 1.0 (max ~1.7) —
    NOTE(review): confirm that imbalance against the other priorities
    is intended.
    """
    if requestedPriority == PriorityEnum.BALANCED:
        return 1.0
    elif requestedPriority == PriorityEnum.SPEED:
        return model.speedRating / 10.0
    elif requestedPriority == PriorityEnum.QUALITY:
        return model.qualityRating / 10.0
    elif requestedPriority == PriorityEnum.COST:
        # Cost priority: cost gives 1, speed gives 0.5, quality gives 0.2
        # Lower cost is better, so we invert the cost rating
        # Assumes $0.10/1k tokens as the normalization ceiling; models
        # costing more than that clamp to 0.
        costRating = 1.0 - (model.costPer1kTokensInput / 0.1)  # Normalize to 0-1
        costRating = max(0, costRating)  # Ensure non-negative
        speedRating = model.speedRating / 10.0 * 0.5
        qualityRating = model.qualityRating / 10.0 * 0.2
        return costRating + speedRating + qualityRating
    return 1.0  # Default
def _getSizeRating(self, model: AiModel, totalSize: int) -> float:
"""Get size rating for a model based on total input size."""
if model.contextLength > 0:
modelMaxSize = model.contextLength * 0.8 # 80% of model context length
if totalSize <= modelMaxSize:
# Within limits: rating = (prompt+contextsize) / (80% modelsize)
return totalSize / modelMaxSize
else:
# Exceeds limits: rating = modelsize / (prompt+contextsize) (ensures minimum chunks)
return modelMaxSize / totalSize
else:
# No context length limit
return 1.0
def _logModelDetails(self, model: AiModel):
    """Log detailed information about a model.

    Purely informational — no return value, no state changes.
    Ratings are logged on a 0-10 scale; cost is USD per 1k input tokens.
    """
    logger.info(f"Model: {model.name}")
    logger.info(f" Display Name: {model.displayName}")
    logger.info(f" Connector: {model.connectorType}")
    logger.info(f" Context Length: {model.contextLength}")
    logger.info(f" Max Tokens: {model.maxTokens}")
    logger.info(f" Quality Rating: {model.qualityRating}/10")
    logger.info(f" Speed Rating: {model.speedRating}/10")
    logger.info(f" Cost: ${model.costPer1kTokensInput:.4f}/1k tokens")
    logger.info(f" Capabilities: {', '.join(model.capabilities)}")
    logger.info(f" Priority: {model.priority}")
    logger.info(f" Processing Mode: {model.processingMode}")
    logger.info(f" Operation Types: {', '.join(model.operationTypes)}")
# Global selector instance
model_selector = ModelSelector()
# Global model selector instance
modelSelector = ModelSelector()

View file

@ -63,7 +63,7 @@ class AiAnthropic(BaseConnectorAi):
functionCall=self.callAiBasic,
priority=PriorityEnum.QUALITY,
processingMode=ProcessingModeEnum.DETAILED,
operationTypes=[OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE],
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE, OperationTypeEnum.GENERATE],
version="claude-3-5-sonnet-20241022",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.015 + (bytesReceived / 4 / 1000) * 0.075
),

View file

@ -34,7 +34,7 @@ class AiInternal(BaseConnectorAi):
functionCall=self.extractDocument,
priority=PriorityEnum.COST,
processingMode=ProcessingModeEnum.BASIC,
operationTypes=[OperationTypeEnum.GENERAL],
operationTypes=[OperationTypeEnum.EXTRACT],
version="internal-extractor-v1",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: 0.001 + (bytesSent + bytesReceived) / (1024 * 1024) * 0.01
),

View file

@ -65,7 +65,7 @@ class AiOpenai(BaseConnectorAi):
functionCall=self.callAiBasic,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.ADVANCED,
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.ANALYSE],
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE, OperationTypeEnum.GENERATE],
version="gpt-4o",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.03 + (bytesReceived / 4 / 1000) * 0.06
),
@ -83,7 +83,7 @@ class AiOpenai(BaseConnectorAi):
functionCall=self.callAiBasic,
priority=PriorityEnum.SPEED,
processingMode=ProcessingModeEnum.BASIC,
operationTypes=[OperationTypeEnum.GENERAL],
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.GENERATE],
version="gpt-3.5-turbo",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0015 + (bytesReceived / 4 / 1000) * 0.002
),

View file

@ -63,7 +63,7 @@ class AiPerplexity(BaseConnectorAi):
functionCall=self.callAiBasic,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.ADVANCED,
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.WEB_RESEARCH],
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE, OperationTypeEnum.GENERATE, OperationTypeEnum.WEB_RESEARCH],
version="llama-3.1-sonar-large-128k-online",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.005 + (bytesReceived / 4 / 1000) * 0.005
),

View file

@ -1,13 +1,17 @@
from typing import Optional, List, Dict, Any, Literal, Callable
from typing import Optional, List, Dict, Any, Literal, Callable, TYPE_CHECKING
from pydantic import BaseModel, Field
from enum import Enum
if TYPE_CHECKING:
from modules.datamodels.datamodelExtraction import ContentPart
# Operation Types
class OperationTypeEnum(str, Enum):
    """Operation categories used to route AI requests to models.

    str-mixin enum: members compare equal to (and serialize as) their
    camelCase string values.
    """
    GENERAL = "general"
    PLAN = "plan"
    ANALYSE = "analyse"
    GENERATE = "generate"
    EXTRACT = "extract"
    WEB_RESEARCH = "webResearch"
    IMAGE_ANALYSE = "imageAnalyse"
    IMAGE_GENERATE = "imageGenerate"
@ -141,6 +145,7 @@ class AiCallRequest(BaseModel):
prompt: str = Field(description="The user prompt")
context: Optional[str] = Field(default=None, description="Optional external context (e.g., extracted docs)")
options: AiCallOptions = Field(default_factory=AiCallOptions)
contentParts: Optional[List['ContentPart']] = None # NEW: Content parts for model-aware chunking
class AiCallResponse(BaseModel):

View file

@ -28,6 +28,16 @@ class ChunkResult(BaseModel):
metadata: Dict[str, Any] = Field(default_factory=dict)
class PartResult(BaseModel):
    """Preserves the relationship between a content part and its AI result."""
    # Source content part this result was produced from.
    originalPart: ContentPart
    # Raw AI output text for this part.
    aiResult: str
    # Index of the part within its document — NOTE(review): base (0 or 1)
    # not shown here; confirm at call sites.
    partIndex: int
    # Identifier of the document the part belongs to.
    documentId: str
    # Processing duration; presumably seconds — confirm at call sites.
    processingTime: float = 0.0
    # Free-form extra data attached by the processing pipeline.
    metadata: Dict[str, Any] = Field(default_factory=dict)
class MergeStrategy(BaseModel):
"""Strategy configuration for merging content parts and AI results."""

View file

@ -7,7 +7,7 @@ import time
logger = logging.getLogger(__name__)
from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.aicore.aicoreModelSelector import model_selector
from modules.aicore.aicoreModelSelector import modelSelector
from modules.datamodels.datamodelAi import (
AiModel,
AiCallOptions,
@ -70,7 +70,7 @@ class AiObjects:
raise ValueError("No AI models available")
# Use the dynamic model selector
selectedModel = model_selector.selectModel(prompt, context, options, availableModels)
selectedModel = modelSelector.selectModel(prompt, context, options, availableModels)
if not selectedModel:
logger.error("No suitable model found for the given criteria")
@ -81,8 +81,15 @@ class AiObjects:
async def call(self, request: AiCallRequest) -> AiCallResponse:
"""Call AI model for text generation with fallback mechanism."""
"""Call AI model for text generation with model-aware chunking."""
# Handle content parts (unified path)
if hasattr(request, 'contentParts') and request.contentParts:
return await self._callWithContentParts(request)
# Handle traditional text/context calls
return await self._callWithTextContext(request)
async def _callWithTextContext(self, request: AiCallRequest) -> AiCallResponse:
"""Call AI model for traditional text/context calls with fallback mechanism."""
prompt = request.prompt
context = request.context or ""
options = request.options
@ -108,11 +115,11 @@ class AiObjects:
temperature = 0.2
maxTokens = getattr(options, "maxTokens", None)
# Get fallback models for this operation type
# Get failover models for this operation type
availableModels = modelRegistry.getAvailableModels()
fallbackModels = model_selector.getFallbackModels(prompt, context, options, availableModels)
failoverModelList = modelSelector.getFailoverModelList(prompt, context, options, availableModels)
if not fallbackModels:
if not failoverModelList:
errorMsg = f"No suitable models found for operation {options.operationType}"
logger.error(errorMsg)
return AiCallResponse(
@ -125,11 +132,11 @@ class AiObjects:
errorCount=1
)
# Try each model in fallback sequence
# Try each model in failover sequence
lastError = None
for attempt, model in enumerate(fallbackModels):
for attempt, model in enumerate(failoverModelList):
try:
logger.info(f"Attempting AI call with model: {model.name} (attempt {attempt + 1}/{len(fallbackModels)})")
logger.info(f"Attempting AI call with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
# Call the model
response = await self._callWithModel(model, prompt, context, temperature, maxTokens, inputBytes)
@ -142,15 +149,15 @@ class AiObjects:
logger.warning(f"❌ AI call failed with model {model.name}: {str(e)}")
# If this is not the last model, try the next one
if attempt < len(fallbackModels) - 1:
logger.info(f"🔄 Trying next fallback model...")
if attempt < len(failoverModelList) - 1:
logger.info(f"🔄 Trying next failover model...")
continue
else:
# All models failed
logger.error(f"💥 All {len(fallbackModels)} models failed for operation {options.operationType}")
logger.error(f"💥 All {len(failoverModelList)} models failed for operation {options.operationType}")
break
# All fallback attempts failed - return error response
# All failover attempts failed - return error response
errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {str(lastError)}"
logger.error(errorMsg)
return AiCallResponse(
@ -163,6 +170,241 @@ class AiObjects:
errorCount=1
)
async def _callWithContentParts(self, request: AiCallRequest) -> AiCallResponse:
    """Process content parts with model-aware chunking (unified for single and multiple parts).

    Resolves the failover model sequence once, processes every content part
    independently with that sequence, then merges the per-part responses into
    a single aggregate response whose accounting fields are summed.
    """
    userPrompt = request.prompt
    callOptions = request.options

    # Resolve the failover sequence once; all parts share it.
    models = modelRegistry.getAvailableModels()
    failoverModels = modelSelector.getFailoverModelList(userPrompt, "", callOptions, models)
    if not failoverModels:
        return self._createErrorResponse("No suitable models found", 0, 0)

    # Each part is processed (and chunked, if needed) with its own fallback loop.
    partResponses = [
        await self._processContentPartWithFallback(part, userPrompt, callOptions, failoverModels)
        for part in request.contentParts
    ]

    # Aggregate: merged text plus summed cost/time/traffic/error counters.
    return AiCallResponse(
        content=self._mergePartResults(partResponses),
        modelName="multiple",
        priceUsd=sum(resp.priceUsd for resp in partResponses),
        processingTime=sum(resp.processingTime for resp in partResponses),
        bytesSent=sum(resp.bytesSent for resp in partResponses),
        bytesReceived=sum(resp.bytesReceived for resp in partResponses),
        errorCount=sum(resp.errorCount for resp in partResponses)
    )
async def _processContentPartWithFallback(self, contentPart, prompt: str, options, failoverModelList) -> AiCallResponse:
    """Process a single content part with model-aware chunking and fallback.

    Tries each model in ``failoverModelList`` in order. If the part fits the
    model's context window it is sent directly; otherwise it is chunked for
    that model and the per-chunk responses are merged. Returns an error
    response when every model fails.

    Fix: ``contentPart.data`` may legitimately be ``None`` (e.g. empty
    container parts) — normalize it to ``""`` once, instead of calling
    ``.encode`` on ``None`` and passing ``None`` downstream as context.
    """
    lastError = None
    # Normalize missing data to an empty string (single source of truth for
    # both the size check and the direct call below).
    partData = contentPart.data or ""
    partSize = len(partData.encode('utf-8'))
    for attempt, model in enumerate(failoverModelList):
        try:
            logger.info(f"Processing content part with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
            # Heuristic token->byte conversion (~4 bytes per token) to decide
            # whether the whole part fits the model's context window.
            modelContextBytes = model.contextLength * 4
            if partSize <= modelContextBytes:
                # Part fits - call AI directly
                response = await self._callWithModel(model, prompt, partData, 0.2, None, partSize)
                logger.info(f"✅ Content part processed successfully with model: {model.name}")
                return response
            # Part too large - chunk it for this specific model.
            chunks = await self._chunkContentPart(contentPart, model, options)
            if not chunks:
                raise ValueError(f"Failed to chunk content part for model {model.name}")
            # Process each chunk sequentially with the same model; any chunk
            # failure aborts this model and falls through to the next one.
            chunkResults = []
            for chunk in chunks:
                chunkResponse = await self._callWithModel(model, prompt, chunk['data'], 0.2, None, chunk['size'])
                chunkResults.append(chunkResponse)
            # Merge chunk results and aggregate accounting across chunks.
            mergedContent = self._mergeChunkResults(chunkResults)
            logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)")
            return AiCallResponse(
                content=mergedContent,
                modelName=model.name,
                priceUsd=sum(r.priceUsd for r in chunkResults),
                processingTime=sum(r.processingTime for r in chunkResults),
                bytesSent=sum(r.bytesSent for r in chunkResults),
                bytesReceived=sum(r.bytesReceived for r in chunkResults),
                errorCount=sum(r.errorCount for r in chunkResults)
            )
        except Exception as e:
            lastError = e
            logger.warning(f"❌ Model {model.name} failed for content part: {str(e)}")
            if attempt < len(failoverModelList) - 1:
                logger.info(f"🔄 Trying next failover model...")
                continue
            else:
                logger.error(f"💥 All {len(failoverModelList)} models failed for content part")
                break
    # All models failed
    return self._createErrorResponse(f"All models failed: {str(lastError)}", 0, 0)
async def _chunkContentPart(self, contentPart, model, options) -> List[Dict[str, Any]]:
    """Chunk a content part based on model capabilities.

    Derives chunk-size budgets from the model's context length (using a
    ~4 bytes-per-token heuristic) and delegates the actual splitting to the
    chunker registered for the part's typeGroup. Returns an empty list when
    no chunker exists for the type group or when chunking fails.
    """
    # Budget: 90% of the model context in bytes; text chunks take 70% and
    # image chunks 80% of that budget.
    contextBudget = int(model.contextLength * 4 * 0.9)
    chunkerOptions = {
        "textChunkSize": int(contextBudget * 0.7),
        "imageChunkSize": int(contextBudget * 0.8),
        "maxSize": contextBudget,
        "chunkAllowed": True
    }

    # Resolve a chunker for this part's type group.
    from modules.services.serviceExtraction.subRegistry import ChunkerRegistry
    chunker = ChunkerRegistry().resolve(contentPart.typeGroup)
    if not chunker:
        logger.warning(f"No chunker found for typeGroup: {contentPart.typeGroup}")
        return []

    # Chunk the content part; a chunker failure is logged, not raised.
    try:
        pieces = chunker.chunk(contentPart, chunkerOptions)
    except Exception as e:
        logger.error(f"Chunking failed for {contentPart.typeGroup}: {str(e)}")
        return []
    logger.debug(f"Created {len(pieces)} chunks for {contentPart.typeGroup} part")
    return pieces
def _mergePartResults(self, partResults: List[AiCallResponse]) -> str:
    """Merge part results using the existing sophisticated merging system.

    Wraps each non-empty AI response in a text ContentPart, runs the lot
    through the extraction pipeline's ``_applyMerging`` helper, and joins the
    merged parts with blank lines.
    """
    if not partResults:
        return ""
    # Convert AiCallResponse results to ContentParts for merging
    from modules.datamodels.datamodelExtraction import ContentPart
    from modules.services.serviceExtraction.subUtils import makeId
    content_parts = []
    for i, result in enumerate(partResults):
        # Responses with empty content (e.g. failed parts) are dropped here.
        if result.content:
            content_part = ContentPart(
                id=makeId(),
                parentId=None,
                label=f"ai_result_{i}",
                typeGroup="text",  # Default to text for AI results
                mimeType="text/plain",
                data=result.content,
                metadata={
                    # Carry per-response accounting so the merger (or later
                    # debugging) can see where each fragment came from.
                    "aiResult": True,
                    "modelName": result.modelName,
                    "priceUsd": result.priceUsd,
                    "processingTime": result.processingTime,
                    "bytesSent": result.bytesSent,
                    "bytesReceived": result.bytesReceived
                }
            )
            content_parts.append(content_part)
    # Use existing merging system
    # NOTE(review): parts receive fresh ids from makeId() and are then ordered
    # by "id" — if makeId() is not monotonic this may not preserve the input
    # order of partResults; confirm against subUtils.makeId.
    merge_strategy = {
        "useIntelligentMerging": True,
        "groupBy": "typeGroup",
        "orderBy": "id",
        "mergeType": "concatenate"
    }
    from modules.services.serviceExtraction.subPipeline import _applyMerging
    merged_parts = _applyMerging(content_parts, merge_strategy)
    # Convert merged parts back to final string
    final_content = "\n\n".join([part.data for part in merged_parts])
    logger.info(f"Merged {len(partResults)} AI results using existing merging system")
    return final_content.strip()
def _mergeChunkResults(self, chunkResults: List[AiCallResponse]) -> str:
    """Merge chunk results using the existing sophisticated merging system.

    Same flow as ``_mergePartResults``: each non-empty chunk response becomes
    a text ContentPart (additionally flagged ``"chunk": True``), the set is
    merged via ``_applyMerging``, and the merged parts are joined with blank
    lines.
    """
    if not chunkResults:
        return ""
    # Convert AiCallResponse results to ContentParts for merging
    from modules.datamodels.datamodelExtraction import ContentPart
    from modules.services.serviceExtraction.subUtils import makeId
    content_parts = []
    for i, result in enumerate(chunkResults):
        # Empty chunk responses (e.g. failed chunks) are dropped here.
        if result.content:
            content_part = ContentPart(
                id=makeId(),
                parentId=None,
                label=f"chunk_result_{i}",
                typeGroup="text",  # Default to text for AI results
                mimeType="text/plain",
                data=result.content,
                metadata={
                    # Per-chunk accounting, preserved for the merger/debugging.
                    "aiResult": True,
                    "chunk": True,
                    "modelName": result.modelName,
                    "priceUsd": result.priceUsd,
                    "processingTime": result.processingTime,
                    "bytesSent": result.bytesSent,
                    "bytesReceived": result.bytesReceived
                }
            )
            content_parts.append(content_part)
    # Use existing merging system
    # NOTE(review): ordered by freshly generated "id" values — input order is
    # only preserved if makeId() is monotonic; confirm against subUtils.makeId.
    merge_strategy = {
        "useIntelligentMerging": True,
        "groupBy": "typeGroup",
        "orderBy": "id",
        "mergeType": "concatenate"
    }
    from modules.services.serviceExtraction.subPipeline import _applyMerging
    merged_parts = _applyMerging(content_parts, merge_strategy)
    # Convert merged parts back to final string
    final_content = "\n\n".join([part.data for part in merged_parts])
    logger.info(f"Merged {len(chunkResults)} chunk results using existing merging system")
    return final_content.strip()
def _createErrorResponse(self, errorMsg: str, inputBytes: int, outputBytes: int) -> AiCallResponse:
    """Create an error response.

    The error text is carried in ``content``, the model name is the sentinel
    ``"error"``, cost and time are zeroed, the given byte counters are kept,
    and exactly one error is counted.
    """
    failureFields = {
        "content": errorMsg,
        "modelName": "error",
        "priceUsd": 0.0,
        "processingTime": 0.0,
        "bytesSent": inputBytes,
        "bytesReceived": outputBytes,
        "errorCount": 1
    }
    return AiCallResponse(**failureFields)
async def _callWithModel(self, model: AiModel, prompt: str, context: str, temperature: float, maxTokens: int, inputBytes: int) -> AiCallResponse:
"""Call a specific model and return the response."""
# Replace <TOKEN_LIMIT> placeholder in prompt for this specific model
@ -245,9 +487,9 @@ class AiObjects:
# Get fallback models for image analysis
availableModels = modelRegistry.getAvailableModels()
fallbackModels = model_selector.getFallbackModels(prompt, "", options, availableModels)
failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels)
if not fallbackModels:
if not failoverModelList:
errorMsg = f"No suitable models found for image analysis"
logger.error(errorMsg)
return AiCallResponse(
@ -262,9 +504,9 @@ class AiObjects:
# Try each model in fallback sequence
lastError = None
for attempt, model in enumerate(fallbackModels):
for attempt, model in enumerate(failoverModelList):
try:
logger.info(f"Attempting image analysis with model: {model.name} (attempt {attempt + 1}/{len(fallbackModels)})")
logger.info(f"Attempting image analysis with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
# Call the model
response = await self._callImageWithModel(model, prompt, imageData, mimeType, inputBytes)
@ -277,12 +519,12 @@ class AiObjects:
logger.warning(f"❌ Image analysis failed with model {model.name}: {str(e)}")
# If this is not the last model, try the next one
if attempt < len(fallbackModels) - 1:
if attempt < len(failoverModelList) - 1:
logger.info(f"🔄 Trying next fallback model for image analysis...")
continue
else:
# All models failed
logger.error(f"💥 All {len(fallbackModels)} models failed for image analysis")
logger.error(f"💥 All {len(failoverModelList)} models failed for image analysis")
break
# All fallback attempts failed - return error response

View file

@ -54,8 +54,8 @@ class SubDocumentProcessing:
options: Optional[AiCallOptions] = None
) -> str:
"""
Process documents with per-chunk AI calls and merge results.
FIXED: Now preserves chunk relationships and document structure.
Process documents with model-aware chunking and merge results.
NEW: Uses model-aware chunking in AI call phase instead of extraction phase.
Args:
documents: List of ChatDocument objects to process
@ -68,23 +68,14 @@ class SubDocumentProcessing:
if not documents:
return ""
# Get model capabilities for size calculation
model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options)
# Build extraction options for chunking with intelligent merging
# Build extraction options WITHOUT chunking parameters
extractionOptions: Dict[str, Any] = {
"prompt": prompt,
"operationType": options.operationType if options else "general",
"processDocumentsIndividually": True, # Process each document separately
"maxSize": model_capabilities["maxContextBytes"],
"chunkAllowed": True,
"textChunkSize": model_capabilities["textChunkSize"],
"imageChunkSize": model_capabilities["imageChunkSize"],
"imageMaxPixels": 1024 * 1024,
"imageQuality": 85,
"processDocumentsIndividually": True,
# REMOVED: maxSize, textChunkSize, imageChunkSize
"mergeStrategy": {
"useIntelligentMerging": True, # Enable intelligent token-aware merging
"capabilities": model_capabilities,
"useIntelligentMerging": True,
"prompt": prompt,
"groupBy": "typeGroup",
"orderBy": "id",
@ -95,17 +86,17 @@ class SubDocumentProcessing:
logger.debug(f"Per-chunk extraction options: prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")
try:
# Extract content with chunking
# Extract content WITHOUT chunking
extractionResult = self.extractionService.extractContent(documents, extractionOptions)
if not isinstance(extractionResult, list):
return "[Error: No extraction results]"
# FIXED: Process chunks with proper mapping
chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options)
# Process parts (not chunks) with model-aware AI calls
partResults = await self._processPartsWithMapping(extractionResult, prompt, options)
# FIXED: Merge with preserved chunk relationships
mergedContent = self._mergeChunkResults(chunkResults, options)
# Merge results using existing merging system
mergedContent = self._mergePartResults(partResults, options)
# Save merged extraction content to debug
self.services.utils.writeDebugFile(mergedContent or '', "extractionMergedText")
@ -123,29 +114,19 @@ class SubDocumentProcessing:
options: Optional[AiCallOptions] = None
) -> Dict[str, Any]:
"""
Process documents with per-chunk AI calls and merge results in JSON mode.
Process documents with model-aware chunking and merge results in JSON mode.
Returns structured JSON document instead of text.
"""
if not documents:
return {"metadata": {"title": "Empty Document"}, "sections": []}
# Get model capabilities for size calculation
model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options)
# Build extraction options for chunking with intelligent merging
# Build extraction options WITHOUT chunking parameters
extractionOptions: Dict[str, Any] = {
"prompt": prompt,
"operationType": options.operationType if options else "general",
"processDocumentsIndividually": True, # Process each document separately
"maxSize": model_capabilities["maxContextBytes"],
"chunkAllowed": True,
"textChunkSize": model_capabilities["textChunkSize"],
"imageChunkSize": model_capabilities["imageChunkSize"],
"imageMaxPixels": 1024 * 1024,
"imageQuality": 85,
"processDocumentsIndividually": True,
"mergeStrategy": {
"useIntelligentMerging": True, # Enable intelligent token-aware merging
"capabilities": model_capabilities,
"useIntelligentMerging": True,
"prompt": prompt,
"groupBy": "typeGroup",
"orderBy": "id",
@ -156,17 +137,17 @@ class SubDocumentProcessing:
logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")
try:
# Extract content with chunking
# Extract content WITHOUT chunking
extractionResult = self.extractionService.extractContent(documents, extractionOptions)
if not isinstance(extractionResult, list):
return {"metadata": {"title": "Error Document"}, "sections": []}
# Process chunks with proper mapping
chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options, generate_json=True)
# Process parts with model-aware chunking
partResults = await self._processPartsWithMapping(extractionResult, prompt, options)
# Merge with JSON mode
mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options)
# Convert to JSON format (simplified for now)
mergedJsonDocument = self._convertPartResultsToJson(partResults, options)
# Normalize merged JSON into a single canonical table (only if table content exists)
try:
@ -505,6 +486,127 @@ CONTINUATION INSTRUCTIONS:
"""
return await self.processDocumentsPerChunk(documents, prompt, options)
async def _processPartsWithMapping(
    self,
    extractionResult: List[ContentExtracted],
    prompt: str,
    options: Optional[AiCallOptions] = None
) -> List['PartResult']:
    """Process content parts with model-aware chunking and proper mapping.

    Collects all processable parts from the extraction result (keeping their
    index and source document id), runs one AI call per part — concurrently,
    bounded by a semaphore — and returns one PartResult per part, including
    error placeholders for parts whose processing raised.
    """
    from modules.datamodels.datamodelExtraction import PartResult
    import asyncio
    # Collect all parts that need processing
    parts_to_process = []
    part_index = 0
    for ec in extractionResult:
        for part in ec.parts:
            if part.typeGroup in ("text", "table", "structure", "image", "container", "binary"):
                # Skip empty container parts
                if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0):
                    logger.debug(f"Skipping empty container part: mimeType={part.mimeType}")
                    continue
                # Remember index + owning document so results can be mapped back.
                parts_to_process.append({
                    'part': part,
                    'part_index': part_index,
                    'document_id': ec.id
                })
                part_index += 1
    logger.info(f"Processing {len(parts_to_process)} parts with model-aware chunking")
    # Process parts in parallel
    async def process_single_part(part_info: Dict) -> PartResult:
        # One AI call per part; never raises — failures become error PartResults.
        part = part_info['part']
        part_index = part_info['part_index']
        document_id = part_info['document_id']
        start_time = time.time()
        try:
            # Create AI call request with content part
            from modules.datamodels.datamodelAi import AiCallRequest
            request = AiCallRequest(
                prompt=prompt,
                context="",  # Context is in the content part
                options=options,
                contentParts=[part]  # Pass as list for unified processing
            )
            # Call AI with model-aware chunking
            response = await self.aiObjects.call(request)
            processing_time = time.time() - start_time
            return PartResult(
                originalPart=part,
                aiResult=response.content,
                partIndex=part_index,
                documentId=document_id,
                processingTime=processing_time,
                metadata={
                    "success": True,
                    "partSize": len(part.data) if part.data else 0,
                    "resultSize": len(response.content),
                    "typeGroup": part.typeGroup,
                    "modelName": response.modelName,
                    "priceUsd": response.priceUsd
                }
            )
        except Exception as e:
            processing_time = time.time() - start_time
            logger.warning(f"Error processing part {part_index}: {str(e)}")
            # Error placeholder keeps the part's slot so ordering/mapping survives.
            return PartResult(
                originalPart=part,
                aiResult=f"[Error processing part: {str(e)}]",
                partIndex=part_index,
                documentId=document_id,
                processingTime=processing_time,
                metadata={
                    "success": False,
                    "error": str(e),
                    "partSize": len(part.data) if part.data else 0,
                    "typeGroup": part.typeGroup
                }
            )
    # Process parts with concurrency control
    # NOTE(review): assumes options.maxConcurrentParts, when present, is a
    # positive int — a None/0 value would break or stall the Semaphore; confirm.
    max_concurrent = 5
    if options and hasattr(options, 'maxConcurrentParts'):
        max_concurrent = options.maxConcurrentParts
    semaphore = asyncio.Semaphore(max_concurrent)
    async def process_with_semaphore(part_info):
        # Bound the number of simultaneously running part calls.
        async with semaphore:
            return await process_single_part(part_info)
    tasks = [process_with_semaphore(part_info) for part_info in parts_to_process]
    # return_exceptions=True: a raised task yields its exception in-place
    # instead of cancelling the gather.
    part_results = await asyncio.gather(*tasks, return_exceptions=True)
    # Handle exceptions
    processed_results = []
    for i, result in enumerate(part_results):
        if isinstance(result, Exception):
            # Map the exception back to its part via position in the task list.
            part_info = parts_to_process[i]
            processed_results.append(PartResult(
                originalPart=part_info['part'],
                aiResult=f"[Error in parallel processing: {str(result)}]",
                partIndex=part_info['part_index'],
                documentId=part_info['document_id'],
                processingTime=0.0,
                metadata={"success": False, "error": str(result)}
            ))
        elif result is not None:
            processed_results.append(result)
    logger.info(f"Completed processing {len(processed_results)} parts")
    return processed_results
async def _processChunksWithMapping(
self,
extractionResult: List[ContentExtracted],
@ -907,340 +1009,451 @@ CONTINUATION INSTRUCTIONS:
logger.info(f"Completed processing {len(processed_results)} chunks")
return processed_results
def _mergePartResults(
    self,
    partResults: List['PartResult'],
    options: Optional[AiCallOptions] = None
) -> str:
    """Merge part results using existing sophisticated merging system.

    Re-wraps each PartResult as a ContentPart (AI output as ``data``, original
    part identity/typeGroup preserved), merges via ``_applyMerging`` grouped by
    document and ordered by part index, and joins the merged parts with blank
    lines. ``options.mergeStrategy``, when present, overrides the defaults.
    """
    if not partResults:
        return ""
    # Convert PartResults back to ContentParts for existing merger system
    from modules.datamodels.datamodelExtraction import ContentPart
    content_parts = []
    for part_result in partResults:
        # Create ContentPart from PartResult with proper typeGroup
        content_part = ContentPart(
            id=part_result.originalPart.id,
            parentId=part_result.originalPart.parentId,
            label=part_result.originalPart.label,
            typeGroup=part_result.originalPart.typeGroup,  # Use original typeGroup
            mimeType=part_result.originalPart.mimeType,
            data=part_result.aiResult,  # Use AI result as data
            metadata={
                # Keep the original part metadata, augmented with AI bookkeeping.
                **part_result.originalPart.metadata,
                "aiResult": True,
                "partIndex": part_result.partIndex,
                "documentId": part_result.documentId,
                "processingTime": part_result.processingTime,
                "success": part_result.metadata.get("success", False)
            }
        )
        content_parts.append(content_part)
    # Use existing merging strategy from options
    merge_strategy = {
        "useIntelligentMerging": True,
        "groupBy": "documentId",  # Group by document
        "orderBy": "partIndex",  # Order by part index
        "mergeType": "concatenate"
    }
    if options and hasattr(options, 'mergeStrategy'):
        merge_strategy.update(options.mergeStrategy)
    # Apply existing merging logic using the sophisticated merging system
    from modules.services.serviceExtraction.subPipeline import _applyMerging
    merged_parts = _applyMerging(content_parts, merge_strategy)
    # Convert merged parts back to final string
    final_content = "\n\n".join([part.data for part in merged_parts])
    logger.info(f"Merged {len(partResults)} parts using existing sophisticated merging system")
    return final_content.strip()
def _convertPartResultsToJson(
    self,
    partResults: List['PartResult'],
    options: Optional[AiCallOptions] = None
) -> Dict[str, Any]:
    """Convert part results to JSON format using existing sophisticated merging system.

    Re-wraps PartResults as ContentParts, merges them via ``_applyMerging``,
    then parses each merged part's AI output as JSON. A "documents" payload is
    returned immediately as a multi-file response; otherwise per-part
    "sections" are accumulated (sorted by "order") into one merged document.
    Unparseable or failed parts become error sections rather than being lost.
    """
    if not partResults:
        return {"metadata": {"title": "Empty Document"}, "sections": []}
    # Convert PartResults back to ContentParts for existing merger system
    from modules.datamodels.datamodelExtraction import ContentPart
    content_parts = []
    for part_result in partResults:
        # Create ContentPart from PartResult with proper typeGroup
        content_part = ContentPart(
            id=part_result.originalPart.id,
            parentId=part_result.originalPart.parentId,
            label=part_result.originalPart.label,
            typeGroup=part_result.originalPart.typeGroup,  # Use original typeGroup
            mimeType=part_result.originalPart.mimeType,
            data=part_result.aiResult,  # Use AI result as data
            metadata={
                # Original metadata plus AI bookkeeping used further below.
                **part_result.originalPart.metadata,
                "aiResult": True,
                "partIndex": part_result.partIndex,
                "documentId": part_result.documentId,
                "processingTime": part_result.processingTime,
                "success": part_result.metadata.get("success", False)
            }
        )
        content_parts.append(content_part)
    # Use existing merging strategy for JSON mode
    merge_strategy = {
        "useIntelligentMerging": True,
        "groupBy": "documentId",  # Group by document
        "orderBy": "partIndex",  # Order by part index
        "mergeType": "concatenate"
    }
    if options and hasattr(options, 'mergeStrategy'):
        merge_strategy.update(options.mergeStrategy)
    # Apply existing merging logic using the sophisticated merging system
    from modules.services.serviceExtraction.subPipeline import _applyMerging
    merged_parts = _applyMerging(content_parts, merge_strategy)
    # Convert merged parts to JSON format
    all_sections = []
    document_titles = []
    for part in merged_parts:
        if part.metadata.get("success", False):
            try:
                # Parse JSON from AI result
                part_json = json.loads(part.data)
                # Check if this is a multi-file response (has "documents" key)
                if isinstance(part_json, dict) and "documents" in part_json:
                    # This is a multi-file response - merge all documents
                    logger.debug(f"Processing multi-file response from part {part.id} with {len(part_json['documents'])} documents")
                    # Return multi-file response directly
                    # NOTE(review): this early return discards sections/titles
                    # collected from earlier parts — confirm that is intended.
                    return {
                        "metadata": part_json.get("metadata", {"title": "Merged Document"}),
                        "documents": part_json["documents"]
                    }
                # Extract sections from single-file response
                elif isinstance(part_json, dict) and "sections" in part_json:
                    for section in part_json["sections"]:
                        # Add part context to section
                        section["metadata"] = section.get("metadata", {})
                        section["metadata"]["source_part"] = part.id
                        section["metadata"]["source_document"] = part.metadata.get("documentId", "unknown")
                        section["metadata"]["part_index"] = part.metadata.get("partIndex", 0)
                        all_sections.append(section)
                # Extract document title
                if isinstance(part_json, dict) and "metadata" in part_json:
                    title = part_json["metadata"].get("title", "")
                    if title and title not in document_titles:
                        document_titles.append(title)
            except json.JSONDecodeError as e:
                logger.warning(f"Failed to parse JSON from part {part.id}: {str(e)}")
                # Create a fallback section for invalid JSON
                fallback_section = {
                    "id": f"error_section_{part.id}",
                    "title": "Error Section",
                    "content_type": "paragraph",
                    "elements": [{
                        "text": f"Error parsing part {part.id}: {str(e)}"
                    }],
                    "order": part.metadata.get("partIndex", 0),
                    "metadata": {
                        "source_document": part.metadata.get("documentId", "unknown"),
                        "part_id": part.id,
                        "error": str(e)
                    }
                }
                all_sections.append(fallback_section)
        else:
            # Handle error parts
            error_section = {
                "id": f"error_section_{part.id}",
                "title": "Error Section",
                "content_type": "paragraph",
                "elements": [{
                    "text": f"Error in part {part.id}: {part.metadata.get('error', 'Unknown error')}"
                }],
                "order": part.metadata.get("partIndex", 0),
                "metadata": {
                    "source_document": part.metadata.get("documentId", "unknown"),
                    "part_id": part.id,
                    "error": part.metadata.get('error', 'Unknown error')
                }
            }
            all_sections.append(error_section)
    # Sort sections by order
    all_sections.sort(key=lambda x: x.get("order", 0))
    # Create merged document with sections
    merged_document = {
        "metadata": {
            "title": document_titles[0] if document_titles else "Merged Document",
            "extraction_method": "model_aware_chunking_with_merging",
            "version": "2.0"
        },
        "sections": all_sections,
        "summary": f"Merged document using sophisticated merging system",
        "tags": ["merged", "ai_generated", "model_aware", "sophisticated_merging"]
    }
    logger.info(f"Converted {len(partResults)} parts to JSON format using existing sophisticated merging system")
    return merged_document
def _mergeChunkResults(
self,
chunkResults: List[ChunkResult],
options: Optional[AiCallOptions] = None
) -> str:
"""Merge chunk results while preserving document structure and chunk order."""
"""Merge chunk results using existing sophisticated merging system."""
if not chunkResults:
return ""
# Get merging configuration from options
chunk_separator = "\n\n---\n\n"
include_document_headers = True
include_chunk_metadata = False
if options:
if hasattr(options, 'chunkSeparator'):
chunk_separator = options.chunkSeparator
elif hasattr(options, 'mergeStrategy') and options.mergeStrategy:
chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n---\n\n")
# Check for enhanced options
if hasattr(options, 'preserveChunkMetadata'):
include_chunk_metadata = options.preserveChunkMetadata
# Group chunk results by document
results_by_document = {}
# Convert ChunkResults back to ContentParts for existing merger system
from modules.datamodels.datamodelExtraction import ContentPart
content_parts = []
for chunk_result in chunkResults:
doc_id = chunk_result.documentId
if doc_id not in results_by_document:
results_by_document[doc_id] = []
results_by_document[doc_id].append(chunk_result)
# Create ContentPart from ChunkResult with proper typeGroup
content_part = ContentPart(
id=chunk_result.originalChunk.id,
parentId=chunk_result.originalChunk.parentId,
label=chunk_result.originalChunk.label,
typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup
mimeType=chunk_result.originalChunk.mimeType,
data=chunk_result.aiResult, # Use AI result as data
metadata={
**chunk_result.originalChunk.metadata,
"aiResult": True,
"chunk": True,
"chunkIndex": chunk_result.chunkIndex,
"documentId": chunk_result.documentId,
"processingTime": chunk_result.processingTime,
"success": chunk_result.metadata.get("success", False)
}
)
content_parts.append(content_part)
# Sort chunks within each document by chunk index
for doc_id in results_by_document:
results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
# Use existing merging strategy from options
merge_strategy = {
"useIntelligentMerging": True,
"groupBy": "documentId", # Group by document
"orderBy": "chunkIndex", # Order by chunk index
"mergeType": "concatenate"
}
# Merge results for each document
merged_documents = []
if options and hasattr(options, 'mergeStrategy'):
merge_strategy.update(options.mergeStrategy)
for doc_id, doc_chunks in results_by_document.items():
# Build document header if enabled
doc_header = ""
if include_document_headers:
doc_header = f"\n\n=== DOCUMENT: {doc_id} ===\n\n"
# Merge chunks for this document
doc_content = ""
for i, chunk_result in enumerate(doc_chunks):
# Add chunk separator (except for first chunk)
if i > 0:
doc_content += chunk_separator
# Add chunk content with optional metadata
chunk_metadata = chunk_result.metadata
if chunk_metadata.get("success", False):
chunk_content = chunk_result.aiResult
# Add chunk metadata if enabled
if include_chunk_metadata:
chunk_info = f"[Chunk {chunk_result.chunkIndex} - {chunk_metadata.get('typeGroup', 'unknown')} - {chunk_metadata.get('chunkSize', 0)} chars]"
chunk_content = f"{chunk_info}\n{chunk_content}"
doc_content += chunk_content
else:
# Handle error chunks
error_msg = f"[ERROR in chunk {chunk_result.chunkIndex}: {chunk_metadata.get('error', 'Unknown error')}]"
doc_content += error_msg
merged_documents.append(doc_header + doc_content)
# Apply existing merging logic using the sophisticated merging system
from modules.services.serviceExtraction.subPipeline import _applyMerging
merged_parts = _applyMerging(content_parts, merge_strategy)
# Join all documents
final_result = "\n\n".join(merged_documents)
# Convert merged parts back to final string
final_content = "\n\n".join([part.data for part in merged_parts])
logger.info(f"Merged {len(chunkResults)} chunks from {len(results_by_document)} documents")
return final_result.strip()
logger.info(f"Merged {len(chunkResults)} chunks using existing sophisticated merging system")
return final_content.strip()
def _mergeChunkResultsClean(
self,
chunkResults: List[ChunkResult],
options: Optional[AiCallOptions] = None
) -> str:
"""Merge chunk results in CLEAN mode - no debug metadata or document headers."""
"""Merge chunk results in CLEAN mode using existing sophisticated merging system."""
if not chunkResults:
return ""
# Get merging configuration from options
chunk_separator = "\n\n"
include_document_headers = False # CLEAN MODE: No document headers
include_chunk_metadata = False # CLEAN MODE: No chunk metadata
if options:
if hasattr(options, 'chunkSeparator'):
chunk_separator = options.chunkSeparator
elif hasattr(options, 'mergeStrategy') and options.mergeStrategy:
chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n")
# Group chunk results by document
results_by_document = {}
# Convert ChunkResults back to ContentParts for existing merger system
from modules.datamodels.datamodelExtraction import ContentPart
content_parts = []
for chunk_result in chunkResults:
doc_id = chunk_result.documentId
if doc_id not in results_by_document:
results_by_document[doc_id] = []
results_by_document[doc_id].append(chunk_result)
# Sort chunks within each document by chunk index
for doc_id in results_by_document:
results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
# Merge results for each document in CLEAN mode
merged_documents = []
for doc_id, doc_chunks in results_by_document.items():
# CLEAN MODE: No document headers
doc_header = ""
# Skip empty or error chunks in clean mode
if not chunk_result.metadata.get("success", False):
continue
if not chunk_result.aiResult or not chunk_result.aiResult.strip():
continue
# Skip container/binary chunks in clean mode
if chunk_result.aiResult.startswith("[Skipped ") and "content:" in chunk_result.aiResult:
continue
# Merge chunks for this document
doc_content = ""
for i, chunk_result in enumerate(doc_chunks):
# Add chunk separator (except for first chunk)
if i > 0:
doc_content += chunk_separator
# Add chunk content without metadata
chunk_metadata = chunk_result.metadata
if chunk_metadata.get("success", False):
chunk_content = chunk_result.aiResult
# CLEAN MODE: Skip container/binary chunks entirely
if chunk_content.startswith("[Skipped ") and "content:" in chunk_content:
continue # Skip container/binary chunks in clean mode
# CLEAN MODE: Skip empty or whitespace-only chunks
if not chunk_content.strip():
continue # Skip empty chunks in clean mode
# CLEAN MODE: No chunk metadata
doc_content += chunk_content
else:
# Handle error chunks silently in clean mode
continue
merged_documents.append(doc_header + doc_content)
# Create ContentPart from ChunkResult with proper typeGroup
content_part = ContentPart(
id=chunk_result.originalChunk.id,
parentId=chunk_result.originalChunk.parentId,
label=chunk_result.originalChunk.label,
typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup
mimeType=chunk_result.originalChunk.mimeType,
data=chunk_result.aiResult, # Use AI result as data
metadata={
**chunk_result.originalChunk.metadata,
"aiResult": True,
"chunk": True,
"chunkIndex": chunk_result.chunkIndex,
"documentId": chunk_result.documentId,
"processingTime": chunk_result.processingTime,
"success": chunk_result.metadata.get("success", False)
}
)
content_parts.append(content_part)
# Join all documents
final_result = "\n\n".join(merged_documents)
# Use existing merging strategy for clean mode
merge_strategy = {
"useIntelligentMerging": True,
"groupBy": "documentId", # Group by document
"orderBy": "chunkIndex", # Order by chunk index
"mergeType": "concatenate"
}
return final_result.strip()
if options and hasattr(options, 'mergeStrategy'):
merge_strategy.update(options.mergeStrategy)
# Apply existing merging logic using the sophisticated merging system
from modules.services.serviceExtraction.subPipeline import _applyMerging
merged_parts = _applyMerging(content_parts, merge_strategy)
# Convert merged parts back to final string
final_content = "\n\n".join([part.data for part in merged_parts])
logger.info(f"Merged {len(content_parts)} chunks in clean mode using existing sophisticated merging system")
return final_content.strip()
def _mergeChunkResultsJson(
    self,
    chunkResults: List[ChunkResult],
    options: Optional[AiCallOptions] = None
) -> Dict[str, Any]:
    """
    Merge chunk results in JSON mode using the existing sophisticated merging system.

    Converts each ChunkResult back into a ContentPart, runs them through the
    shared ``_applyMerging`` pipeline (grouped by document, ordered by chunk
    index), then parses each merged part's AI result as JSON and assembles a
    single structured document.

    NOTE(review): this block was reconstructed from a corrupted diff that had
    the pre- and post-change implementations interleaved; the post-change
    (merged_parts-based) path was kept — confirm against the repository.

    Args:
        chunkResults: Per-chunk AI results to merge.
        options: Optional call options; ``options.mergeStrategy`` (if present)
            overrides the default merge strategy.

    Returns:
        Either a multi-file response ``{"metadata": ..., "documents": [...]}``
        (returned directly from the first successful part that carries a
        "documents" key) or a single merged document with "sections".
    """
    if not chunkResults:
        return {"metadata": {"title": "Empty Document"}, "sections": []}
    # Group chunk results by document (used for source_documents metadata).
    results_by_document = {}
    # Convert ChunkResults back to ContentParts for the existing merger system.
    from modules.datamodels.datamodelExtraction import ContentPart
    content_parts = []
    for chunk_result in chunkResults:
        doc_id = chunk_result.documentId
        if doc_id not in results_by_document:
            results_by_document[doc_id] = []
        results_by_document[doc_id].append(chunk_result)
        # Create ContentPart from ChunkResult with proper typeGroup
        content_part = ContentPart(
            id=chunk_result.originalChunk.id,
            parentId=chunk_result.originalChunk.parentId,
            label=chunk_result.originalChunk.label,
            typeGroup=chunk_result.originalChunk.typeGroup,  # Use original typeGroup
            mimeType=chunk_result.originalChunk.mimeType,
            data=chunk_result.aiResult,  # Use AI result as data
            metadata={
                **chunk_result.originalChunk.metadata,
                "aiResult": True,
                "chunk": True,
                "chunkIndex": chunk_result.chunkIndex,
                "documentId": chunk_result.documentId,
                "processingTime": chunk_result.processingTime,
                "success": chunk_result.metadata.get("success", False)
            }
        )
        content_parts.append(content_part)
    # Sort chunks within each document by chunk index
    for doc_id in results_by_document:
        results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
    # Use existing merging strategy for JSON mode
    merge_strategy = {
        "useIntelligentMerging": True,
        "groupBy": "documentId",  # Group by document
        "orderBy": "chunkIndex",  # Order by chunk index
        "mergeType": "concatenate"
    }
    if options and hasattr(options, 'mergeStrategy'):
        merge_strategy.update(options.mergeStrategy)
    # Apply existing merging logic using the sophisticated merging system
    from modules.services.serviceExtraction.subPipeline import _applyMerging
    merged_parts = _applyMerging(content_parts, merge_strategy)
    # Convert merged parts to JSON format
    all_sections = []
    document_titles = []
    for part in merged_parts:
        if part.metadata.get("success", False):
            try:
                # Parse JSON from AI result
                chunk_json = json.loads(part.data)
                # Check if this is a multi-file response (has "documents" key)
                if isinstance(chunk_json, dict) and "documents" in chunk_json:
                    logger.debug(f"Processing multi-file response from part {part.id} with {len(chunk_json['documents'])} documents")
                    # Return multi-file response directly
                    return {
                        "metadata": chunk_json.get("metadata", {"title": "Merged Document"}),
                        "documents": chunk_json["documents"]
                    }
                # Extract sections from single-file response
                elif isinstance(chunk_json, dict) and "sections" in chunk_json:
                    for section in chunk_json["sections"]:
                        # Add part context to section
                        section["metadata"] = section.get("metadata", {})
                        section["metadata"]["source_part"] = part.id
                        section["metadata"]["source_document"] = part.metadata.get("documentId", "unknown")
                        section["metadata"]["chunk_index"] = part.metadata.get("chunkIndex", 0)
                        all_sections.append(section)
                # Extract document title
                if isinstance(chunk_json, dict) and "metadata" in chunk_json:
                    title = chunk_json["metadata"].get("title", "")
                    if title and title not in document_titles:
                        document_titles.append(title)
            except json.JSONDecodeError as e:
                logger.warning(f"Failed to parse JSON from part {part.id}: {str(e)}")
                # Create a fallback section for invalid JSON
                fallback_section = {
                    "id": f"error_section_{part.id}",
                    "title": "Error Section",
                    "content_type": "paragraph",
                    "elements": [{
                        "text": f"Error parsing part {part.id}: {str(e)}"
                    }],
                    "order": part.metadata.get("chunkIndex", 0),
                    "metadata": {
                        "source_document": part.metadata.get("documentId", "unknown"),
                        "part_id": part.id,
                        "error": str(e)
                    }
                }
                all_sections.append(fallback_section)
        else:
            # Handle error parts
            error_section = {
                "id": f"error_section_{part.id}",
                "title": "Error Section",
                "content_type": "paragraph",
                "elements": [{
                    "text": f"Error in part {part.id}: {part.metadata.get('error', 'Unknown error')}"
                }],
                "order": part.metadata.get("chunkIndex", 0),
                "metadata": {
                    "source_document": part.metadata.get("documentId", "unknown"),
                    "part_id": part.id,
                    "error": part.metadata.get('error', 'Unknown error')
                }
            }
            all_sections.append(error_section)
    # Sort sections by order
    all_sections.sort(key=lambda x: x.get("order", 0))
    # Create merged document with sections
    merged_document = {
        "metadata": {
            "title": document_titles[0] if document_titles else "Merged Document",
            "source_documents": list(results_by_document.keys()),
            "extraction_method": "ai_json_extraction_with_merging",
            "version": "2.0"
        },
        "sections": all_sections,
        "summary": f"Merged document using sophisticated merging system",
        "tags": ["merged", "ai_generated", "sophisticated_merging"]
    }
    logger.info(f"Merged {len(chunkResults)} chunks using existing sophisticated merging system (JSON mode)")
    return merged_document
def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]:
    """
    Determine chunking byte budgets for the given content.

    Estimates the total payload size (UTF-8 prompt bytes plus each document's
    declared fileSize) and asks the centralized model selector for the best
    model; that model's context length drives the returned chunk-size limits.
    Falls back to a conservative 128k-token context when selection yields no
    model or raises.

    Args:
        prompt: Prompt text to be sent to the model.
        documents: Optional attached documents contributing to the size estimate.
        options: Call options forwarded to the model selector.

    Returns:
        Dict with "maxContextBytes", "textChunkSize" and "imageChunkSize".
    """
    # Rough payload estimate: prompt bytes plus declared document sizes.
    promptBytes = len(prompt.encode('utf-8'))
    documentBytes = sum((doc.fileSize or 0) for doc in documents) if documents else 0
    totalBytes = promptBytes + documentBytes
    try:
        from modules.aicore.aicoreModelRegistry import modelRegistry
        from modules.aicore.aicoreModelSelector import model_selector
        # Centralized selection: pick the best model and use its context length.
        candidates = modelRegistry.getAvailableModels()
        chosen = model_selector.selectModel(prompt, "", options, candidates)
        if chosen:
            contextTokens = chosen.contextLength
            chosenName = chosen.name
            logger.debug(f"Selected model for chunking: {chosenName} with context length: {contextTokens}")
        else:
            # No suitable model: conservative GPT-4o-sized default.
            contextTokens = 128000
            chosenName = "fallback"
            logger.warning(f"No model selected for chunking, using fallback context length: {contextTokens}")
    except Exception as e:
        # Selection machinery unavailable or broken: same conservative default.
        contextTokens = 128000
        chosenName = "fallback"
        logger.error(f"Model selection failed for chunking: {e}, using fallback context length: {contextTokens}")
    # Tokens -> bytes (~4 chars per token), keeping 90% of the context as headroom.
    maxContextBytes = int(int(contextTokens * 4) * 0.9)
    textChunkSize = int(maxContextBytes * 0.7)   # text chunks get 70% of the budget
    imageChunkSize = int(maxContextBytes * 0.8)  # image chunks get 80% of the budget
    logger.debug(f"Content size: {totalBytes} bytes, Max context: {maxContextBytes} bytes")
    logger.debug(f"Text chunk size: {textChunkSize} bytes, Image chunk size: {imageChunkSize} bytes")
    return {
        "maxContextBytes": maxContextBytes,
        "textChunkSize": textChunkSize,
        "imageChunkSize": imageChunkSize
    }
# REMOVED: _getModelCapabilitiesForContent method - no longer needed with model-aware chunking

View file

@ -73,139 +73,18 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker
parts = extractor.extract(documentBytes, {"fileName": fileName, "mimeType": mimeType, "options": options})
# Apply chunking and size limiting
parts = poolAndLimit(parts, chunkerRegistry, options)
# REMOVED: poolAndLimit(parts, chunkerRegistry, options)
# REMOVED: Chunking logic - now handled in AI call phase
# Optional merge step - but preserve chunks
# Apply merging strategy if provided (preserve existing logic)
mergeStrategy = options.get("mergeStrategy", {})
if mergeStrategy:
# Don't merge chunks - they should stay separate for processing
non_chunk_parts = [p for p in parts if not p.metadata.get("chunk", False)]
chunk_parts = [p for p in parts if p.metadata.get("chunk", False)]
logger.debug(f"runExtraction: Preserving {len(chunk_parts)} chunks from merging")
logger.debug(f"runExtraction - non_chunk_parts: {len(non_chunk_parts)}, chunk_parts: {len(chunk_parts)}")
# Apply intelligent merging for small text parts
if non_chunk_parts:
# Count text parts
text_parts = [p for p in non_chunk_parts if p.typeGroup == "text"]
if len(text_parts) > 5: # If we have many small text parts, merge them
logger.info(f"🔧 Merging {len(text_parts)} small text parts for efficiency")
non_chunk_parts = _mergeParts(non_chunk_parts, mergeStrategy)
# Combine non-chunk parts with chunk parts (chunks stay separate)
parts = non_chunk_parts + chunk_parts
logger.debug(f"runExtraction: Final parts after merging: {len(parts)} (chunks: {len(chunk_parts)})")
logger.debug(f"runExtraction - Final parts: {len(parts)} (chunks: {len(chunk_parts)})")
# Timestamp-only extraction debug dumps removed
parts = _applyMerging(parts, mergeStrategy)
return ContentExtracted(id=makeId(), parts=parts)
def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, options: Dict[str, Any]) -> List[ContentPart]:
    """
    Pool parts under a byte budget and chunk the overflow.

    Keeps parts in their original order until the cumulative declared size
    exceeds ``options["maxSize"]``; if chunking is allowed, every overflow part
    with a chunkable typeGroup is split via the chunker registry and all
    resulting chunks are kept (chunks are processed separately downstream, so
    they are not counted against the budget). An optional mergeStrategy is then
    applied to non-chunk parts only; chunks are never merged.

    Fix: removed duplicated debug log lines that were merge artifacts (the same
    message was emitted twice per chunking step and after merging).

    Args:
        parts: Extracted content parts; each carries ``metadata["size"]`` in bytes.
        chunkerRegistry: Registry resolving a chunker per typeGroup.
        options: Dict with "maxSize", "chunkAllowed" and "mergeStrategy" keys.

    Returns:
        The kept (and possibly chunked/merged) list of ContentParts.
    """
    maxSize = int(options.get("maxSize", 0) or 0)
    chunkAllowed = bool(options.get("chunkAllowed", False))
    mergeStrategy = options.get("mergeStrategy", {})
    if maxSize <= 0:
        # No size budget: still apply merging if a strategy was provided.
        if mergeStrategy:
            return _applyMerging(parts, mergeStrategy)
        return parts
    # First, try to fit parts within the size limit in their original order.
    current = 0
    kept: List[ContentPart] = []
    remaining: List[ContentPart] = []
    logger.debug(f"Starting poolAndLimit with {len(parts)} parts, maxSize={maxSize}")
    for i, p in enumerate(parts):
        size = int(p.metadata.get("size", 0) or 0)
        # Show first 50 characters of text content for debugging
        content_preview = p.data[:50].replace('\n', '\\n') if p.data else ""
        logger.debug(f"Part {i}: {p.typeGroup} - {size} bytes - '{content_preview}...' (current: {current})")
        if current + size <= maxSize:
            kept.append(p)
            current += size
            logger.debug(f"Part {i} kept (total: {current})")
        else:
            remaining.append(p)
            logger.debug(f"Part {i} moved to remaining")
    logger.debug(f"Kept: {len(kept)}, Remaining: {len(remaining)}")
    # If we have remaining parts and chunking is allowed, try chunking.
    if remaining and chunkAllowed:
        logger.debug(f"=== CHUNKING ACTIVATED ===")
        logger.debug(f"Remaining parts to chunk: {len(remaining)}")
        logger.debug(f"Max size limit: {maxSize} bytes")
        logger.debug(f"Current size used: {current} bytes")
        logger.debug(f"Chunking {len(remaining)} remaining parts")
        for p in remaining:
            if p.typeGroup in ("text", "table", "structure", "image", "container", "binary"):
                logger.debug(f"Chunking {p.typeGroup} part with {len(p.data)} chars")
                chunks = chunkerRegistry.resolve(p.typeGroup).chunk(p, options)
                logger.debug(f"Created {len(chunks)} chunks")
                chunks_added = 0
                for ch in chunks:
                    chSize = int(ch.get("size", 0) or 0)
                    # Add all chunks - don't limit by maxSize since they'll be processed separately
                    kept.append(ContentPart(
                        id=makeId(),
                        parentId=p.id,
                        label=f"chunk_{ch.get('order', 0)}",
                        typeGroup=p.typeGroup,
                        mimeType=p.mimeType,
                        data=ch.get("data", ""),
                        metadata={
                            "size": chSize,
                            "chunk": True,
                            **ch.get("metadata", {})
                        }
                    ))
                    chunks_added += 1
                    logger.debug(f"Added chunk {ch.get('order', 0)}: {chSize} bytes")
                logger.debug(f"Added {chunks_added} chunks from {p.typeGroup} part")
    # Apply merging strategy if provided, but preserve chunks (they must stay
    # separate for downstream per-chunk processing).
    if mergeStrategy:
        non_chunk_parts = [p for p in kept if not p.metadata.get("chunk", False)]
        chunk_parts = [p for p in kept if p.metadata.get("chunk", False)]
        logger.debug(f"Preserving {len(chunk_parts)} chunks from merging")
        # Apply intelligent merging for small text parts
        if non_chunk_parts:
            text_parts = [p for p in non_chunk_parts if p.typeGroup == "text"]
            if len(text_parts) > 5:  # If we have many small text parts, merge them
                logger.info(f"🔧 Merging {len(text_parts)} small text parts for efficiency")
                non_chunk_parts = _applyMerging(non_chunk_parts, mergeStrategy)
        # Combine non-chunk parts with chunk parts (chunks stay separate).
        kept = non_chunk_parts + chunk_parts
        logger.debug(f"Final parts after merging: {len(kept)} (chunks: {len(chunk_parts)})")
        # Re-check size after merging
        totalSize = sum(int(p.metadata.get("size", 0) or 0) for p in kept)
        if totalSize > maxSize and mergeStrategy.get("maxSize"):
            # Apply size limit to merged parts
            kept = _applySizeLimit(kept, maxSize)
    logger.debug(f"poolAndLimit returning {len(kept)} parts")
    return kept
# REMOVED: poolAndLimit function - chunking now handled in AI call phase
def _applyMerging(parts: List[ContentPart], strategy: Dict[str, Any]) -> List[ContentPart]:
@ -264,37 +143,5 @@ def _applyMerging(parts: List[ContentPart], strategy: Dict[str, Any]) -> List[Co
return merged
def _applySizeLimit(parts: List[ContentPart], maxSize: int) -> List[ContentPart]:
    """
    Apply size limit by prioritizing parts and truncating if necessary.

    Parts are ordered text-first by typeGroup priority; parts are kept while
    their declared sizes fit the budget. When a part no longer fits, a text
    part may be truncated to the remaining budget before the scan stops.

    Args:
        parts: Candidate content parts (``metadata["size"]`` in bytes).
        maxSize: Byte budget for the returned parts.

    Returns:
        The parts (possibly including one truncated text part) that fit.
    """
    # Sort by priority: text first, then others
    priority_order = {"text": 0, "table": 1, "structure": 2, "image": 3, "binary": 4, "metadata": 5, "container": 6}
    sorted_parts = sorted(parts, key=lambda p: priority_order.get(p.typeGroup, 99))
    kept: List[ContentPart] = []
    current_size = 0
    # Greedily keep the highest-priority parts while their declared sizes fit.
    for part in sorted_parts:
        part_size = int(part.metadata.get("size", 0) or 0)
        if current_size + part_size <= maxSize:
            kept.append(part)
            current_size += part_size
        else:
            # Try to truncate text parts
            if part.typeGroup == "text" and part_size > 0:
                remaining_size = maxSize - current_size
                if remaining_size > 1000:  # Only truncate if we have meaningful space
                    truncated_data = part.data[:remaining_size * 4]  # Rough character estimate
                    truncated_part = ContentPart(
                        id=makeId(),
                        parentId=part.parentId,
                        label=f"{part.label}_truncated",
                        typeGroup=part.typeGroup,
                        mimeType=part.mimeType,
                        data=truncated_data,
                        metadata={**part.metadata, "size": len(truncated_data.encode('utf-8')), "truncated": True}
                    )
                    kept.append(truncated_part)
            # NOTE(review): indentation reconstructed from a flattened dump;
            # the break is placed at the first part that no longer fits (after
            # an optional text truncation) — confirm against the original file.
            break
    return kept
# REMOVED: _applySizeLimit function - no longer needed after removing poolAndLimit

View file

@ -138,6 +138,7 @@ class AIBehaviorTester:
self.testResults.append(result)
return result
def _extractContinuationInstruction(self, response: str) -> str:
"""Extract continuation instruction from response."""
try:

View file

@ -25,7 +25,7 @@ from modules.datamodels.datamodelAi import (
)
from modules.datamodels.datamodelUam import User
from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.aicore.aicoreModelSelector import model_selector
from modules.aicore.aicoreModelSelector import modelSelector
class ModelSelectionTester:
@ -45,6 +45,51 @@ class ModelSelectionTester:
self.services.ai = await AiService.create(self.services)
async def _printFallbackListWithContext(self, title: str, prompt: str, context: str, options: AiCallOptions) -> None:
    """
    Print the prioritized failover model list for a prompt+context scenario.

    Shows the scenario header, payload sizes, and for each candidate model a
    summary line plus a score breakdown (size, processing-mode and priority
    ratings, as computed by the model selector's internal helpers).

    Fix: promptSize/contextSize/totalSize were recomputed inside the per-model
    loop although they are loop-invariant; they are now computed once and
    reused (printed output is unchanged).

    Args:
        title: Scenario title to print.
        prompt: Prompt text for the scenario.
        context: Context text for the scenario.
        options: Call options controlling operation type, priority and mode.
    """
    print(f"\n{'='*80}")
    print(f"{title}")
    print(f"{'='*80}")
    print(
        f"Operation={options.operationType.name}, Priority={options.priority.name}, ProcessingMode={options.processingMode.name}"
    )
    # Show context and prompt sizes (computed once; reused for scoring below).
    promptSize = len(prompt.encode("utf-8"))
    contextSize = len(context.encode("utf-8"))
    totalSize = promptSize + contextSize
    print(f"Prompt size: {promptSize} bytes, Context size: {contextSize} bytes, Total: {totalSize} bytes")
    availableModels = modelRegistry.getAvailableModels()
    failoverModelList = modelSelector.getFailoverModelList(
        prompt=prompt,
        context=context,
        options=options,
        availableModels=availableModels,
    )
    if not failoverModelList:
        print("No suitable models found (capability filter returned empty list).")
        return
    print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx | score):")
    for idx, m in enumerate(failoverModelList, 1):
        costIn = getattr(m, "costPer1kTokensInput", 0.0)
        # Detailed score breakdown — NOTE(review): uses the selector's private
        # helper methods; acceptable here since this is a diagnostic tester.
        sizeRating = modelSelector._getSizeRating(m, totalSize)
        processingModeRating = modelSelector._getProcessingModeRating(m.processingMode, options.processingMode)
        priorityRating = modelSelector._getPriorityRating(m, options.priority)
        totalScore = sizeRating + processingModeRating + priorityRating
        print(
            f" {idx:>2}. {m.name} | Q={getattr(m, 'qualityRating', 0)} | S={getattr(m, 'speedRating', 0)} | ${costIn:.4f} | ctx={getattr(m, 'contextLength', 0)} | score={totalScore:.3f}"
        )
        print(f" Size: {sizeRating:.3f}, ProcessingMode: {processingModeRating:.3f}, Priority: {priorityRating:.3f}")
async def _printFallbackList(self, title: str, prompt: str, options: AiCallOptions) -> None:
print(f"\n{'='*80}")
print(f"{title}")
@ -53,24 +98,43 @@ class ModelSelectionTester:
f"Operation={options.operationType.name}, Priority={options.priority.name}, ProcessingMode={options.processingMode.name}"
)
# Show context and prompt sizes
context = "" # Currently using empty context
promptSize = len(prompt.encode("utf-8"))
contextSize = len(context.encode("utf-8"))
totalSize = promptSize + contextSize
print(f"Prompt size: {promptSize} bytes, Context size: {contextSize} bytes, Total: {totalSize} bytes")
availableModels = modelRegistry.getAvailableModels()
fallbackModels = model_selector.getFallbackModels(
failoverModelList = modelSelector.getFailoverModelList(
prompt=prompt,
context="",
context=context,
options=options,
availableModels=availableModels,
)
if not fallbackModels:
if not failoverModelList:
print("No suitable models found (capability filter returned empty list).")
return
print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx):")
for idx, m in enumerate(fallbackModels, 1):
print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx | score):")
for idx, m in enumerate(failoverModelList, 1):
costIn = getattr(m, "costPer1kTokensInput", 0.0)
# Calculate detailed score breakdown
promptSize = len(prompt.encode("utf-8"))
contextSize = len(context.encode("utf-8"))
totalSize = promptSize + contextSize
# Get detailed scoring
sizeRating = modelSelector._getSizeRating(m, totalSize)
processingModeRating = modelSelector._getProcessingModeRating(m.processingMode, options.processingMode)
priorityRating = modelSelector._getPriorityRating(m, options.priority)
totalScore = sizeRating + processingModeRating + priorityRating
print(
f" {idx:>2}. {m.name} | Q={getattr(m, 'qualityRating', 0)} | S={getattr(m, 'speedRating', 0)} | ${costIn:.4f} | ctx={getattr(m, 'contextLength', 0)}"
f" {idx:>2}. {m.name} | Q={getattr(m, 'qualityRating', 0)} | S={getattr(m, 'speedRating', 0)} | ${costIn:.4f} | ctx={getattr(m, 'contextLength', 0)} | score={totalScore:.3f}"
)
print(f" Size: {sizeRating:.3f}, ProcessingMode: {processingModeRating:.3f}, Priority: {priorityRating:.3f}")
async def run(self) -> None:
# Scenarios reflecting workflows/
@ -146,10 +210,93 @@ class ModelSelectionTester:
)
)
# Intent analysis (user input understanding)
scenarios.append(
(
"ANALYSE - Quality, Detailed (Intent Analysis)",
"Analyze user intent and extract key requirements from the following request: 'I need to create a comprehensive marketing strategy for our new product launch including budget allocation, timeline, and target audience analysis.'",
AiCallOptions(
operationType=OperationTypeEnum.ANALYSE,
priority=PriorityEnum.QUALITY,
compressPrompt=False,
compressContext=False,
processingMode=ProcessingModeEnum.DETAILED,
maxCost=0.08,
maxProcessingTime=45,
resultFormat="json",
temperature=0.2,
),
)
)
# Review/Validation (quality assurance)
scenarios.append(
(
"ANALYSE - Quality, Detailed (Review/Validation)",
"Review and validate the following business proposal for completeness, accuracy, and compliance with industry standards. Identify any gaps or areas for improvement.",
AiCallOptions(
operationType=OperationTypeEnum.ANALYSE,
priority=PriorityEnum.QUALITY,
compressPrompt=False,
compressContext=False,
processingMode=ProcessingModeEnum.DETAILED,
maxCost=0.10,
maxProcessingTime=60,
resultFormat="json",
temperature=0.1,
),
)
)
# Large context scenario (to test size-based scoring)
scenarios.append(
(
"GENERAL - Balanced, Advanced (Large Context Test)",
"Process this large document and provide a comprehensive summary.",
AiCallOptions(
operationType=OperationTypeEnum.GENERAL,
priority=PriorityEnum.BALANCED,
compressPrompt=False,
compressContext=False,
processingMode=ProcessingModeEnum.ADVANCED,
maxCost=0.15,
maxProcessingTime=120,
),
)
)
# Iterate and print lists
for title, prompt, options in scenarios:
await self._printFallbackList(title, prompt, options)
# Test with actual context to see size-based scoring
largeContext = """
This is a comprehensive business document containing detailed information about our company's strategic initiatives,
financial performance, market analysis, competitive landscape, operational metrics, customer feedback,
product development roadmap, technology stack, human resources, legal compliance, risk management,
sustainability efforts, and future growth plans. The document spans multiple sections including executive summary,
market research, financial statements, operational reports, customer insights, product specifications,
technology architecture, HR policies, legal frameworks, risk assessments, environmental impact studies,
and strategic recommendations. This extensive content is designed to test the model selection algorithm's
ability to handle large context sizes and make intelligent decisions about which models are best suited
for processing such substantial amounts of information while maintaining efficiency and cost-effectiveness.
""" * 10 # Repeat to make it even larger
await self._printFallbackListWithContext(
"GENERAL - Balanced, Advanced (Large Context Test)",
"Analyze this comprehensive business document and provide key insights.",
largeContext,
AiCallOptions(
operationType=OperationTypeEnum.GENERAL,
priority=PriorityEnum.BALANCED,
compressPrompt=False,
compressContext=False,
processingMode=ProcessingModeEnum.ADVANCED,
maxCost=0.15,
maxProcessingTime=120,
),
)
async def main() -> None:
tester = ModelSelectionTester()