Ready for test: revised dynamic AI-aware chunking system

This commit is contained in:
ValueOn AG 2025-10-23 00:35:44 +02:00
parent 6b819cc848
commit 36947b6d7e
14 changed files with 1160 additions and 1014 deletions

View file

@ -48,7 +48,7 @@ class ModelRegistry:
try:
# Import the module
module = importlib.import_module(f'modules.connectors.{moduleName}')
module = importlib.import_module(f'modules.aicore.{moduleName}')
# Find connector classes (classes that inherit from BaseConnectorAi)
for attrName in dir(module):

View file

@ -1,158 +0,0 @@
"""
Configuration for dynamic model selection rules.
This makes model selection configurable rather than hardcoded.
"""
from typing import Dict, List, Any
from modules.datamodels.datamodelAi import OperationTypeEnum, ModelCapabilitiesEnum, PriorityEnum, SelectionRule
class ModelSelectionConfig:
    """Configurable rule store for dynamic model selection.

    Holds a mutable list of weighted SelectionRule objects plus a
    per-operation fallback-criteria table that is consulted when no
    rule produces a usable candidate.
    """

    def __init__(self):
        # Start from the built-in defaults; the mutator methods below let
        # callers adjust the rule set at runtime.
        self.rules = self._loadDefaultRules()
        self.fallbackModels = self._loadFallbackModels()

    def _loadDefaultRules(self) -> List[SelectionRule]:
        """Build and return the built-in selection rules."""
        return [
            # Planning and analysis: favour high-quality reasoning models.
            SelectionRule(
                name="highQualityAnalysis",
                condition="Planning or analysis operations requiring high quality",
                weight=10.0,
                operationTypes=[OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE],
                priority=PriorityEnum.QUALITY,
                capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.REASONING, ModelCapabilitiesEnum.ANALYSIS],
                minQualityRating=8
            ),
            # Everyday traffic: favour fast models.
            SelectionRule(
                name="fastBasicProcessing",
                condition="Basic operations requiring speed",
                weight=8.0,
                operationTypes=[OperationTypeEnum.GENERAL],
                priority=PriorityEnum.SPEED,
                capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.CHAT],
                minQualityRating=5
            ),
            # High-volume traffic: keep per-token price down.
            SelectionRule(
                name="costEffectiveProcessing",
                condition="High-volume operations where cost matters",
                weight=7.0,
                operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.GENERATE],
                priority=PriorityEnum.COST,
                capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION],
                maxCost=0.01  # $0.01 per 1k tokens
            ),
            # Vision workloads require multimodal models of high quality.
            SelectionRule(
                name="imageAnalyse",
                condition="Image analysis operations",
                weight=10.0,
                operationTypes=[OperationTypeEnum.IMAGE_ANALYSE],
                priority=PriorityEnum.QUALITY,
                capabilities=[ModelCapabilitiesEnum.VISION, ModelCapabilitiesEnum.MULTIMODAL],
                minQualityRating=8
            ),
            # Web research: balanced models that can search the web.
            SelectionRule(
                name="webResearch",
                condition="Web research operations",
                weight=9.0,
                operationTypes=[OperationTypeEnum.WEB_RESEARCH],
                priority=PriorityEnum.BALANCED,
                capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.ANALYSIS, ModelCapabilitiesEnum.WEB_SEARCH],
                minQualityRating=7
            ),
            # Very large inputs: require a big context window.
            SelectionRule(
                name="largeContext",
                condition="Operations requiring large context",
                weight=8.0,
                operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.ANALYSE],
                priority=PriorityEnum.BALANCED,
                capabilities=[ModelCapabilitiesEnum.TEXT_GENERATION],
                minContextLength=100000  # 100k tokens
            )
        ]

    def _loadFallbackModels(self) -> Dict[str, Dict[str, Any]]:
        """Build the per-operation fallback-criteria table.

        NOTE(review): the "operationTypes" entries hold capability enum
        members, not operation types — confirm the key name is intended.
        """
        return {
            OperationTypeEnum.GENERAL: {
                "priorityOrder": ["speed", "quality", "cost"],
                "operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.CHAT],
                "minQualityRating": 5,
                "maxCostPer1k": 0.01
            },
            OperationTypeEnum.IMAGE_ANALYSE: {
                "priorityOrder": ["quality", "speed"],
                "operationTypes": [ModelCapabilitiesEnum.VISION, ModelCapabilitiesEnum.MULTIMODAL],
                "minQualityRating": 8,
                "maxCostPer1k": 0.1
            },
            OperationTypeEnum.IMAGE_GENERATE: {
                "priorityOrder": ["quality", "speed"],
                "operationTypes": [ModelCapabilitiesEnum.IMAGE_GENERATE, ModelCapabilitiesEnum.ART, ModelCapabilitiesEnum.VISUAL_CREATION],
                "minQualityRating": 8,
                "maxCostPer1k": 0.1
            },
            OperationTypeEnum.WEB_RESEARCH: {
                "priorityOrder": ["quality", "speed", "cost"],
                "operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.ANALYSIS],
                "preferredTags": [ModelCapabilitiesEnum.WEB_SEARCH],
                "minQualityRating": 7,
                "maxCostPer1k": 0.02
            },
            OperationTypeEnum.PLAN: {
                "priorityOrder": ["quality", "speed"],
                "operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.REASONING, ModelCapabilitiesEnum.ANALYSIS],
                "preferredTags": [PriorityEnum.QUALITY],
                "minQualityRating": 8,
                "maxCostPer1k": 0.1
            },
            OperationTypeEnum.ANALYSE: {
                "priorityOrder": ["quality", "speed"],
                "operationTypes": [ModelCapabilitiesEnum.TEXT_GENERATION, ModelCapabilitiesEnum.ANALYSIS, ModelCapabilitiesEnum.REASONING],
                "preferredTags": [PriorityEnum.QUALITY],
                "minQualityRating": 8,
                "maxCostPer1k": 0.1
            }
        }

    def getRulesForOperation(self, operationType: str) -> List[SelectionRule]:
        """Return every rule whose operationTypes include *operationType*."""
        return [candidate for candidate in self.rules if operationType in candidate.operationTypes]

    def getFallbackCriteria(self, operationType: str) -> Dict[str, Any]:
        """Return fallback criteria for *operationType*, defaulting to GENERAL."""
        return self.fallbackModels.get(operationType, self.fallbackModels[OperationTypeEnum.GENERAL])

    def addRule(self, rule: SelectionRule):
        """Append *rule* to the active rule set."""
        self.rules.append(rule)

    def removeRule(self, ruleName: str):
        """Drop every rule whose name equals *ruleName*."""
        self.rules = [kept for kept in self.rules if kept.name != ruleName]

    def updateRule(self, ruleName: str, **kwargs):
        """Overwrite attributes on the first rule named *ruleName*.

        Unknown attribute names are silently ignored; only existing
        attributes on the rule are updated.
        """
        target = next((candidate for candidate in self.rules if candidate.name == ruleName), None)
        if target is not None:
            for attrName, attrValue in kwargs.items():
                if hasattr(target, attrName):
                    setattr(target, attrName, attrValue)
# Global configuration instance
# Module-level singleton; importers share (and may mutate) this object.
model_selection_config = ModelSelectionConfig()

View file

@ -1,20 +1,20 @@
"""
Dynamic model selector using configurable rules and scoring.
Simplified model selection based on model properties and priority-based sorting.
No complex rules needed - just filter by properties and sort by priority!
"""
import logging
from typing import List, Optional, Dict, Any, Tuple
from modules.datamodels.datamodelAi import AiModel, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum, ModelCapabilitiesEnum
from modules.aicore.aicoreModelSelectionConfig import model_selection_config
from typing import List, Dict, Any, Optional
from modules.datamodels.datamodelAi import AiModel, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
# Configure logger
logger = logging.getLogger(__name__)
class ModelSelector:
"""Dynamic model selector using configurable rules."""
"""Simple model selector based on properties and priority-based sorting."""
def __init__(self):
self.config = model_selection_config
logger.info("ModelSelector initialized with simplified approach")
def selectModel(self,
prompt: str,
@ -22,270 +22,7 @@ class ModelSelector:
options: AiCallOptions,
availableModels: List[AiModel]) -> Optional[AiModel]:
"""
Select the best model based on configurable rules and scoring.
Args:
prompt: User prompt
context: Context data
options: AI call options
availableModels: List of available models to choose from
Returns:
Selected model or None if no suitable model found
"""
if not availableModels:
logger.warning("No models available for selection")
return None
logger.info(f"Selecting model for operation: {options.operationType}, priority: {options.priority}")
# Calculate input size
inputSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))
# Get applicable rules
rules = self.config.getRulesForOperation(options.operationType)
logger.debug(f"Found {len(rules)} applicable rules for {options.operationType}")
# Score each model
scoredModels = []
for model in availableModels:
if not model.isAvailable:
continue
score = self._calculateModelScore(model, inputSize, options, rules)
if score > 0: # Only consider models with positive scores
scoredModels.append((model, score))
logger.debug(f"Model {model.name}: score={score:.2f}")
if not scoredModels:
logger.warning("No models passed the selection criteria, trying fallback criteria")
# Try fallback criteria
fallbackCriteria = self.getFallbackCriteria(options.operationType)
return self._selectWithFallbackCriteria(availableModels, fallbackCriteria, inputSize, options)
# Sort by score (highest first)
scoredModels.sort(key=lambda x: x[1], reverse=True)
selectedModel = scoredModels[0][0]
selectedScore = scoredModels[0][1]
logger.info(f"Selected model: {selectedModel.name} (score: {selectedScore:.2f})")
# Log selection details
self._logSelectionDetails(selectedModel, inputSize, options)
return selectedModel
def _calculateModelScore(self,
                         model: AiModel,
                         inputSize: int,
                         options: AiCallOptions,
                         rules: List) -> float:
    """Calculate score for a model based on rules and criteria.

    Args:
        model: Candidate model to score.
        inputSize: Combined prompt+context size in bytes.
        options: Caller's AI call options (priority, processing mode, ...).
        rules: Selection rules already filtered for this operation type.

    Returns:
        Non-negative score; 0.0 means the model is disqualified.
    """
    score = 0.0
    # Check basic requirements — hard gate on context length,
    # required operation types and capabilities.
    if not self._meetsBasicRequirements(model, inputSize, options):
        return 0.0
    # Apply rules: each rule contributes a sub-score scaled by its weight.
    for rule in rules:
        ruleScore = self._applyRule(model, inputSize, options, rule)
        score += ruleScore * rule.weight
    # Apply priority-based scoring
    priorityScore = self._applyPriorityScoring(model, options)
    score += priorityScore
    # Apply processing mode scoring
    modeScore = self._applyProcessingModeScoring(model, options)
    score += modeScore
    # Apply cost constraints
    if not self._meetsCostConstraints(model, inputSize, options):
        score *= 0.1  # Heavily penalize but don't eliminate
    return max(0.0, score)
def _meetsBasicRequirements(self, model: AiModel, inputSize: int, options: AiCallOptions) -> bool:
    """Check if model meets basic requirements.

    Hard disqualifiers: input larger than 80% of the model's context
    window, missing required operation types or capabilities, or the
    model advertising an operation type that a rule says to avoid.
    """
    # Context length check — keep 20% headroom below the model's window.
    # contextLength <= 0 is treated as "no limit".
    if model.contextLength > 0 and inputSize > model.contextLength * 0.8:
        logger.debug(f"Model {model.name} rejected: input too large ({inputSize} > {model.contextLength * 0.8})")
        return False
    # Required operation types check — the model must cover all of them.
    if options.operationTypes:
        if not all(opType in model.operationTypes for opType in options.operationTypes):
            logger.debug(f"Model {model.name} rejected: missing required operation types")
            return False
    # Capabilities check — the model must cover all requested capabilities.
    if options.capabilities:
        if not all(cap in model.capabilities for cap in options.capabilities):
            logger.debug(f"Model {model.name} rejected: missing required capabilities")
            return False
    # Avoid operation types check — any rule for this operation can veto a
    # model that advertises an operation type on its avoid list.
    for rule in self.config.getRulesForOperation(options.operationType):
        if any(opType in model.operationTypes for opType in rule.avoidOperationTypes):
            logger.debug(f"Model {model.name} rejected: has avoid operation types")
            return False
    return True
def _applyRule(self, model: AiModel, inputSize: int, options: AiCallOptions, rule) -> float:
    """Apply a specific rule to calculate score contribution.

    Additive sub-scores: +1.0 when the model covers all of the rule's
    operation types, up to +0.5 for preferred-capability coverage,
    +0.3 for meeting minQualityRating, +0.2 for meeting
    minContextLength (max 2.0 before the caller applies rule.weight).

    NOTE(review): inputSize and options are currently unused here.
    """
    score = 0.0
    # Required operation types match
    if all(opType in model.operationTypes for opType in rule.operationTypes):
        score += 1.0
    # Preferred capabilities match (fractional: matched / preferred)
    preferredMatches = sum(1 for cap in rule.preferredCapabilities if cap in model.capabilities)
    if rule.preferredCapabilities:
        score += (preferredMatches / len(rule.preferredCapabilities)) * 0.5
    # Quality rating check
    if rule.minQualityRating and model.qualityRating >= rule.minQualityRating:
        score += 0.3
    # Context length check
    if rule.minContextLength and model.contextLength >= rule.minContextLength:
        score += 0.2
    return score
def _applyPriorityScoring(self, model: AiModel, options: AiCallOptions) -> float:
    """Score bonus derived from the caller's priority preference.

    SPEED and QUALITY scale the corresponding 0-10 rating by 0.1;
    COST rewards cheaper models on the same 0-1 scale; anything else
    (BALANCED) averages quality and speed.
    """
    requested = options.priority
    if requested == PriorityEnum.SPEED:
        return model.speedRating * 0.1
    if requested == PriorityEnum.QUALITY:
        return model.qualityRating * 0.1
    if requested == PriorityEnum.COST:
        # Cheaper models score higher; clamped so the bonus never
        # goes negative for expensive models.
        cheapness = max(0, 1.0 - (model.costPer1kTokensInput * 1000))
        return cheapness * 0.1
    # BALANCED (or any other value): average quality and speed, same scale.
    return (model.qualityRating + model.speedRating) * 0.05
def _applyProcessingModeScoring(self, model: AiModel, options: AiCallOptions) -> float:
    """Apply processing mode scoring.

    Small bonus (+0.2) when the model's own priority matches the
    requested processing mode (DETAILED pairs with QUALITY models,
    BASIC pairs with SPEED models); 0.0 in every other combination.
    """
    if options.processingMode == ProcessingModeEnum.DETAILED:
        if model.priority == PriorityEnum.QUALITY:
            return 0.2
    elif options.processingMode == ProcessingModeEnum.BASIC:
        if model.priority == PriorityEnum.SPEED:
            return 0.2
    return 0.0
def _meetsCostConstraints(self, model: AiModel, inputSize: int, options: AiCallOptions) -> bool:
"""Check if model meets cost constraints."""
if options.maxCost is None:
return True
# Estimate cost
estimatedTokens = inputSize / 4
estimatedCost = (estimatedTokens / 1000) * model.costPer1kTokensInput
return estimatedCost <= options.maxCost
def _logSelectionDetails(self, model: AiModel, inputSize: int, options: AiCallOptions):
    """Log detailed selection information for the chosen model.

    Purely informational — no return value, no state changes.
    Ratings are logged on a 0-10 scale; cost is USD per 1k input tokens.
    """
    logger.info(f"Model Selection Details:")
    logger.info(f" Selected: {model.displayName} ({model.name})")
    logger.info(f" Connector: {model.connectorType}")
    logger.info(f" Operation: {options.operationType}")
    logger.info(f" Priority: {options.priority}")
    logger.info(f" Processing Mode: {options.processingMode}")
    logger.info(f" Input Size: {inputSize} bytes")
    logger.info(f" Context Length: {model.contextLength}")
    logger.info(f" Max Tokens: {model.maxTokens}")
    logger.info(f" Quality Rating: {model.qualityRating}/10")
    logger.info(f" Speed Rating: {model.speedRating}/10")
    logger.info(f" Cost: ${model.costPer1kTokensInput:.4f}/1k tokens")
    logger.info(f" Capabilities: {', '.join(model.capabilities)}")
    # NOTE(review): "Priority" is logged twice (options above, model here).
    logger.info(f" Priority: {model.priority}")
def getFallbackCriteria(self, operationType: str) -> Dict[str, Any]:
    """Get fallback selection criteria for an operation type.

    Thin delegation to the shared selection-config object.
    """
    return self.config.getFallbackCriteria(operationType)
def _selectWithFallbackCriteria(self,
                                availableModels: List[AiModel],
                                fallbackCriteria: Dict[str, Any],
                                inputSize: int,
                                options: AiCallOptions) -> Optional[AiModel]:
    """Select model using fallback criteria when normal selection fails.

    Args:
        availableModels: Candidate models.
        fallbackCriteria: Criteria dict (operationTypes, minQualityRating,
            maxCostPer1k, priorityOrder) from the selection config.
        inputSize: Combined prompt+context size in bytes.
        options: AI call options (currently unused in this method).

    Returns:
        Best fallback model, or None when nothing qualifies.
    """
    logger.info("Using fallback criteria for model selection")
    # Filter models by fallback criteria; each check skips (continues past)
    # models that fail it.
    candidates = []
    for model in availableModels:
        if not model.isAvailable:
            continue
        # Check required operation types
        if fallbackCriteria.get("operationTypes"):
            if not all(opType in model.operationTypes for opType in fallbackCriteria["operationTypes"]):
                continue
        # Check quality rating
        if fallbackCriteria.get("minQualityRating"):
            if model.qualityRating < fallbackCriteria["minQualityRating"]:
                continue
        # Check cost
        if fallbackCriteria.get("maxCostPer1k"):
            if model.costPer1kTokensInput > fallbackCriteria["maxCostPer1k"]:
                continue
        # Check context length (20% headroom; contextLength 0 = unlimited)
        if model.contextLength > 0 and inputSize > model.contextLength * 0.8:
            continue
        candidates.append(model)
    if not candidates:
        logger.error("No models available even with fallback criteria")
        return None
    # Sort by priority order from fallback criteria
    priorityOrder = fallbackCriteria.get("priorityOrder", ["quality", "speed", "cost"])
    def _getPriorityScore(model: AiModel) -> float:
        # Earlier entries in priorityOrder receive a larger integer weight.
        score = 0.0
        for i, priority in enumerate(priorityOrder):
            weight = len(priorityOrder) - i  # Higher weight for earlier priorities
            if priority == "quality":
                score += model.qualityRating * weight
            elif priority == "speed":
                score += model.speedRating * weight
            elif priority == "cost":
                # Lower cost = higher score
                # NOTE(review): for costs above $0.001/1k this term goes
                # negative — confirm that is intended.
                score += (1.0 - model.costPer1kTokensInput * 1000) * weight
        return score
    candidates.sort(key=_getPriorityScore, reverse=True)
    selectedModel = candidates[0]
    logger.info(f"Fallback selection: {selectedModel.name} (score: {_getPriorityScore(selectedModel):.2f})")
    return selectedModel
def getFallbackModels(self,
prompt: str,
context: str,
options: AiCallOptions,
availableModels: List[AiModel]) -> List[AiModel]:
"""
Get prioritized list of models for fallback sequence.
Steps:
1. Filter models by capability requirements
2. Rate models by business requirements (priority, processing mode)
3. Sort by rating (descending), then by cost (ascending)
Select the best model using simple filtering and priority-based sorting.
Args:
prompt: User prompt
@ -294,93 +31,195 @@ class ModelSelector:
availableModels: List of available models
Returns:
Prioritized list of models for fallback sequence
Best model for the request, or None if no suitable model found
"""
if not availableModels:
logger.warning("No models available for fallback selection")
try:
# Get failover models (which includes all filtering and sorting)
failoverModelList = self.getFailoverModelList(prompt, context, options, availableModels)
if not failoverModelList:
logger.warning("No suitable models found for the request")
return None
selectedModel = failoverModelList[0] # First model is the best one
logger.info(f"Selected model: {selectedModel.name} (quality: {selectedModel.qualityRating}, cost: ${selectedModel.costPer1kTokensInput:.4f})")
return selectedModel
except Exception as e:
logger.error(f"Error selecting model: {str(e)}")
return None
def getFailoverModelList(self,
                         prompt: str,
                         context: str,
                         options: AiCallOptions,
                         availableModels: List[AiModel]) -> List[AiModel]:
    """
    Get prioritized list of models using scoring-based ranking.
    Args:
        prompt: User prompt
        context: Context data
        options: AI call options
        availableModels: List of available models
    Returns:
        List of models sorted by score (descending); empty list on error
        or when no model supports the operation type.
    """
    try:
        promptSize = len(prompt.encode("utf-8"))
        contextSize = len(context.encode("utf-8"))
        totalSize = promptSize + contextSize
        # Step 1: Filter by operation type (MUST match)
        operationFiltered = [m for m in availableModels if options.operationType in m.operationTypes]
        logger.debug(f"After operation type filtering: {len(operationFiltered)} models")
        # Step 2: Filter by prompt size (MUST be <= 80% of context size)
        # contextLength == 0 means "no limit" and always passes.
        # NOTE(review): only promptSize is checked here while scoring uses
        # prompt+context (totalSize) — confirm the asymmetry is intended.
        promptFiltered = [m for m in operationFiltered if m.contextLength == 0 or promptSize <= m.contextLength * 0.8]
        logger.debug(f"After prompt size filtering: {len(promptFiltered)} models")
        # Step 3: Calculate scores for each model
        # NOTE(review): model.isAvailable is not consulted here — verify
        # availability filtering happens upstream (e.g. in the registry).
        scoredModels = []
        for model in promptFiltered:
            score = self._calculateModelScore(model, promptSize, contextSize, totalSize, options)
            scoredModels.append((model, score))
            logger.debug(f"Model {model.name}: score={score:.3f}")
        # Step 4: Sort by score (descending)
        scoredModels.sort(key=lambda x: x[1], reverse=True)
        sortedModels = [model for model, score in scoredModels]
        logger.debug(f"Final sorted models: {len(sortedModels)} models")
        return sortedModels
    except Exception as e:
        # Fail closed: any unexpected error yields an empty failover list.
        logger.error(f"Error getting failover models: {str(e)}")
        return []
logger.info(f"Building fallback sequence for operation: {options.operationType}, priority: {options.priority}")
def _calculateModelScore(self, model: AiModel, promptSize: int, contextSize: int, totalSize: int, options: AiCallOptions) -> float:
"""
Calculate a score for a model based on how well it fulfills the criteria.
# Step 1: Filter by capability requirements
capableModels = self._filterByCapabilities(availableModels, options)
logger.info(f"Step 1 - Capable models: {[m.name for m in capableModels]}")
if not capableModels:
logger.warning("No models meet capability requirements")
return []
# Step 2: Rate models by business requirements
ratedModels = self._rateModelsByBusinessRequirements(capableModels, prompt, context, options)
logger.info(f"Step 2 - Rated models: {[(m.name, rating) for m, rating in ratedModels]}")
# Step 3: Sort by rating (descending), then by cost (ascending)
sortedModels = self._sortModelsByRatingAndCost(ratedModels)
logger.info(f"Step 3 - Sorted fallback sequence: {[m.name for m in sortedModels]}")
return sortedModels
def _filterByCapabilities(self, models: List[AiModel], options: AiCallOptions) -> List[AiModel]:
"""Filter models by required capabilities."""
capableModels = []
for model in models:
if not model.isAvailable:
continue
Args:
model: The model to score
promptSize: Size of the prompt in bytes
contextSize: Size of the context in bytes
totalSize: Total size (prompt + context) in bytes
options: AI call options
# Check if model supports required capabilities
if options.capabilities:
if not all(cap in model.capabilities for cap in options.capabilities):
logger.debug(f"Model {model.name} missing required capabilities: {options.capabilities}")
continue
# Check operation type compatibility
if not self._meetsBasicRequirements(model, options):
logger.debug(f"Model {model.name} doesn't meet basic requirements")
continue
capableModels.append(model)
Returns:
Score for the model (higher is better)
"""
score = 0.0
return capableModels
def _rateModelsByBusinessRequirements(self,
models: List[AiModel],
prompt: str,
context: str,
options: AiCallOptions) -> List[Tuple[AiModel, float]]:
"""Rate models based on business requirements (priority, processing mode)."""
ratedModels = []
inputSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))
# 1. Prompt + Context size rating
if model.contextLength > 0:
modelMaxSize = model.contextLength * 0.8 # 80% of model context length
if totalSize <= modelMaxSize:
# Within limits: rating = (prompt+contextsize) / (80% modelsize)
score += totalSize / modelMaxSize
else:
# Exceeds limits: rating = modelsize / (prompt+contextsize) (ensures minimum chunks)
score += modelMaxSize / totalSize
else:
# No context length limit
score += 1.0
for model in models:
# Base score from model selection logic
baseScore = self._calculateModelScore(model, inputSize, options, [])
# Apply priority-based scoring
priorityScore = self._applyPriorityScoring(model, options)
# Apply processing mode scoring
processingScore = self._applyProcessingModeScoring(model, options)
# Combine scores
totalScore = baseScore + priorityScore + processingScore
ratedModels.append((model, totalScore))
logger.debug(f"Model {model.name}: base={baseScore:.2f}, priority={priorityScore:.2f}, processing={processingScore:.2f}, total={totalScore:.2f}")
# 2. Processing Mode rating
if hasattr(options, 'processingMode') and options.processingMode:
score += self._getProcessingModeRating(model.processingMode, options.processingMode)
else:
score += 1.0 # No preference
return ratedModels
def _sortModelsByRatingAndCost(self, ratedModels: List[Tuple[AiModel, float]]) -> List[AiModel]:
"""Sort models by rating (descending), then by cost (ascending)."""
def sortKey(item):
model, rating = item
# Primary sort: rating (descending)
# Secondary sort: cost (ascending)
return (-rating, model.costPer1kTokensInput)
# 3. Priority rating
if hasattr(options, 'priority') and options.priority:
score += self._getPriorityRating(model, options.priority)
else:
score += 1.0 # No preference
sortedItems = sorted(ratedModels, key=sortKey)
return [model for model, rating in sortedItems]
return score
def _getProcessingModeRating(self, modelMode: ProcessingModeEnum, requestedMode: ProcessingModeEnum) -> float:
"""Get processing mode rating based on compatibility."""
if modelMode == requestedMode:
return 1.0
# Compatibility matrix
if requestedMode == ProcessingModeEnum.BASIC:
if modelMode == ProcessingModeEnum.ADVANCED:
return 0.5
elif modelMode == ProcessingModeEnum.DETAILED:
return 0.2
elif requestedMode == ProcessingModeEnum.ADVANCED:
if modelMode == ProcessingModeEnum.BASIC:
return 0.2
elif modelMode == ProcessingModeEnum.DETAILED:
return 0.5
elif requestedMode == ProcessingModeEnum.DETAILED:
if modelMode == ProcessingModeEnum.BASIC:
return 0.2
elif modelMode == ProcessingModeEnum.ADVANCED:
return 0.5
return 0.0 # No compatibility
def _getPriorityRating(self, model: AiModel, requestedPriority: PriorityEnum) -> float:
    """Get priority rating based on model capabilities.

    Returns roughly a 0-1 rating for BALANCED/SPEED/QUALITY.  For COST
    the three weighted terms can sum to more than 1.0 (max ~1.7) —
    NOTE(review): confirm that imbalance against the other priorities
    is intended.
    """
    if requestedPriority == PriorityEnum.BALANCED:
        return 1.0
    elif requestedPriority == PriorityEnum.SPEED:
        return model.speedRating / 10.0
    elif requestedPriority == PriorityEnum.QUALITY:
        return model.qualityRating / 10.0
    elif requestedPriority == PriorityEnum.COST:
        # Cost priority: cost gives 1, speed gives 0.5, quality gives 0.2
        # Lower cost is better, so we invert the cost rating
        # Assumes $0.10/1k tokens as the normalization ceiling; models
        # costing more than that clamp to 0.
        costRating = 1.0 - (model.costPer1kTokensInput / 0.1)  # Normalize to 0-1
        costRating = max(0, costRating)  # Ensure non-negative
        speedRating = model.speedRating / 10.0 * 0.5
        qualityRating = model.qualityRating / 10.0 * 0.2
        return costRating + speedRating + qualityRating
    return 1.0  # Default
def _getSizeRating(self, model: AiModel, totalSize: int) -> float:
"""Get size rating for a model based on total input size."""
if model.contextLength > 0:
modelMaxSize = model.contextLength * 0.8 # 80% of model context length
if totalSize <= modelMaxSize:
# Within limits: rating = (prompt+contextsize) / (80% modelsize)
return totalSize / modelMaxSize
else:
# Exceeds limits: rating = modelsize / (prompt+contextsize) (ensures minimum chunks)
return modelMaxSize / totalSize
else:
# No context length limit
return 1.0
def _logModelDetails(self, model: AiModel):
    """Log detailed information about a model.

    Purely informational — no return value, no state changes.
    Ratings are logged on a 0-10 scale; cost is USD per 1k input tokens.
    """
    logger.info(f"Model: {model.name}")
    logger.info(f" Display Name: {model.displayName}")
    logger.info(f" Connector: {model.connectorType}")
    logger.info(f" Context Length: {model.contextLength}")
    logger.info(f" Max Tokens: {model.maxTokens}")
    logger.info(f" Quality Rating: {model.qualityRating}/10")
    logger.info(f" Speed Rating: {model.speedRating}/10")
    logger.info(f" Cost: ${model.costPer1kTokensInput:.4f}/1k tokens")
    logger.info(f" Capabilities: {', '.join(model.capabilities)}")
    logger.info(f" Priority: {model.priority}")
    logger.info(f" Processing Mode: {model.processingMode}")
    logger.info(f" Operation Types: {', '.join(model.operationTypes)}")
# Global selector instance
model_selector = ModelSelector()
# Global model selector instance
modelSelector = ModelSelector()

View file

@ -63,7 +63,7 @@ class AiAnthropic(BaseConnectorAi):
functionCall=self.callAiBasic,
priority=PriorityEnum.QUALITY,
processingMode=ProcessingModeEnum.DETAILED,
operationTypes=[OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE],
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE, OperationTypeEnum.GENERATE],
version="claude-3-5-sonnet-20241022",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.015 + (bytesReceived / 4 / 1000) * 0.075
),

View file

@ -34,7 +34,7 @@ class AiInternal(BaseConnectorAi):
functionCall=self.extractDocument,
priority=PriorityEnum.COST,
processingMode=ProcessingModeEnum.BASIC,
operationTypes=[OperationTypeEnum.GENERAL],
operationTypes=[OperationTypeEnum.EXTRACT],
version="internal-extractor-v1",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: 0.001 + (bytesSent + bytesReceived) / (1024 * 1024) * 0.01
),

View file

@ -65,7 +65,7 @@ class AiOpenai(BaseConnectorAi):
functionCall=self.callAiBasic,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.ADVANCED,
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.ANALYSE],
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE, OperationTypeEnum.GENERATE],
version="gpt-4o",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.03 + (bytesReceived / 4 / 1000) * 0.06
),
@ -83,7 +83,7 @@ class AiOpenai(BaseConnectorAi):
functionCall=self.callAiBasic,
priority=PriorityEnum.SPEED,
processingMode=ProcessingModeEnum.BASIC,
operationTypes=[OperationTypeEnum.GENERAL],
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.GENERATE],
version="gpt-3.5-turbo",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0015 + (bytesReceived / 4 / 1000) * 0.002
),

View file

@ -63,7 +63,7 @@ class AiPerplexity(BaseConnectorAi):
functionCall=self.callAiBasic,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.ADVANCED,
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.WEB_RESEARCH],
operationTypes=[OperationTypeEnum.GENERAL, OperationTypeEnum.PLAN, OperationTypeEnum.ANALYSE, OperationTypeEnum.GENERATE, OperationTypeEnum.WEB_RESEARCH],
version="llama-3.1-sonar-large-128k-online",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.005 + (bytesReceived / 4 / 1000) * 0.005
),

View file

@ -1,13 +1,17 @@
from typing import Optional, List, Dict, Any, Literal, Callable
from typing import Optional, List, Dict, Any, Literal, Callable, TYPE_CHECKING
from pydantic import BaseModel, Field
from enum import Enum
if TYPE_CHECKING:
from modules.datamodels.datamodelExtraction import ContentPart
# Operation Types
class OperationTypeEnum(str, Enum):
    """Operation categories used to route AI requests to models.

    str-mixin enum: members compare equal to (and serialize as) their
    camelCase string values.
    """
    GENERAL = "general"
    PLAN = "plan"
    ANALYSE = "analyse"
    GENERATE = "generate"
    EXTRACT = "extract"
    WEB_RESEARCH = "webResearch"
    IMAGE_ANALYSE = "imageAnalyse"
    IMAGE_GENERATE = "imageGenerate"
@ -141,6 +145,7 @@ class AiCallRequest(BaseModel):
prompt: str = Field(description="The user prompt")
context: Optional[str] = Field(default=None, description="Optional external context (e.g., extracted docs)")
options: AiCallOptions = Field(default_factory=AiCallOptions)
contentParts: Optional[List['ContentPart']] = None # NEW: Content parts for model-aware chunking
class AiCallResponse(BaseModel):

View file

@ -28,6 +28,16 @@ class ChunkResult(BaseModel):
metadata: Dict[str, Any] = Field(default_factory=dict)
class PartResult(BaseModel):
    """Preserves the relationship between a content part and its AI result."""
    # Source content part this result was produced from.
    originalPart: ContentPart
    # Raw AI output text for this part.
    aiResult: str
    # Index of the part within its document — NOTE(review): base (0 or 1)
    # not shown here; confirm at call sites.
    partIndex: int
    # Identifier of the document the part belongs to.
    documentId: str
    # Processing duration; presumably seconds — confirm at call sites.
    processingTime: float = 0.0
    # Free-form extra data attached by the processing pipeline.
    metadata: Dict[str, Any] = Field(default_factory=dict)
class MergeStrategy(BaseModel):
"""Strategy configuration for merging content parts and AI results."""

View file

@ -7,7 +7,7 @@ import time
logger = logging.getLogger(__name__)
from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.aicore.aicoreModelSelector import model_selector
from modules.aicore.aicoreModelSelector import modelSelector
from modules.datamodels.datamodelAi import (
AiModel,
AiCallOptions,
@ -70,7 +70,7 @@ class AiObjects:
raise ValueError("No AI models available")
# Use the dynamic model selector
selectedModel = model_selector.selectModel(prompt, context, options, availableModels)
selectedModel = modelSelector.selectModel(prompt, context, options, availableModels)
if not selectedModel:
logger.error("No suitable model found for the given criteria")
@ -81,8 +81,15 @@ class AiObjects:
async def call(self, request: AiCallRequest) -> AiCallResponse:
"""Call AI model for text generation with fallback mechanism."""
"""Call AI model for text generation with model-aware chunking."""
# Handle content parts (unified path)
if hasattr(request, 'contentParts') and request.contentParts:
return await self._callWithContentParts(request)
# Handle traditional text/context calls
return await self._callWithTextContext(request)
async def _callWithTextContext(self, request: AiCallRequest) -> AiCallResponse:
"""Call AI model for traditional text/context calls with fallback mechanism."""
prompt = request.prompt
context = request.context or ""
options = request.options
@ -108,11 +115,11 @@ class AiObjects:
temperature = 0.2
maxTokens = getattr(options, "maxTokens", None)
# Get fallback models for this operation type
# Get failover models for this operation type
availableModels = modelRegistry.getAvailableModels()
fallbackModels = model_selector.getFallbackModels(prompt, context, options, availableModels)
failoverModelList = modelSelector.getFailoverModelList(prompt, context, options, availableModels)
if not fallbackModels:
if not failoverModelList:
errorMsg = f"No suitable models found for operation {options.operationType}"
logger.error(errorMsg)
return AiCallResponse(
@ -125,11 +132,11 @@ class AiObjects:
errorCount=1
)
# Try each model in fallback sequence
# Try each model in failover sequence
lastError = None
for attempt, model in enumerate(fallbackModels):
for attempt, model in enumerate(failoverModelList):
try:
logger.info(f"Attempting AI call with model: {model.name} (attempt {attempt + 1}/{len(fallbackModels)})")
logger.info(f"Attempting AI call with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
# Call the model
response = await self._callWithModel(model, prompt, context, temperature, maxTokens, inputBytes)
@ -142,15 +149,15 @@ class AiObjects:
logger.warning(f"❌ AI call failed with model {model.name}: {str(e)}")
# If this is not the last model, try the next one
if attempt < len(fallbackModels) - 1:
logger.info(f"🔄 Trying next fallback model...")
if attempt < len(failoverModelList) - 1:
logger.info(f"🔄 Trying next failover model...")
continue
else:
# All models failed
logger.error(f"💥 All {len(fallbackModels)} models failed for operation {options.operationType}")
logger.error(f"💥 All {len(failoverModelList)} models failed for operation {options.operationType}")
break
# All fallback attempts failed - return error response
# All failover attempts failed - return error response
errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {str(lastError)}"
logger.error(errorMsg)
return AiCallResponse(
@ -163,6 +170,241 @@ class AiObjects:
errorCount=1
)
async def _callWithContentParts(self, request: AiCallRequest) -> AiCallResponse:
    """Process content parts with model-aware chunking (unified for single and multiple parts).

    Resolves the failover model sequence once, processes every content part
    independently with that sequence, then merges the per-part responses into
    a single aggregate response whose accounting fields are summed.
    """
    userPrompt = request.prompt
    callOptions = request.options

    # Resolve the failover sequence once; all parts share it.
    models = modelRegistry.getAvailableModels()
    failoverModels = modelSelector.getFailoverModelList(userPrompt, "", callOptions, models)
    if not failoverModels:
        return self._createErrorResponse("No suitable models found", 0, 0)

    # Each part is processed (and chunked, if needed) with its own fallback loop.
    partResponses = [
        await self._processContentPartWithFallback(part, userPrompt, callOptions, failoverModels)
        for part in request.contentParts
    ]

    # Aggregate: merged text plus summed cost/time/traffic/error counters.
    return AiCallResponse(
        content=self._mergePartResults(partResponses),
        modelName="multiple",
        priceUsd=sum(resp.priceUsd for resp in partResponses),
        processingTime=sum(resp.processingTime for resp in partResponses),
        bytesSent=sum(resp.bytesSent for resp in partResponses),
        bytesReceived=sum(resp.bytesReceived for resp in partResponses),
        errorCount=sum(resp.errorCount for resp in partResponses)
    )
async def _processContentPartWithFallback(self, contentPart, prompt: str, options, failoverModelList) -> AiCallResponse:
    """Process a single content part with model-aware chunking and fallback.

    Tries each model in ``failoverModelList`` in order. If the part fits the
    model's context window it is sent directly; otherwise it is chunked for
    that model and the per-chunk responses are merged. Returns an error
    response when every model fails.

    Fix: ``contentPart.data`` may legitimately be ``None`` (e.g. empty
    container parts) — normalize it to ``""`` once, instead of calling
    ``.encode`` on ``None`` and passing ``None`` downstream as context.
    """
    lastError = None
    # Normalize missing data to an empty string (single source of truth for
    # both the size check and the direct call below).
    partData = contentPart.data or ""
    partSize = len(partData.encode('utf-8'))
    for attempt, model in enumerate(failoverModelList):
        try:
            logger.info(f"Processing content part with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
            # Heuristic token->byte conversion (~4 bytes per token) to decide
            # whether the whole part fits the model's context window.
            modelContextBytes = model.contextLength * 4
            if partSize <= modelContextBytes:
                # Part fits - call AI directly
                response = await self._callWithModel(model, prompt, partData, 0.2, None, partSize)
                logger.info(f"✅ Content part processed successfully with model: {model.name}")
                return response
            # Part too large - chunk it for this specific model.
            chunks = await self._chunkContentPart(contentPart, model, options)
            if not chunks:
                raise ValueError(f"Failed to chunk content part for model {model.name}")
            # Process each chunk sequentially with the same model; any chunk
            # failure aborts this model and falls through to the next one.
            chunkResults = []
            for chunk in chunks:
                chunkResponse = await self._callWithModel(model, prompt, chunk['data'], 0.2, None, chunk['size'])
                chunkResults.append(chunkResponse)
            # Merge chunk results and aggregate accounting across chunks.
            mergedContent = self._mergeChunkResults(chunkResults)
            logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)")
            return AiCallResponse(
                content=mergedContent,
                modelName=model.name,
                priceUsd=sum(r.priceUsd for r in chunkResults),
                processingTime=sum(r.processingTime for r in chunkResults),
                bytesSent=sum(r.bytesSent for r in chunkResults),
                bytesReceived=sum(r.bytesReceived for r in chunkResults),
                errorCount=sum(r.errorCount for r in chunkResults)
            )
        except Exception as e:
            lastError = e
            logger.warning(f"❌ Model {model.name} failed for content part: {str(e)}")
            if attempt < len(failoverModelList) - 1:
                logger.info(f"🔄 Trying next failover model...")
                continue
            else:
                logger.error(f"💥 All {len(failoverModelList)} models failed for content part")
                break
    # All models failed
    return self._createErrorResponse(f"All models failed: {str(lastError)}", 0, 0)
async def _chunkContentPart(self, contentPart, model, options) -> List[Dict[str, Any]]:
    """Chunk a content part based on model capabilities.

    Derives chunk-size budgets from the model's context length (using a
    ~4 bytes-per-token heuristic) and delegates the actual splitting to the
    chunker registered for the part's typeGroup. Returns an empty list when
    no chunker exists for the type group or when chunking fails.
    """
    # Budget: 90% of the model context in bytes; text chunks take 70% and
    # image chunks 80% of that budget.
    contextBudget = int(model.contextLength * 4 * 0.9)
    chunkerOptions = {
        "textChunkSize": int(contextBudget * 0.7),
        "imageChunkSize": int(contextBudget * 0.8),
        "maxSize": contextBudget,
        "chunkAllowed": True
    }

    # Resolve a chunker for this part's type group.
    from modules.services.serviceExtraction.subRegistry import ChunkerRegistry
    chunker = ChunkerRegistry().resolve(contentPart.typeGroup)
    if not chunker:
        logger.warning(f"No chunker found for typeGroup: {contentPart.typeGroup}")
        return []

    # Chunk the content part; a chunker failure is logged, not raised.
    try:
        pieces = chunker.chunk(contentPart, chunkerOptions)
    except Exception as e:
        logger.error(f"Chunking failed for {contentPart.typeGroup}: {str(e)}")
        return []
    logger.debug(f"Created {len(pieces)} chunks for {contentPart.typeGroup} part")
    return pieces
def _mergePartResults(self, partResults: List[AiCallResponse]) -> str:
    """Merge part results using the existing sophisticated merging system.

    Wraps each non-empty AI response in a text ContentPart, runs the lot
    through the extraction pipeline's ``_applyMerging`` helper, and joins the
    merged parts with blank lines.
    """
    if not partResults:
        return ""
    # Convert AiCallResponse results to ContentParts for merging
    from modules.datamodels.datamodelExtraction import ContentPart
    from modules.services.serviceExtraction.subUtils import makeId
    content_parts = []
    for i, result in enumerate(partResults):
        # Responses with empty content (e.g. failed parts) are dropped here.
        if result.content:
            content_part = ContentPart(
                id=makeId(),
                parentId=None,
                label=f"ai_result_{i}",
                typeGroup="text",  # Default to text for AI results
                mimeType="text/plain",
                data=result.content,
                metadata={
                    # Carry per-response accounting so the merger (or later
                    # debugging) can see where each fragment came from.
                    "aiResult": True,
                    "modelName": result.modelName,
                    "priceUsd": result.priceUsd,
                    "processingTime": result.processingTime,
                    "bytesSent": result.bytesSent,
                    "bytesReceived": result.bytesReceived
                }
            )
            content_parts.append(content_part)
    # Use existing merging system
    # NOTE(review): parts receive fresh ids from makeId() and are then ordered
    # by "id" — if makeId() is not monotonic this may not preserve the input
    # order of partResults; confirm against subUtils.makeId.
    merge_strategy = {
        "useIntelligentMerging": True,
        "groupBy": "typeGroup",
        "orderBy": "id",
        "mergeType": "concatenate"
    }
    from modules.services.serviceExtraction.subPipeline import _applyMerging
    merged_parts = _applyMerging(content_parts, merge_strategy)
    # Convert merged parts back to final string
    final_content = "\n\n".join([part.data for part in merged_parts])
    logger.info(f"Merged {len(partResults)} AI results using existing merging system")
    return final_content.strip()
def _mergeChunkResults(self, chunkResults: List[AiCallResponse]) -> str:
    """Merge chunk results using the existing sophisticated merging system.

    Same flow as ``_mergePartResults``: each non-empty chunk response becomes
    a text ContentPart (additionally flagged ``"chunk": True``), the set is
    merged via ``_applyMerging``, and the merged parts are joined with blank
    lines.
    """
    if not chunkResults:
        return ""
    # Convert AiCallResponse results to ContentParts for merging
    from modules.datamodels.datamodelExtraction import ContentPart
    from modules.services.serviceExtraction.subUtils import makeId
    content_parts = []
    for i, result in enumerate(chunkResults):
        # Empty chunk responses (e.g. failed chunks) are dropped here.
        if result.content:
            content_part = ContentPart(
                id=makeId(),
                parentId=None,
                label=f"chunk_result_{i}",
                typeGroup="text",  # Default to text for AI results
                mimeType="text/plain",
                data=result.content,
                metadata={
                    # Per-chunk accounting, preserved for the merger/debugging.
                    "aiResult": True,
                    "chunk": True,
                    "modelName": result.modelName,
                    "priceUsd": result.priceUsd,
                    "processingTime": result.processingTime,
                    "bytesSent": result.bytesSent,
                    "bytesReceived": result.bytesReceived
                }
            )
            content_parts.append(content_part)
    # Use existing merging system
    # NOTE(review): ordered by freshly generated "id" values — input order is
    # only preserved if makeId() is monotonic; confirm against subUtils.makeId.
    merge_strategy = {
        "useIntelligentMerging": True,
        "groupBy": "typeGroup",
        "orderBy": "id",
        "mergeType": "concatenate"
    }
    from modules.services.serviceExtraction.subPipeline import _applyMerging
    merged_parts = _applyMerging(content_parts, merge_strategy)
    # Convert merged parts back to final string
    final_content = "\n\n".join([part.data for part in merged_parts])
    logger.info(f"Merged {len(chunkResults)} chunk results using existing merging system")
    return final_content.strip()
def _createErrorResponse(self, errorMsg: str, inputBytes: int, outputBytes: int) -> AiCallResponse:
    """Create an error response.

    The error text is carried in ``content``, the model name is the sentinel
    ``"error"``, cost and time are zeroed, the given byte counters are kept,
    and exactly one error is counted.
    """
    failureFields = {
        "content": errorMsg,
        "modelName": "error",
        "priceUsd": 0.0,
        "processingTime": 0.0,
        "bytesSent": inputBytes,
        "bytesReceived": outputBytes,
        "errorCount": 1
    }
    return AiCallResponse(**failureFields)
async def _callWithModel(self, model: AiModel, prompt: str, context: str, temperature: float, maxTokens: int, inputBytes: int) -> AiCallResponse:
"""Call a specific model and return the response."""
# Replace <TOKEN_LIMIT> placeholder in prompt for this specific model
@ -245,9 +487,9 @@ class AiObjects:
# Get fallback models for image analysis
availableModels = modelRegistry.getAvailableModels()
fallbackModels = model_selector.getFallbackModels(prompt, "", options, availableModels)
failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels)
if not fallbackModels:
if not failoverModelList:
errorMsg = f"No suitable models found for image analysis"
logger.error(errorMsg)
return AiCallResponse(
@ -262,9 +504,9 @@ class AiObjects:
# Try each model in fallback sequence
lastError = None
for attempt, model in enumerate(fallbackModels):
for attempt, model in enumerate(failoverModelList):
try:
logger.info(f"Attempting image analysis with model: {model.name} (attempt {attempt + 1}/{len(fallbackModels)})")
logger.info(f"Attempting image analysis with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
# Call the model
response = await self._callImageWithModel(model, prompt, imageData, mimeType, inputBytes)
@ -277,12 +519,12 @@ class AiObjects:
logger.warning(f"❌ Image analysis failed with model {model.name}: {str(e)}")
# If this is not the last model, try the next one
if attempt < len(fallbackModels) - 1:
if attempt < len(failoverModelList) - 1:
logger.info(f"🔄 Trying next fallback model for image analysis...")
continue
else:
# All models failed
logger.error(f"💥 All {len(fallbackModels)} models failed for image analysis")
logger.error(f"💥 All {len(failoverModelList)} models failed for image analysis")
break
# All fallback attempts failed - return error response

View file

@ -54,8 +54,8 @@ class SubDocumentProcessing:
options: Optional[AiCallOptions] = None
) -> str:
"""
Process documents with per-chunk AI calls and merge results.
FIXED: Now preserves chunk relationships and document structure.
Process documents with model-aware chunking and merge results.
NEW: Uses model-aware chunking in AI call phase instead of extraction phase.
Args:
documents: List of ChatDocument objects to process
@ -68,23 +68,14 @@ class SubDocumentProcessing:
if not documents:
return ""
# Get model capabilities for size calculation
model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options)
# Build extraction options for chunking with intelligent merging
# Build extraction options WITHOUT chunking parameters
extractionOptions: Dict[str, Any] = {
"prompt": prompt,
"operationType": options.operationType if options else "general",
"processDocumentsIndividually": True, # Process each document separately
"maxSize": model_capabilities["maxContextBytes"],
"chunkAllowed": True,
"textChunkSize": model_capabilities["textChunkSize"],
"imageChunkSize": model_capabilities["imageChunkSize"],
"imageMaxPixels": 1024 * 1024,
"imageQuality": 85,
"processDocumentsIndividually": True,
# REMOVED: maxSize, textChunkSize, imageChunkSize
"mergeStrategy": {
"useIntelligentMerging": True, # Enable intelligent token-aware merging
"capabilities": model_capabilities,
"useIntelligentMerging": True,
"prompt": prompt,
"groupBy": "typeGroup",
"orderBy": "id",
@ -95,17 +86,17 @@ class SubDocumentProcessing:
logger.debug(f"Per-chunk extraction options: prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")
try:
# Extract content with chunking
# Extract content WITHOUT chunking
extractionResult = self.extractionService.extractContent(documents, extractionOptions)
if not isinstance(extractionResult, list):
return "[Error: No extraction results]"
# FIXED: Process chunks with proper mapping
chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options)
# Process parts (not chunks) with model-aware AI calls
partResults = await self._processPartsWithMapping(extractionResult, prompt, options)
# FIXED: Merge with preserved chunk relationships
mergedContent = self._mergeChunkResults(chunkResults, options)
# Merge results using existing merging system
mergedContent = self._mergePartResults(partResults, options)
# Save merged extraction content to debug
self.services.utils.writeDebugFile(mergedContent or '', "extractionMergedText")
@ -123,29 +114,19 @@ class SubDocumentProcessing:
options: Optional[AiCallOptions] = None
) -> Dict[str, Any]:
"""
Process documents with per-chunk AI calls and merge results in JSON mode.
Process documents with model-aware chunking and merge results in JSON mode.
Returns structured JSON document instead of text.
"""
if not documents:
return {"metadata": {"title": "Empty Document"}, "sections": []}
# Get model capabilities for size calculation
model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options)
# Build extraction options for chunking with intelligent merging
# Build extraction options WITHOUT chunking parameters
extractionOptions: Dict[str, Any] = {
"prompt": prompt,
"operationType": options.operationType if options else "general",
"processDocumentsIndividually": True, # Process each document separately
"maxSize": model_capabilities["maxContextBytes"],
"chunkAllowed": True,
"textChunkSize": model_capabilities["textChunkSize"],
"imageChunkSize": model_capabilities["imageChunkSize"],
"imageMaxPixels": 1024 * 1024,
"imageQuality": 85,
"processDocumentsIndividually": True,
"mergeStrategy": {
"useIntelligentMerging": True, # Enable intelligent token-aware merging
"capabilities": model_capabilities,
"useIntelligentMerging": True,
"prompt": prompt,
"groupBy": "typeGroup",
"orderBy": "id",
@ -156,17 +137,17 @@ class SubDocumentProcessing:
logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")
try:
# Extract content with chunking
# Extract content WITHOUT chunking
extractionResult = self.extractionService.extractContent(documents, extractionOptions)
if not isinstance(extractionResult, list):
return {"metadata": {"title": "Error Document"}, "sections": []}
# Process chunks with proper mapping
chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options, generate_json=True)
# Process parts with model-aware chunking
partResults = await self._processPartsWithMapping(extractionResult, prompt, options)
# Merge with JSON mode
mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options)
# Convert to JSON format (simplified for now)
mergedJsonDocument = self._convertPartResultsToJson(partResults, options)
# Normalize merged JSON into a single canonical table (only if table content exists)
try:
@ -505,6 +486,127 @@ CONTINUATION INSTRUCTIONS:
"""
return await self.processDocumentsPerChunk(documents, prompt, options)
async def _processPartsWithMapping(
    self,
    extractionResult: List[ContentExtracted],
    prompt: str,
    options: Optional[AiCallOptions] = None
) -> List['PartResult']:
    """Process content parts with model-aware chunking and proper mapping.

    Collects all processable parts from the extraction result (keeping their
    index and source document id), runs one AI call per part — concurrently,
    bounded by a semaphore — and returns one PartResult per part, including
    error placeholders for parts whose processing raised.
    """
    from modules.datamodels.datamodelExtraction import PartResult
    import asyncio
    # Collect all parts that need processing
    parts_to_process = []
    part_index = 0
    for ec in extractionResult:
        for part in ec.parts:
            if part.typeGroup in ("text", "table", "structure", "image", "container", "binary"):
                # Skip empty container parts
                if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0):
                    logger.debug(f"Skipping empty container part: mimeType={part.mimeType}")
                    continue
                # Remember index + owning document so results can be mapped back.
                parts_to_process.append({
                    'part': part,
                    'part_index': part_index,
                    'document_id': ec.id
                })
                part_index += 1
    logger.info(f"Processing {len(parts_to_process)} parts with model-aware chunking")
    # Process parts in parallel
    async def process_single_part(part_info: Dict) -> PartResult:
        # One AI call per part; never raises — failures become error PartResults.
        part = part_info['part']
        part_index = part_info['part_index']
        document_id = part_info['document_id']
        start_time = time.time()
        try:
            # Create AI call request with content part
            from modules.datamodels.datamodelAi import AiCallRequest
            request = AiCallRequest(
                prompt=prompt,
                context="",  # Context is in the content part
                options=options,
                contentParts=[part]  # Pass as list for unified processing
            )
            # Call AI with model-aware chunking
            response = await self.aiObjects.call(request)
            processing_time = time.time() - start_time
            return PartResult(
                originalPart=part,
                aiResult=response.content,
                partIndex=part_index,
                documentId=document_id,
                processingTime=processing_time,
                metadata={
                    "success": True,
                    "partSize": len(part.data) if part.data else 0,
                    "resultSize": len(response.content),
                    "typeGroup": part.typeGroup,
                    "modelName": response.modelName,
                    "priceUsd": response.priceUsd
                }
            )
        except Exception as e:
            processing_time = time.time() - start_time
            logger.warning(f"Error processing part {part_index}: {str(e)}")
            # Error placeholder keeps the part's slot so ordering/mapping survives.
            return PartResult(
                originalPart=part,
                aiResult=f"[Error processing part: {str(e)}]",
                partIndex=part_index,
                documentId=document_id,
                processingTime=processing_time,
                metadata={
                    "success": False,
                    "error": str(e),
                    "partSize": len(part.data) if part.data else 0,
                    "typeGroup": part.typeGroup
                }
            )
    # Process parts with concurrency control
    # NOTE(review): assumes options.maxConcurrentParts, when present, is a
    # positive int — a None/0 value would break or stall the Semaphore; confirm.
    max_concurrent = 5
    if options and hasattr(options, 'maxConcurrentParts'):
        max_concurrent = options.maxConcurrentParts
    semaphore = asyncio.Semaphore(max_concurrent)
    async def process_with_semaphore(part_info):
        # Bound the number of simultaneously running part calls.
        async with semaphore:
            return await process_single_part(part_info)
    tasks = [process_with_semaphore(part_info) for part_info in parts_to_process]
    # return_exceptions=True: a raised task yields its exception in-place
    # instead of cancelling the gather.
    part_results = await asyncio.gather(*tasks, return_exceptions=True)
    # Handle exceptions
    processed_results = []
    for i, result in enumerate(part_results):
        if isinstance(result, Exception):
            # Map the exception back to its part via position in the task list.
            part_info = parts_to_process[i]
            processed_results.append(PartResult(
                originalPart=part_info['part'],
                aiResult=f"[Error in parallel processing: {str(result)}]",
                partIndex=part_info['part_index'],
                documentId=part_info['document_id'],
                processingTime=0.0,
                metadata={"success": False, "error": str(result)}
            ))
        elif result is not None:
            processed_results.append(result)
    logger.info(f"Completed processing {len(processed_results)} parts")
    return processed_results
async def _processChunksWithMapping(
self,
extractionResult: List[ContentExtracted],
@ -907,340 +1009,451 @@ CONTINUATION INSTRUCTIONS:
logger.info(f"Completed processing {len(processed_results)} chunks")
return processed_results
def _mergePartResults(
    self,
    partResults: List['PartResult'],
    options: Optional[AiCallOptions] = None
) -> str:
    """Merge part results using existing sophisticated merging system.

    Re-wraps each PartResult as a ContentPart (AI output as ``data``, original
    part identity/typeGroup preserved), merges via ``_applyMerging`` grouped by
    document and ordered by part index, and joins the merged parts with blank
    lines. ``options.mergeStrategy``, when present, overrides the defaults.
    """
    if not partResults:
        return ""
    # Convert PartResults back to ContentParts for existing merger system
    from modules.datamodels.datamodelExtraction import ContentPart
    content_parts = []
    for part_result in partResults:
        # Create ContentPart from PartResult with proper typeGroup
        content_part = ContentPart(
            id=part_result.originalPart.id,
            parentId=part_result.originalPart.parentId,
            label=part_result.originalPart.label,
            typeGroup=part_result.originalPart.typeGroup,  # Use original typeGroup
            mimeType=part_result.originalPart.mimeType,
            data=part_result.aiResult,  # Use AI result as data
            metadata={
                # Keep the original part metadata, augmented with AI bookkeeping.
                **part_result.originalPart.metadata,
                "aiResult": True,
                "partIndex": part_result.partIndex,
                "documentId": part_result.documentId,
                "processingTime": part_result.processingTime,
                "success": part_result.metadata.get("success", False)
            }
        )
        content_parts.append(content_part)
    # Use existing merging strategy from options
    merge_strategy = {
        "useIntelligentMerging": True,
        "groupBy": "documentId",  # Group by document
        "orderBy": "partIndex",  # Order by part index
        "mergeType": "concatenate"
    }
    if options and hasattr(options, 'mergeStrategy'):
        merge_strategy.update(options.mergeStrategy)
    # Apply existing merging logic using the sophisticated merging system
    from modules.services.serviceExtraction.subPipeline import _applyMerging
    merged_parts = _applyMerging(content_parts, merge_strategy)
    # Convert merged parts back to final string
    final_content = "\n\n".join([part.data for part in merged_parts])
    logger.info(f"Merged {len(partResults)} parts using existing sophisticated merging system")
    return final_content.strip()
def _convertPartResultsToJson(
    self,
    partResults: List['PartResult'],
    options: Optional[AiCallOptions] = None
) -> Dict[str, Any]:
    """Convert part results to JSON format using existing sophisticated merging system.

    Re-wraps PartResults as ContentParts, merges them via ``_applyMerging``,
    then parses each merged part's AI output as JSON. A "documents" payload is
    returned immediately as a multi-file response; otherwise per-part
    "sections" are accumulated (sorted by "order") into one merged document.
    Unparseable or failed parts become error sections rather than being lost.
    """
    if not partResults:
        return {"metadata": {"title": "Empty Document"}, "sections": []}
    # Convert PartResults back to ContentParts for existing merger system
    from modules.datamodels.datamodelExtraction import ContentPart
    content_parts = []
    for part_result in partResults:
        # Create ContentPart from PartResult with proper typeGroup
        content_part = ContentPart(
            id=part_result.originalPart.id,
            parentId=part_result.originalPart.parentId,
            label=part_result.originalPart.label,
            typeGroup=part_result.originalPart.typeGroup,  # Use original typeGroup
            mimeType=part_result.originalPart.mimeType,
            data=part_result.aiResult,  # Use AI result as data
            metadata={
                # Original metadata plus AI bookkeeping used further below.
                **part_result.originalPart.metadata,
                "aiResult": True,
                "partIndex": part_result.partIndex,
                "documentId": part_result.documentId,
                "processingTime": part_result.processingTime,
                "success": part_result.metadata.get("success", False)
            }
        )
        content_parts.append(content_part)
    # Use existing merging strategy for JSON mode
    merge_strategy = {
        "useIntelligentMerging": True,
        "groupBy": "documentId",  # Group by document
        "orderBy": "partIndex",  # Order by part index
        "mergeType": "concatenate"
    }
    if options and hasattr(options, 'mergeStrategy'):
        merge_strategy.update(options.mergeStrategy)
    # Apply existing merging logic using the sophisticated merging system
    from modules.services.serviceExtraction.subPipeline import _applyMerging
    merged_parts = _applyMerging(content_parts, merge_strategy)
    # Convert merged parts to JSON format
    all_sections = []
    document_titles = []
    for part in merged_parts:
        if part.metadata.get("success", False):
            try:
                # Parse JSON from AI result
                part_json = json.loads(part.data)
                # Check if this is a multi-file response (has "documents" key)
                if isinstance(part_json, dict) and "documents" in part_json:
                    # This is a multi-file response - merge all documents
                    logger.debug(f"Processing multi-file response from part {part.id} with {len(part_json['documents'])} documents")
                    # Return multi-file response directly
                    # NOTE(review): this early return discards sections/titles
                    # collected from earlier parts — confirm that is intended.
                    return {
                        "metadata": part_json.get("metadata", {"title": "Merged Document"}),
                        "documents": part_json["documents"]
                    }
                # Extract sections from single-file response
                elif isinstance(part_json, dict) and "sections" in part_json:
                    for section in part_json["sections"]:
                        # Add part context to section
                        section["metadata"] = section.get("metadata", {})
                        section["metadata"]["source_part"] = part.id
                        section["metadata"]["source_document"] = part.metadata.get("documentId", "unknown")
                        section["metadata"]["part_index"] = part.metadata.get("partIndex", 0)
                        all_sections.append(section)
                # Extract document title
                if isinstance(part_json, dict) and "metadata" in part_json:
                    title = part_json["metadata"].get("title", "")
                    if title and title not in document_titles:
                        document_titles.append(title)
            except json.JSONDecodeError as e:
                logger.warning(f"Failed to parse JSON from part {part.id}: {str(e)}")
                # Create a fallback section for invalid JSON
                fallback_section = {
                    "id": f"error_section_{part.id}",
                    "title": "Error Section",
                    "content_type": "paragraph",
                    "elements": [{
                        "text": f"Error parsing part {part.id}: {str(e)}"
                    }],
                    "order": part.metadata.get("partIndex", 0),
                    "metadata": {
                        "source_document": part.metadata.get("documentId", "unknown"),
                        "part_id": part.id,
                        "error": str(e)
                    }
                }
                all_sections.append(fallback_section)
        else:
            # Handle error parts
            error_section = {
                "id": f"error_section_{part.id}",
                "title": "Error Section",
                "content_type": "paragraph",
                "elements": [{
                    "text": f"Error in part {part.id}: {part.metadata.get('error', 'Unknown error')}"
                }],
                "order": part.metadata.get("partIndex", 0),
                "metadata": {
                    "source_document": part.metadata.get("documentId", "unknown"),
                    "part_id": part.id,
                    "error": part.metadata.get('error', 'Unknown error')
                }
            }
            all_sections.append(error_section)
    # Sort sections by order
    all_sections.sort(key=lambda x: x.get("order", 0))
    # Create merged document with sections
    merged_document = {
        "metadata": {
            "title": document_titles[0] if document_titles else "Merged Document",
            "extraction_method": "model_aware_chunking_with_merging",
            "version": "2.0"
        },
        "sections": all_sections,
        "summary": f"Merged document using sophisticated merging system",
        "tags": ["merged", "ai_generated", "model_aware", "sophisticated_merging"]
    }
    logger.info(f"Converted {len(partResults)} parts to JSON format using existing sophisticated merging system")
    return merged_document
def _mergeChunkResults(
self,
chunkResults: List[ChunkResult],
options: Optional[AiCallOptions] = None
) -> str:
"""Merge chunk results while preserving document structure and chunk order."""
"""Merge chunk results using existing sophisticated merging system."""
if not chunkResults:
return ""
# Get merging configuration from options
chunk_separator = "\n\n---\n\n"
include_document_headers = True
include_chunk_metadata = False
if options:
if hasattr(options, 'chunkSeparator'):
chunk_separator = options.chunkSeparator
elif hasattr(options, 'mergeStrategy') and options.mergeStrategy:
chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n---\n\n")
# Check for enhanced options
if hasattr(options, 'preserveChunkMetadata'):
include_chunk_metadata = options.preserveChunkMetadata
# Group chunk results by document
results_by_document = {}
# Convert ChunkResults back to ContentParts for existing merger system
from modules.datamodels.datamodelExtraction import ContentPart
content_parts = []
for chunk_result in chunkResults:
doc_id = chunk_result.documentId
if doc_id not in results_by_document:
results_by_document[doc_id] = []
results_by_document[doc_id].append(chunk_result)
# Create ContentPart from ChunkResult with proper typeGroup
content_part = ContentPart(
id=chunk_result.originalChunk.id,
parentId=chunk_result.originalChunk.parentId,
label=chunk_result.originalChunk.label,
typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup
mimeType=chunk_result.originalChunk.mimeType,
data=chunk_result.aiResult, # Use AI result as data
metadata={
**chunk_result.originalChunk.metadata,
"aiResult": True,
"chunk": True,
"chunkIndex": chunk_result.chunkIndex,
"documentId": chunk_result.documentId,
"processingTime": chunk_result.processingTime,
"success": chunk_result.metadata.get("success", False)
}
)
content_parts.append(content_part)
# Sort chunks within each document by chunk index
for doc_id in results_by_document:
results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
# Use existing merging strategy from options
merge_strategy = {
"useIntelligentMerging": True,
"groupBy": "documentId", # Group by document
"orderBy": "chunkIndex", # Order by chunk index
"mergeType": "concatenate"
}
# Merge results for each document
merged_documents = []
if options and hasattr(options, 'mergeStrategy'):
merge_strategy.update(options.mergeStrategy)
for doc_id, doc_chunks in results_by_document.items():
# Build document header if enabled
doc_header = ""
if include_document_headers:
doc_header = f"\n\n=== DOCUMENT: {doc_id} ===\n\n"
# Merge chunks for this document
doc_content = ""
for i, chunk_result in enumerate(doc_chunks):
# Add chunk separator (except for first chunk)
if i > 0:
doc_content += chunk_separator
# Add chunk content with optional metadata
chunk_metadata = chunk_result.metadata
if chunk_metadata.get("success", False):
chunk_content = chunk_result.aiResult
# Add chunk metadata if enabled
if include_chunk_metadata:
chunk_info = f"[Chunk {chunk_result.chunkIndex} - {chunk_metadata.get('typeGroup', 'unknown')} - {chunk_metadata.get('chunkSize', 0)} chars]"
chunk_content = f"{chunk_info}\n{chunk_content}"
doc_content += chunk_content
else:
# Handle error chunks
error_msg = f"[ERROR in chunk {chunk_result.chunkIndex}: {chunk_metadata.get('error', 'Unknown error')}]"
doc_content += error_msg
merged_documents.append(doc_header + doc_content)
# Apply existing merging logic using the sophisticated merging system
from modules.services.serviceExtraction.subPipeline import _applyMerging
merged_parts = _applyMerging(content_parts, merge_strategy)
# Join all documents
final_result = "\n\n".join(merged_documents)
# Convert merged parts back to final string
final_content = "\n\n".join([part.data for part in merged_parts])
logger.info(f"Merged {len(chunkResults)} chunks from {len(results_by_document)} documents")
return final_result.strip()
logger.info(f"Merged {len(chunkResults)} chunks using existing sophisticated merging system")
return final_content.strip()
def _mergeChunkResultsClean(
self,
chunkResults: List[ChunkResult],
options: Optional[AiCallOptions] = None
) -> str:
"""Merge chunk results in CLEAN mode - no debug metadata or document headers."""
"""Merge chunk results in CLEAN mode using existing sophisticated merging system."""
if not chunkResults:
return ""
# Get merging configuration from options
chunk_separator = "\n\n"
include_document_headers = False # CLEAN MODE: No document headers
include_chunk_metadata = False # CLEAN MODE: No chunk metadata
if options:
if hasattr(options, 'chunkSeparator'):
chunk_separator = options.chunkSeparator
elif hasattr(options, 'mergeStrategy') and options.mergeStrategy:
chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n")
# Group chunk results by document
results_by_document = {}
# Convert ChunkResults back to ContentParts for existing merger system
from modules.datamodels.datamodelExtraction import ContentPart
content_parts = []
for chunk_result in chunkResults:
doc_id = chunk_result.documentId
if doc_id not in results_by_document:
results_by_document[doc_id] = []
results_by_document[doc_id].append(chunk_result)
# Sort chunks within each document by chunk index
for doc_id in results_by_document:
results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
# Merge results for each document in CLEAN mode
merged_documents = []
for doc_id, doc_chunks in results_by_document.items():
# CLEAN MODE: No document headers
doc_header = ""
# Skip empty or error chunks in clean mode
if not chunk_result.metadata.get("success", False):
continue
if not chunk_result.aiResult or not chunk_result.aiResult.strip():
continue
# Skip container/binary chunks in clean mode
if chunk_result.aiResult.startswith("[Skipped ") and "content:" in chunk_result.aiResult:
continue
# Merge chunks for this document
doc_content = ""
for i, chunk_result in enumerate(doc_chunks):
# Add chunk separator (except for first chunk)
if i > 0:
doc_content += chunk_separator
# Add chunk content without metadata
chunk_metadata = chunk_result.metadata
if chunk_metadata.get("success", False):
chunk_content = chunk_result.aiResult
# CLEAN MODE: Skip container/binary chunks entirely
if chunk_content.startswith("[Skipped ") and "content:" in chunk_content:
continue # Skip container/binary chunks in clean mode
# CLEAN MODE: Skip empty or whitespace-only chunks
if not chunk_content.strip():
continue # Skip empty chunks in clean mode
# CLEAN MODE: No chunk metadata
doc_content += chunk_content
else:
# Handle error chunks silently in clean mode
continue
merged_documents.append(doc_header + doc_content)
# Create ContentPart from ChunkResult with proper typeGroup
content_part = ContentPart(
id=chunk_result.originalChunk.id,
parentId=chunk_result.originalChunk.parentId,
label=chunk_result.originalChunk.label,
typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup
mimeType=chunk_result.originalChunk.mimeType,
data=chunk_result.aiResult, # Use AI result as data
metadata={
**chunk_result.originalChunk.metadata,
"aiResult": True,
"chunk": True,
"chunkIndex": chunk_result.chunkIndex,
"documentId": chunk_result.documentId,
"processingTime": chunk_result.processingTime,
"success": chunk_result.metadata.get("success", False)
}
)
content_parts.append(content_part)
# Join all documents
final_result = "\n\n".join(merged_documents)
# Use existing merging strategy for clean mode
merge_strategy = {
"useIntelligentMerging": True,
"groupBy": "documentId", # Group by document
"orderBy": "chunkIndex", # Order by chunk index
"mergeType": "concatenate"
}
return final_result.strip()
if options and hasattr(options, 'mergeStrategy'):
merge_strategy.update(options.mergeStrategy)
# Apply existing merging logic using the sophisticated merging system
from modules.services.serviceExtraction.subPipeline import _applyMerging
merged_parts = _applyMerging(content_parts, merge_strategy)
# Convert merged parts back to final string
final_content = "\n\n".join([part.data for part in merged_parts])
logger.info(f"Merged {len(content_parts)} chunks in clean mode using existing sophisticated merging system")
return final_content.strip()
def _mergeChunkResultsJson(
    self,
    chunkResults: List[ChunkResult],
    options: Optional[AiCallOptions] = None
) -> Dict[str, Any]:
    """
    Merge chunk results in JSON mode using the existing sophisticated merging system.

    Converts each ChunkResult back into a ContentPart, runs them through the
    shared ``_applyMerging`` pipeline (grouped by document, ordered by chunk
    index), then parses each merged part's AI result as JSON and assembles a
    single structured document.

    NOTE(review): this block was reconstructed from a corrupted diff that had
    the pre- and post-change implementations interleaved; the post-change
    (merged_parts-based) path was kept — confirm against the repository.

    Args:
        chunkResults: Per-chunk AI results to merge.
        options: Optional call options; ``options.mergeStrategy`` (if present)
            overrides the default merge strategy.

    Returns:
        Either a multi-file response ``{"metadata": ..., "documents": [...]}``
        (returned directly from the first successful part that carries a
        "documents" key) or a single merged document with "sections".
    """
    if not chunkResults:
        return {"metadata": {"title": "Empty Document"}, "sections": []}
    # Group chunk results by document (used for source_documents metadata).
    results_by_document = {}
    # Convert ChunkResults back to ContentParts for the existing merger system.
    from modules.datamodels.datamodelExtraction import ContentPart
    content_parts = []
    for chunk_result in chunkResults:
        doc_id = chunk_result.documentId
        if doc_id not in results_by_document:
            results_by_document[doc_id] = []
        results_by_document[doc_id].append(chunk_result)
        # Create ContentPart from ChunkResult with proper typeGroup
        content_part = ContentPart(
            id=chunk_result.originalChunk.id,
            parentId=chunk_result.originalChunk.parentId,
            label=chunk_result.originalChunk.label,
            typeGroup=chunk_result.originalChunk.typeGroup,  # Use original typeGroup
            mimeType=chunk_result.originalChunk.mimeType,
            data=chunk_result.aiResult,  # Use AI result as data
            metadata={
                **chunk_result.originalChunk.metadata,
                "aiResult": True,
                "chunk": True,
                "chunkIndex": chunk_result.chunkIndex,
                "documentId": chunk_result.documentId,
                "processingTime": chunk_result.processingTime,
                "success": chunk_result.metadata.get("success", False)
            }
        )
        content_parts.append(content_part)
    # Sort chunks within each document by chunk index
    for doc_id in results_by_document:
        results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
    # Use existing merging strategy for JSON mode
    merge_strategy = {
        "useIntelligentMerging": True,
        "groupBy": "documentId",  # Group by document
        "orderBy": "chunkIndex",  # Order by chunk index
        "mergeType": "concatenate"
    }
    if options and hasattr(options, 'mergeStrategy'):
        merge_strategy.update(options.mergeStrategy)
    # Apply existing merging logic using the sophisticated merging system
    from modules.services.serviceExtraction.subPipeline import _applyMerging
    merged_parts = _applyMerging(content_parts, merge_strategy)
    # Convert merged parts to JSON format
    all_sections = []
    document_titles = []
    for part in merged_parts:
        if part.metadata.get("success", False):
            try:
                # Parse JSON from AI result
                chunk_json = json.loads(part.data)
                # Check if this is a multi-file response (has "documents" key)
                if isinstance(chunk_json, dict) and "documents" in chunk_json:
                    logger.debug(f"Processing multi-file response from part {part.id} with {len(chunk_json['documents'])} documents")
                    # Return multi-file response directly
                    return {
                        "metadata": chunk_json.get("metadata", {"title": "Merged Document"}),
                        "documents": chunk_json["documents"]
                    }
                # Extract sections from single-file response
                elif isinstance(chunk_json, dict) and "sections" in chunk_json:
                    for section in chunk_json["sections"]:
                        # Add part context to section
                        section["metadata"] = section.get("metadata", {})
                        section["metadata"]["source_part"] = part.id
                        section["metadata"]["source_document"] = part.metadata.get("documentId", "unknown")
                        section["metadata"]["chunk_index"] = part.metadata.get("chunkIndex", 0)
                        all_sections.append(section)
                # Extract document title
                if isinstance(chunk_json, dict) and "metadata" in chunk_json:
                    title = chunk_json["metadata"].get("title", "")
                    if title and title not in document_titles:
                        document_titles.append(title)
            except json.JSONDecodeError as e:
                logger.warning(f"Failed to parse JSON from part {part.id}: {str(e)}")
                # Create a fallback section for invalid JSON
                fallback_section = {
                    "id": f"error_section_{part.id}",
                    "title": "Error Section",
                    "content_type": "paragraph",
                    "elements": [{
                        "text": f"Error parsing part {part.id}: {str(e)}"
                    }],
                    "order": part.metadata.get("chunkIndex", 0),
                    "metadata": {
                        "source_document": part.metadata.get("documentId", "unknown"),
                        "part_id": part.id,
                        "error": str(e)
                    }
                }
                all_sections.append(fallback_section)
        else:
            # Handle error parts
            error_section = {
                "id": f"error_section_{part.id}",
                "title": "Error Section",
                "content_type": "paragraph",
                "elements": [{
                    "text": f"Error in part {part.id}: {part.metadata.get('error', 'Unknown error')}"
                }],
                "order": part.metadata.get("chunkIndex", 0),
                "metadata": {
                    "source_document": part.metadata.get("documentId", "unknown"),
                    "part_id": part.id,
                    "error": part.metadata.get('error', 'Unknown error')
                }
            }
            all_sections.append(error_section)
    # Sort sections by order
    all_sections.sort(key=lambda x: x.get("order", 0))
    # Create merged document with sections
    merged_document = {
        "metadata": {
            "title": document_titles[0] if document_titles else "Merged Document",
            "source_documents": list(results_by_document.keys()),
            "extraction_method": "ai_json_extraction_with_merging",
            "version": "2.0"
        },
        "sections": all_sections,
        "summary": f"Merged document using sophisticated merging system",
        "tags": ["merged", "ai_generated", "sophisticated_merging"]
    }
    logger.info(f"Merged {len(chunkResults)} chunks using existing sophisticated merging system (JSON mode)")
    return merged_document
def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]:
    """
    Determine chunking byte budgets for the given content.

    Estimates the total payload size (UTF-8 prompt bytes plus each document's
    declared fileSize) and asks the centralized model selector for the best
    model; that model's context length drives the returned chunk-size limits.
    Falls back to a conservative 128k-token context when selection yields no
    model or raises.

    Args:
        prompt: Prompt text to be sent to the model.
        documents: Optional attached documents contributing to the size estimate.
        options: Call options forwarded to the model selector.

    Returns:
        Dict with "maxContextBytes", "textChunkSize" and "imageChunkSize".
    """
    # Rough payload estimate: prompt bytes plus declared document sizes.
    promptBytes = len(prompt.encode('utf-8'))
    documentBytes = sum((doc.fileSize or 0) for doc in documents) if documents else 0
    totalBytes = promptBytes + documentBytes
    try:
        from modules.aicore.aicoreModelRegistry import modelRegistry
        from modules.aicore.aicoreModelSelector import model_selector
        # Centralized selection: pick the best model and use its context length.
        candidates = modelRegistry.getAvailableModels()
        chosen = model_selector.selectModel(prompt, "", options, candidates)
        if chosen:
            contextTokens = chosen.contextLength
            chosenName = chosen.name
            logger.debug(f"Selected model for chunking: {chosenName} with context length: {contextTokens}")
        else:
            # No suitable model: conservative GPT-4o-sized default.
            contextTokens = 128000
            chosenName = "fallback"
            logger.warning(f"No model selected for chunking, using fallback context length: {contextTokens}")
    except Exception as e:
        # Selection machinery unavailable or broken: same conservative default.
        contextTokens = 128000
        chosenName = "fallback"
        logger.error(f"Model selection failed for chunking: {e}, using fallback context length: {contextTokens}")
    # Tokens -> bytes (~4 chars per token), keeping 90% of the context as headroom.
    maxContextBytes = int(int(contextTokens * 4) * 0.9)
    textChunkSize = int(maxContextBytes * 0.7)   # text chunks get 70% of the budget
    imageChunkSize = int(maxContextBytes * 0.8)  # image chunks get 80% of the budget
    logger.debug(f"Content size: {totalBytes} bytes, Max context: {maxContextBytes} bytes")
    logger.debug(f"Text chunk size: {textChunkSize} bytes, Image chunk size: {imageChunkSize} bytes")
    return {
        "maxContextBytes": maxContextBytes,
        "textChunkSize": textChunkSize,
        "imageChunkSize": imageChunkSize
    }
# REMOVED: _getModelCapabilitiesForContent method - no longer needed with model-aware chunking

View file

@ -73,139 +73,18 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker
parts = extractor.extract(documentBytes, {"fileName": fileName, "mimeType": mimeType, "options": options})
# Apply chunking and size limiting
parts = poolAndLimit(parts, chunkerRegistry, options)
# REMOVED: poolAndLimit(parts, chunkerRegistry, options)
# REMOVED: Chunking logic - now handled in AI call phase
# Optional merge step - but preserve chunks
# Apply merging strategy if provided (preserve existing logic)
mergeStrategy = options.get("mergeStrategy", {})
if mergeStrategy:
# Don't merge chunks - they should stay separate for processing
non_chunk_parts = [p for p in parts if not p.metadata.get("chunk", False)]
chunk_parts = [p for p in parts if p.metadata.get("chunk", False)]
logger.debug(f"runExtraction: Preserving {len(chunk_parts)} chunks from merging")
logger.debug(f"runExtraction - non_chunk_parts: {len(non_chunk_parts)}, chunk_parts: {len(chunk_parts)}")
# Apply intelligent merging for small text parts
if non_chunk_parts:
# Count text parts
text_parts = [p for p in non_chunk_parts if p.typeGroup == "text"]
if len(text_parts) > 5: # If we have many small text parts, merge them
logger.info(f"🔧 Merging {len(text_parts)} small text parts for efficiency")
non_chunk_parts = _mergeParts(non_chunk_parts, mergeStrategy)
# Combine non-chunk parts with chunk parts (chunks stay separate)
parts = non_chunk_parts + chunk_parts
logger.debug(f"runExtraction: Final parts after merging: {len(parts)} (chunks: {len(chunk_parts)})")
logger.debug(f"runExtraction - Final parts: {len(parts)} (chunks: {len(chunk_parts)})")
# Timestamp-only extraction debug dumps removed
parts = _applyMerging(parts, mergeStrategy)
return ContentExtracted(id=makeId(), parts=parts)
def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, options: Dict[str, Any]) -> List[ContentPart]:
    """
    Pool parts under a byte budget and chunk the overflow.

    Keeps parts in their original order until the cumulative declared size
    exceeds ``options["maxSize"]``; if chunking is allowed, every overflow part
    with a chunkable typeGroup is split via the chunker registry and all
    resulting chunks are kept (chunks are processed separately downstream, so
    they are not counted against the budget). An optional mergeStrategy is then
    applied to non-chunk parts only; chunks are never merged.

    Fix: removed duplicated debug log lines that were merge artifacts (the same
    message was emitted twice per chunking step and after merging).

    Args:
        parts: Extracted content parts; each carries ``metadata["size"]`` in bytes.
        chunkerRegistry: Registry resolving a chunker per typeGroup.
        options: Dict with "maxSize", "chunkAllowed" and "mergeStrategy" keys.

    Returns:
        The kept (and possibly chunked/merged) list of ContentParts.
    """
    maxSize = int(options.get("maxSize", 0) or 0)
    chunkAllowed = bool(options.get("chunkAllowed", False))
    mergeStrategy = options.get("mergeStrategy", {})
    if maxSize <= 0:
        # No size budget: still apply merging if a strategy was provided.
        if mergeStrategy:
            return _applyMerging(parts, mergeStrategy)
        return parts
    # First, try to fit parts within the size limit in their original order.
    current = 0
    kept: List[ContentPart] = []
    remaining: List[ContentPart] = []
    logger.debug(f"Starting poolAndLimit with {len(parts)} parts, maxSize={maxSize}")
    for i, p in enumerate(parts):
        size = int(p.metadata.get("size", 0) or 0)
        # Show first 50 characters of text content for debugging
        content_preview = p.data[:50].replace('\n', '\\n') if p.data else ""
        logger.debug(f"Part {i}: {p.typeGroup} - {size} bytes - '{content_preview}...' (current: {current})")
        if current + size <= maxSize:
            kept.append(p)
            current += size
            logger.debug(f"Part {i} kept (total: {current})")
        else:
            remaining.append(p)
            logger.debug(f"Part {i} moved to remaining")
    logger.debug(f"Kept: {len(kept)}, Remaining: {len(remaining)}")
    # If we have remaining parts and chunking is allowed, try chunking.
    if remaining and chunkAllowed:
        logger.debug(f"=== CHUNKING ACTIVATED ===")
        logger.debug(f"Remaining parts to chunk: {len(remaining)}")
        logger.debug(f"Max size limit: {maxSize} bytes")
        logger.debug(f"Current size used: {current} bytes")
        logger.debug(f"Chunking {len(remaining)} remaining parts")
        for p in remaining:
            if p.typeGroup in ("text", "table", "structure", "image", "container", "binary"):
                logger.debug(f"Chunking {p.typeGroup} part with {len(p.data)} chars")
                chunks = chunkerRegistry.resolve(p.typeGroup).chunk(p, options)
                logger.debug(f"Created {len(chunks)} chunks")
                chunks_added = 0
                for ch in chunks:
                    chSize = int(ch.get("size", 0) or 0)
                    # Add all chunks - don't limit by maxSize since they'll be processed separately
                    kept.append(ContentPart(
                        id=makeId(),
                        parentId=p.id,
                        label=f"chunk_{ch.get('order', 0)}",
                        typeGroup=p.typeGroup,
                        mimeType=p.mimeType,
                        data=ch.get("data", ""),
                        metadata={
                            "size": chSize,
                            "chunk": True,
                            **ch.get("metadata", {})
                        }
                    ))
                    chunks_added += 1
                    logger.debug(f"Added chunk {ch.get('order', 0)}: {chSize} bytes")
                logger.debug(f"Added {chunks_added} chunks from {p.typeGroup} part")
    # Apply merging strategy if provided, but preserve chunks (they must stay
    # separate for downstream per-chunk processing).
    if mergeStrategy:
        non_chunk_parts = [p for p in kept if not p.metadata.get("chunk", False)]
        chunk_parts = [p for p in kept if p.metadata.get("chunk", False)]
        logger.debug(f"Preserving {len(chunk_parts)} chunks from merging")
        # Apply intelligent merging for small text parts
        if non_chunk_parts:
            text_parts = [p for p in non_chunk_parts if p.typeGroup == "text"]
            if len(text_parts) > 5:  # If we have many small text parts, merge them
                logger.info(f"🔧 Merging {len(text_parts)} small text parts for efficiency")
                non_chunk_parts = _applyMerging(non_chunk_parts, mergeStrategy)
        # Combine non-chunk parts with chunk parts (chunks stay separate).
        kept = non_chunk_parts + chunk_parts
        logger.debug(f"Final parts after merging: {len(kept)} (chunks: {len(chunk_parts)})")
        # Re-check size after merging
        totalSize = sum(int(p.metadata.get("size", 0) or 0) for p in kept)
        if totalSize > maxSize and mergeStrategy.get("maxSize"):
            # Apply size limit to merged parts
            kept = _applySizeLimit(kept, maxSize)
    logger.debug(f"poolAndLimit returning {len(kept)} parts")
    return kept
# REMOVED: poolAndLimit function - chunking now handled in AI call phase
def _applyMerging(parts: List[ContentPart], strategy: Dict[str, Any]) -> List[ContentPart]:
@ -264,37 +143,5 @@ def _applyMerging(parts: List[ContentPart], strategy: Dict[str, Any]) -> List[Co
return merged
def _applySizeLimit(parts: List[ContentPart], maxSize: int) -> List[ContentPart]:
    """
    Apply size limit by prioritizing parts and truncating if necessary.

    Parts are ordered text-first by typeGroup priority; parts are kept while
    their declared sizes fit the budget. When a part no longer fits, a text
    part may be truncated to the remaining budget before the scan stops.

    Args:
        parts: Candidate content parts (``metadata["size"]`` in bytes).
        maxSize: Byte budget for the returned parts.

    Returns:
        The parts (possibly including one truncated text part) that fit.
    """
    # Sort by priority: text first, then others
    priority_order = {"text": 0, "table": 1, "structure": 2, "image": 3, "binary": 4, "metadata": 5, "container": 6}
    sorted_parts = sorted(parts, key=lambda p: priority_order.get(p.typeGroup, 99))
    kept: List[ContentPart] = []
    current_size = 0
    # Greedily keep the highest-priority parts while their declared sizes fit.
    for part in sorted_parts:
        part_size = int(part.metadata.get("size", 0) or 0)
        if current_size + part_size <= maxSize:
            kept.append(part)
            current_size += part_size
        else:
            # Try to truncate text parts
            if part.typeGroup == "text" and part_size > 0:
                remaining_size = maxSize - current_size
                if remaining_size > 1000:  # Only truncate if we have meaningful space
                    truncated_data = part.data[:remaining_size * 4]  # Rough character estimate
                    truncated_part = ContentPart(
                        id=makeId(),
                        parentId=part.parentId,
                        label=f"{part.label}_truncated",
                        typeGroup=part.typeGroup,
                        mimeType=part.mimeType,
                        data=truncated_data,
                        metadata={**part.metadata, "size": len(truncated_data.encode('utf-8')), "truncated": True}
                    )
                    kept.append(truncated_part)
            # NOTE(review): indentation reconstructed from a flattened dump;
            # the break is placed at the first part that no longer fits (after
            # an optional text truncation) — confirm against the original file.
            break
    return kept
# REMOVED: _applySizeLimit function - no longer needed after removing poolAndLimit

View file

@ -138,6 +138,7 @@ class AIBehaviorTester:
self.testResults.append(result)
return result
def _extractContinuationInstruction(self, response: str) -> str:
"""Extract continuation instruction from response."""
try:

View file

@ -25,7 +25,7 @@ from modules.datamodels.datamodelAi import (
)
from modules.datamodels.datamodelUam import User
from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.aicore.aicoreModelSelector import model_selector
from modules.aicore.aicoreModelSelector import modelSelector
class ModelSelectionTester:
@ -45,6 +45,51 @@ class ModelSelectionTester:
self.services.ai = await AiService.create(self.services)
async def _printFallbackListWithContext(self, title: str, prompt: str, context: str, options: AiCallOptions) -> None:
    """
    Print the prioritized failover model list for a prompt+context scenario.

    Shows the scenario header, payload sizes, and for each candidate model a
    summary line plus a score breakdown (size, processing-mode and priority
    ratings, as computed by the model selector's internal helpers).

    Fix: promptSize/contextSize/totalSize were recomputed inside the per-model
    loop although they are loop-invariant; they are now computed once and
    reused (printed output is unchanged).

    Args:
        title: Scenario title to print.
        prompt: Prompt text for the scenario.
        context: Context text for the scenario.
        options: Call options controlling operation type, priority and mode.
    """
    print(f"\n{'='*80}")
    print(f"{title}")
    print(f"{'='*80}")
    print(
        f"Operation={options.operationType.name}, Priority={options.priority.name}, ProcessingMode={options.processingMode.name}"
    )
    # Show context and prompt sizes (computed once; reused for scoring below).
    promptSize = len(prompt.encode("utf-8"))
    contextSize = len(context.encode("utf-8"))
    totalSize = promptSize + contextSize
    print(f"Prompt size: {promptSize} bytes, Context size: {contextSize} bytes, Total: {totalSize} bytes")
    availableModels = modelRegistry.getAvailableModels()
    failoverModelList = modelSelector.getFailoverModelList(
        prompt=prompt,
        context=context,
        options=options,
        availableModels=availableModels,
    )
    if not failoverModelList:
        print("No suitable models found (capability filter returned empty list).")
        return
    print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx | score):")
    for idx, m in enumerate(failoverModelList, 1):
        costIn = getattr(m, "costPer1kTokensInput", 0.0)
        # Detailed score breakdown — NOTE(review): uses the selector's private
        # helper methods; acceptable here since this is a diagnostic tester.
        sizeRating = modelSelector._getSizeRating(m, totalSize)
        processingModeRating = modelSelector._getProcessingModeRating(m.processingMode, options.processingMode)
        priorityRating = modelSelector._getPriorityRating(m, options.priority)
        totalScore = sizeRating + processingModeRating + priorityRating
        print(
            f" {idx:>2}. {m.name} | Q={getattr(m, 'qualityRating', 0)} | S={getattr(m, 'speedRating', 0)} | ${costIn:.4f} | ctx={getattr(m, 'contextLength', 0)} | score={totalScore:.3f}"
        )
        print(f" Size: {sizeRating:.3f}, ProcessingMode: {processingModeRating:.3f}, Priority: {priorityRating:.3f}")
async def _printFallbackList(self, title: str, prompt: str, options: AiCallOptions) -> None:
print(f"\n{'='*80}")
print(f"{title}")
@ -53,24 +98,43 @@ class ModelSelectionTester:
f"Operation={options.operationType.name}, Priority={options.priority.name}, ProcessingMode={options.processingMode.name}"
)
# Show context and prompt sizes
context = "" # Currently using empty context
promptSize = len(prompt.encode("utf-8"))
contextSize = len(context.encode("utf-8"))
totalSize = promptSize + contextSize
print(f"Prompt size: {promptSize} bytes, Context size: {contextSize} bytes, Total: {totalSize} bytes")
availableModels = modelRegistry.getAvailableModels()
fallbackModels = model_selector.getFallbackModels(
failoverModelList = modelSelector.getFailoverModelList(
prompt=prompt,
context="",
context=context,
options=options,
availableModels=availableModels,
)
if not fallbackModels:
if not failoverModelList:
print("No suitable models found (capability filter returned empty list).")
return
print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx):")
for idx, m in enumerate(fallbackModels, 1):
print("Prioritized fallback model sequence (name | quality | speed | $/1k in | ctx | score):")
for idx, m in enumerate(failoverModelList, 1):
costIn = getattr(m, "costPer1kTokensInput", 0.0)
# Calculate detailed score breakdown
promptSize = len(prompt.encode("utf-8"))
contextSize = len(context.encode("utf-8"))
totalSize = promptSize + contextSize
# Get detailed scoring
sizeRating = modelSelector._getSizeRating(m, totalSize)
processingModeRating = modelSelector._getProcessingModeRating(m.processingMode, options.processingMode)
priorityRating = modelSelector._getPriorityRating(m, options.priority)
totalScore = sizeRating + processingModeRating + priorityRating
print(
f" {idx:>2}. {m.name} | Q={getattr(m, 'qualityRating', 0)} | S={getattr(m, 'speedRating', 0)} | ${costIn:.4f} | ctx={getattr(m, 'contextLength', 0)}"
f" {idx:>2}. {m.name} | Q={getattr(m, 'qualityRating', 0)} | S={getattr(m, 'speedRating', 0)} | ${costIn:.4f} | ctx={getattr(m, 'contextLength', 0)} | score={totalScore:.3f}"
)
print(f" Size: {sizeRating:.3f}, ProcessingMode: {processingModeRating:.3f}, Priority: {priorityRating:.3f}")
async def run(self) -> None:
# Scenarios reflecting workflows/
@ -146,10 +210,93 @@ class ModelSelectionTester:
)
)
# Intent analysis (user input understanding)
scenarios.append(
(
"ANALYSE - Quality, Detailed (Intent Analysis)",
"Analyze user intent and extract key requirements from the following request: 'I need to create a comprehensive marketing strategy for our new product launch including budget allocation, timeline, and target audience analysis.'",
AiCallOptions(
operationType=OperationTypeEnum.ANALYSE,
priority=PriorityEnum.QUALITY,
compressPrompt=False,
compressContext=False,
processingMode=ProcessingModeEnum.DETAILED,
maxCost=0.08,
maxProcessingTime=45,
resultFormat="json",
temperature=0.2,
),
)
)
# Review/Validation (quality assurance)
scenarios.append(
(
"ANALYSE - Quality, Detailed (Review/Validation)",
"Review and validate the following business proposal for completeness, accuracy, and compliance with industry standards. Identify any gaps or areas for improvement.",
AiCallOptions(
operationType=OperationTypeEnum.ANALYSE,
priority=PriorityEnum.QUALITY,
compressPrompt=False,
compressContext=False,
processingMode=ProcessingModeEnum.DETAILED,
maxCost=0.10,
maxProcessingTime=60,
resultFormat="json",
temperature=0.1,
),
)
)
# Large context scenario (to test size-based scoring)
scenarios.append(
(
"GENERAL - Balanced, Advanced (Large Context Test)",
"Process this large document and provide a comprehensive summary.",
AiCallOptions(
operationType=OperationTypeEnum.GENERAL,
priority=PriorityEnum.BALANCED,
compressPrompt=False,
compressContext=False,
processingMode=ProcessingModeEnum.ADVANCED,
maxCost=0.15,
maxProcessingTime=120,
),
)
)
# Iterate and print lists
for title, prompt, options in scenarios:
await self._printFallbackList(title, prompt, options)
# Test with actual context to see size-based scoring
largeContext = """
This is a comprehensive business document containing detailed information about our company's strategic initiatives,
financial performance, market analysis, competitive landscape, operational metrics, customer feedback,
product development roadmap, technology stack, human resources, legal compliance, risk management,
sustainability efforts, and future growth plans. The document spans multiple sections including executive summary,
market research, financial statements, operational reports, customer insights, product specifications,
technology architecture, HR policies, legal frameworks, risk assessments, environmental impact studies,
and strategic recommendations. This extensive content is designed to test the model selection algorithm's
ability to handle large context sizes and make intelligent decisions about which models are best suited
for processing such substantial amounts of information while maintaining efficiency and cost-effectiveness.
""" * 10 # Repeat to make it even larger
await self._printFallbackListWithContext(
"GENERAL - Balanced, Advanced (Large Context Test)",
"Analyze this comprehensive business document and provide key insights.",
largeContext,
AiCallOptions(
operationType=OperationTypeEnum.GENERAL,
priority=PriorityEnum.BALANCED,
compressPrompt=False,
compressContext=False,
processingMode=ProcessingModeEnum.ADVANCED,
maxCost=0.15,
maxProcessingTime=120,
),
)
async def main() -> None:
tester = ModelSelectionTester()